Commit 9c1af6531483d512d8ad817ee8dfbb4a1a59b985

Authored by Bartłomiej Nitoń
1 parent 2cc66960

Coordinated schemata validation is now limited by a minimum of 20 positions with a coordinated pair of phrase types.
dictionary/ajax_lemma_view.py
... ... @@ -2586,7 +2586,6 @@ def validate_new_frames(request, data, id, examples, lemma_examples,
2586 2586 aspect_rel_lemmas = get_aspect_rel_lemmas(old_object)
2587 2587 missing_frames = get_all_test_missing_frames(frames, aspect_rel_lemmas)
2588 2588 missmatched_B_frames = validate_B_frames(old_object)
2589   - frames_to_merge = find_similar_frames(old_object.frames.all())
2590 2589 wrong_aspect_frames = get_wrong_aspect_frames(old_object, old_object.frames.order_by('text_rep'))
2591 2590 deriv_miss_frames_msg = get_deriv_miss_frames_message(old_object)
2592 2591 if old_object.entry_obj.pos.tag == 'verb':
... ... @@ -2612,6 +2611,7 @@ def validate_new_frames(request, data, id, examples, lemma_examples,
2612 2611 message_content += u'\t- %s\n' % (miss_frame.text_rep)
2613 2612 message_content += '\n'
2614 2613 message_content += deriv_miss_frames_msg
  2614 + frames_to_merge = find_similar_frames(old_object.frames.all())
2615 2615 if len(frames_to_merge) > 0:
2616 2616 message_content += u'Sugerowane jest połączenie poniższych schematów, zawierają one często koordynujące się typy fraz:\n'
2617 2617 for comb in frames_to_merge:
... ...
dictionary/validation.py
... ... @@ -171,6 +171,41 @@ def validate_B_frames(lemma_obj):
171 171  
172 172 ### KOORDYNACJA #####
173 173  
def find_similar_frames(frames, min_occurrences=20):
    """Return pairs of frames that are suggested for merging.

    Every unordered pair of ``frames`` is examined. A pair is kept when both
    frames carry equal characteristics of each of the four compared types and
    ``check_max_args_coor`` reports at least ``min_occurrences`` for the pair.

    Args:
        frames: iterable of frame objects; each must expose
            ``characteristics.get(type=...)``.
        min_occurrences: lowest value returned by ``check_max_args_coor``
            for which a pair is reported. Defaults to 20, the threshold
            that used to be hard-coded.

    Returns:
        A list of ``{'frames': (frame_a, frame_b), 'occurrences': n}`` dicts
        sorted by ``occurrences`` in descending order. Empty when fewer than
        two frames are given or no pair qualifies.
    """
    # Characteristic types that have to agree; the order matches the order
    # in which the comparisons were originally chained.
    compared_types = (u'ZWROTNOŚĆ', u'ASPEKT', u'NEGATYWNOŚĆ', u'PREDYKATYWNOŚĆ')

    # Materialise once so that any iterable is accepted and its length is
    # known before pairing.
    frames = list(frames)
    frames_to_merge = []
    if len(frames) < 2:
        return frames_to_merge

    for comb in itertools.combinations(frames, 2):
        first, second = comb
        # all() short-circuits, so later characteristics are only fetched
        # when the earlier ones already matched.
        same_characteristics = all(
            first.characteristics.get(type=char_type) ==
            second.characteristics.get(type=char_type)
            for char_type in compared_types)
        if not same_characteristics:
            continue
        occurrences = check_max_args_coor(first, second)
        if occurrences >= min_occurrences:
            frames_to_merge.append({'frames': comb,
                                    'occurrences': occurrences})

    # list.sort is stable, exactly like sorted(), so ties keep pairing order.
    frames_to_merge.sort(key=operator.itemgetter('occurrences'), reverse=True)
    return frames_to_merge
  191 +
def check_max_args_coor(frame1, frame2):
    """Return the highest summed occurrence count for a phrase-type pair.

    The two frames are only compared when they have the same number of
    positions, differ in exactly one position each, and the two differing
    positions have the same number of categories, with the union of their
    categories being no larger than that number. In every other case 0 is
    returned.

    For a comparable pair, every phrase type of the differing position of
    ``frame1`` is paired with every phrase type of the differing position of
    ``frame2``. For each pair the ``occurrences`` values of all ``Position``
    objects containing both phrase types are summed, and the largest sum is
    returned.

    Args:
        frame1: frame object exposing a ``positions`` related manager.
        frame2: frame object exposing a ``positions`` related manager.

    Returns:
        The largest summed ``occurrences`` value found, or 0 when the frames
        are not comparable or no matching positions exist.
    """
    pos_diff1 = frame1.positions.exclude(pk__in=frame2.positions.all())
    pos_diff2 = frame2.positions.exclude(pk__in=frame1.positions.all())

    # Guard clauses keep the original check order, so nothing is indexed
    # before both differences are known to hold exactly one position.
    if frame1.positions.count() != frame2.positions.count():
        return 0
    if pos_diff1.count() != 1 or pos_diff2.count() != 1:
        return 0

    # Fetch each differing position once instead of re-running the
    # ``[0]`` lookup for every use.
    position1 = pos_diff1.all()[0]
    position2 = pos_diff2.all()[0]

    categories_count = position1.categories.count()
    if categories_count != position2.categories.count():
        return 0
    all_categories = position1.categories.all() | position2.categories.all()
    if all_categories.count() != categories_count:
        return 0

    # The second position's phrase types do not depend on the outer loop,
    # so they are loaded a single time.
    phrase_types2 = list(position2.arguments.all())

    max_occurr = 0
    for phrase_type1 in position1.arguments.all():
        for phrase_type2 in phrase_types2:
            matching_positions = Position.objects.filter(
                arguments=phrase_type1).filter(arguments=phrase_type2)
            occurr = matching_positions.aggregate(
                Sum('occurrences'))['occurrences__sum']
            # The aggregate yields None when nothing matched.
            if occurr and occurr > max_occurr:
                max_occurr = occurr
    return max_occurr
  208 +
174 209 def check_frames_diff(frame1, frame2):
175 210 occurr = 0
176 211 pos_diff1 = frame1.positions.exclude(pk__in=frame2.positions.all())
... ... @@ -190,24 +225,6 @@ def check_frames_diff(frame1, frame2):
190 225 return occurr
191 226 return occurr
192 227  
193   -def find_similar_frames(frames):
194   - frames_to_merge = []
195   - if len(frames) > 1:
196   - combinations = itertools.combinations(frames, 2)
197   - for comb in combinations:
198   - if (comb[0].characteristics.get(type=u'ZWROTNOŚĆ') == comb[1].characteristics.get(type=u'ZWROTNOŚĆ') and
199   - comb[0].characteristics.get(type=u'ASPEKT') == comb[1].characteristics.get(type=u'ASPEKT') and
200   - comb[0].characteristics.get(type=u'NEGATYWNOŚĆ') == comb[1].characteristics.get(type=u'NEGATYWNOŚĆ') and
201   - comb[0].characteristics.get(type=u'PREDYKATYWNOŚĆ') == comb[1].characteristics.get(type=u'PREDYKATYWNOŚĆ')):
202   - occurrences = check_frames_diff(comb[0], comb[1])
203   - if occurrences > 0:
204   - frames_to_merge.append({'frames': comb,
205   - 'occurrences': occurrences})
206   - frames_to_merge = sorted(frames_to_merge,
207   - key=operator.itemgetter('occurrences'),
208   - reverse=True)
209   - return frames_to_merge
210   -
211 228 ###################### walidacja powiazanych hasel (nieczasownikowe) #######################################
212 229  
213 230 def get_deriv_miss_frames_message(lemma):
... ...