Coordinated schemata validation is now limited by a minimum of 20 positions with a coordinated pair of phrase types.

Coordinated schemata validation is now limited by a minimum of 20 positions with a coordinated pair of phrase types.
Bartłomiej Nitoń
1 parent 2cc66960
Showing 2 changed files with 36 additions and 19 deletions
dictionary/ajax_lemma_view.py
dictionary/validation.py
@@ -2586,7 +2586,6 @@ def validate_new_frames(request, data, id, examples, lemma_examples,
       aspect_rel_lemmas = get_aspect_rel_lemmas(old_object)  
       missing_frames = get_all_test_missing_frames(frames, aspect_rel_lemmas) 
       missmatched_B_frames = validate_B_frames(old_object)
-      frames_to_merge = find_similar_frames(old_object.frames.all())
       wrong_aspect_frames = get_wrong_aspect_frames(old_object, old_object.frames.order_by('text_rep'))
       deriv_miss_frames_msg = get_deriv_miss_frames_message(old_object)
       if old_object.entry_obj.pos.tag == 'verb':
@@ -2612,6 +2611,7 @@ def validate_new_frames(request, data, id, examples, lemma_examples,
               message_content += u'\t- %s\n' % (miss_frame.text_rep)
           message_content += '\n'
       message_content += deriv_miss_frames_msg
+      frames_to_merge = find_similar_frames(old_object.frames.all())
       if len(frames_to_merge) > 0:
           message_content += u'Sugerowane jest połączenie poniższych schematów, zawierają one często koordynujące się typy fraz:\n'
           for comb in frames_to_merge:
@@ -171,6 +171,41 @@ def validate_B_frames(lemma_obj):
  
 ### KOORDYNACJA #####
  
+def find_similar_frames(frames):  # Return a list of {'frames': pair, 'occurrences': n} dicts for frame pairs suggested for merging, highest 'occurrences' first.
+    frames_to_merge = []
+    if len(frames) > 1:  # at least two frames are needed to form a pair
+        combinations = itertools.combinations(frames, 2)  # every unordered pair of frames
+        for comb in combinations:
+            if (comb[0].characteristics.get(type=u'ZWROTNOŚĆ') == comb[1].characteristics.get(type=u'ZWROTNOŚĆ') and  # both frames must agree on each of the four characteristic types checked here
+                comb[0].characteristics.get(type=u'ASPEKT') == comb[1].characteristics.get(type=u'ASPEKT') and
+                comb[0].characteristics.get(type=u'NEGATYWNOŚĆ') == comb[1].characteristics.get(type=u'NEGATYWNOŚĆ') and
+                comb[0].characteristics.get(type=u'PREDYKATYWNOŚĆ') == comb[1].characteristics.get(type=u'PREDYKATYWNOŚĆ')):
+                occurrences = check_max_args_coor(comb[0], comb[1])  # largest summed 'occurrences' found for this pair (0 when the pair does not qualify)
+                if occurrences >= 20:  # pairs below the minimum of 20 are not reported
+                    frames_to_merge.append({'frames': comb,
+                                            'occurrences': occurrences})
+    frames_to_merge = sorted(frames_to_merge,  # order the suggestions by descending 'occurrences'
+                             key=operator.itemgetter('occurrences'), 
+                             reverse=True)
+    return frames_to_merge
+
+def check_max_args_coor(frame1, frame2):  # Return the largest summed 'occurrences' over pairs of phrase types taken from the single differing position of each frame; 0 if the frames do not qualify.
+    max_occurr = 0  # returned unchanged when the condition below is not met or nothing matches
+    pos_diff1 = frame1.positions.exclude(pk__in=frame2.positions.all())  # positions of frame1 that are not among frame2's positions
+    pos_diff2 = frame2.positions.exclude(pk__in=frame1.positions.all())  # positions of frame2 that are not among frame1's positions
+    if(frame1.positions.count() == frame2.positions.count() and  # both frames have the same number of positions
+       pos_diff1.count() == 1 and pos_diff2.count() == 1 and 
+       pos_diff1.all()[0].categories.count() == pos_diff2.all()[0].categories.count() and  # the two differing positions have equally many categories
+       (pos_diff1.all()[0].categories.all() | 
+        pos_diff2.all()[0].categories.all()).count() == pos_diff1.all()[0].categories.count()):  # the union of both category sets is no larger than one of them
+        for phrase_type1 in pos_diff1.all()[0].arguments.all():  # every argument of frame1's differing position ...
+            for phrase_type2 in pos_diff2.all()[0].arguments.all():  # ... paired with every argument of frame2's differing position
+                matching_positions = Position.objects.filter(arguments=phrase_type1).filter(arguments=phrase_type2)  # positions whose arguments match both phrase types
+                occurr = matching_positions.aggregate(Sum('occurrences'))['occurrences__sum']  # total of the 'occurrences' field; presumably None when nothing matched, hence the truthiness check below
+                if occurr and occurr > max_occurr:  # keep the highest total seen so far
+                    max_occurr = occurr
+    return max_occurr
+
 def check_frames_diff(frame1, frame2):
     occurr = 0
     pos_diff1 = frame1.positions.exclude(pk__in=frame2.positions.all())
@@ -190,24 +225,6 @@ def check_frames_diff(frame1, frame2):
             return occurr
     return occurr
  
-def find_similar_frames(frames):
-    frames_to_merge = []
-    if len(frames) > 1:
-        combinations = itertools.combinations(frames, 2)
-        for comb in combinations:
-            if (comb[0].characteristics.get(type=u'ZWROTNOŚĆ') == comb[1].characteristics.get(type=u'ZWROTNOŚĆ') and
-                comb[0].characteristics.get(type=u'ASPEKT') == comb[1].characteristics.get(type=u'ASPEKT') and
-                comb[0].characteristics.get(type=u'NEGATYWNOŚĆ') == comb[1].characteristics.get(type=u'NEGATYWNOŚĆ') and
-                comb[0].characteristics.get(type=u'PREDYKATYWNOŚĆ') == comb[1].characteristics.get(type=u'PREDYKATYWNOŚĆ')):
-                occurrences = check_frames_diff(comb[0], comb[1])
-                if occurrences > 0:
-                    frames_to_merge.append({'frames': comb,
-                                            'occurrences': occurrences})
-    frames_to_merge = sorted(frames_to_merge, 
-                             key=operator.itemgetter('occurrences'), 
-                             reverse=True)
-    return frames_to_merge
-
 ###################### walidacja powiazanych hasel (nieczasownikowe) #######################################
  
 def get_deriv_miss_frames_message(lemma):