validation.py 45.9 KB

Edit Raw Blame History

#-*- coding:utf-8 -*-

import copy
import itertools
import operator

from django.db.models import Sum, Q

from common.js_to_obj import frameObjToSerializableDict
from dictionary.common_func import subframe_exists
from dictionary.convert_frames import frame_conversion
from dictionary.forms import FrameAspectForm, FrameOpinionForm
from dictionary.models import Argument_Model, AspectRelationsGroup, Frame_Char_Model, \
                              Frame_Characteristic, Lemma, Position, PositionCategory, \
                              get_attribute_models, sortArguments, sortPositions
from settings import MORFEUSZ2

def get_wrong_aspect_frames(lemma, frames):
    """Return the frames that check_aspect rejects for the given lemma.

    The relative order of ``frames`` is preserved in the result.
    """
    return [candidate for candidate in frames
            if not check_aspect(lemma, candidate)]

def check_aspect(lemma, frame):
    """Tell whether the aspect declared on ``frame`` is acceptable for ``lemma``.

    The frame aspect is read from its 'ASPEKT' characteristic.  The lemma
    entry is analysed with MORFEUSZ2 and only interpretations whose tag starts
    with 'inf' are considered; the second tag field holds the dot-separated
    aspects of that infinitive.

    Returns True when:
      * the frame aspect is the wildcard '_', or
      * some infinitive interpretation lists the frame aspect, or
      * the analysis produced no infinitive interpretation at all.
    Returns False otherwise.
    """
    aspect_char = frame.characteristics.get(type=u'ASPEKT')
    declared_aspect = aspect_char.value.value
    analyses = MORFEUSZ2.analyse(lemma.entry.encode('utf8'))

    # The wildcard aspect is compatible with everything.
    if declared_aspect == '_':
        return True

    saw_infinitive = False
    for analysis in analyses:
        tag_fields = analysis.getTag(MORFEUSZ2).split(':')
        if tag_fields[0] != 'inf':
            continue
        saw_infinitive = True
        if declared_aspect in tag_fields[1].split('.'):
            return True
    # Without any infinitive reading there is nothing to compare against,
    # so the frame is not reported as wrong.
    return not saw_infinitive

def get_missing_aspects(lemma):
    """List the possible aspects of ``lemma`` that none of its frames use.

    If any frame of the lemma carries the wildcard aspect value '_', nothing
    is considered missing and an empty list is returned.  Otherwise every
    aspect reported by get_possible_aspects is checked against the
    characteristic values of the lemma frames.
    """
    aspect_model = Frame_Char_Model.objects.get(model_name=u'ASPEKT')
    wildcard_aspect = aspect_model.frame_char_values.get(value='_')

    if lemma.frames.filter(characteristics__value=wildcard_aspect).exists():
        return []

    return [aspect for aspect in get_possible_aspects(lemma)
            if not lemma.frames.filter(characteristics__value__value=aspect).exists()]

def get_missing_aspects_msg(lemma):
    """Build the user message listing aspects missing from the lemma frames.

    Returns an empty string when get_missing_aspects reports nothing.
    """
    absent_aspects = get_missing_aspects(lemma)
    if not absent_aspects:
        return ''
    return u'W haśle brakuje schematów o aspekcie: %s.\n\n' % u', '.join(absent_aspects)

def get_possible_aspects(lemma):
    """Collect the aspects of all infinitive interpretations of the lemma entry.

    The entry is analysed with MORFEUSZ2; for every interpretation whose tag
    starts with 'inf', the dot-separated values of the second tag field are
    gathered.  Each aspect appears once, in order of first occurrence.
    """
    collected = []
    for analysis in MORFEUSZ2.analyse(lemma.entry.encode('utf8')):
        tag_fields = analysis.getTag(MORFEUSZ2).split(':')
        if tag_fields[0] != 'inf':
            continue
        for aspect in tag_fields[1].split('.'):
            if aspect not in collected:
                collected.append(aspect)
    return collected

def match_arg_poss(arg_poss, frame):
    poss_positions = []
    for arg in arg_poss.possible_args.all():
        if "_" in arg.argument.text_rep:
            regex = ur"^%s$" % arg.argument.text_rep.replace("_", ".+?").replace("(", "\(").replace(")", "\)").replace("[", "\[").replace("]", "\]")
            if arg.position_category:
                poss_positions.extend(frame.positions.filter(categories=arg.position_category,
                                                             arguments__text_rep__regex=regex).distinct().all())
                poss_positions.extend(frame.positions.filter(categories=arg.position_category,
                                                             arguments__realizations__argument__text_rep__regex=regex).distinct().all())
            else:
                poss_positions.extend(frame.positions.filter(arguments__text_rep__regex=regex).
                                                             exclude(categories__control=False).distinct().all())
                poss_positions.extend(frame.positions.filter(arguments__realizations__argument__text_rep__regex=regex).
                                                             exclude(categories__control=False).distinct().all())
        else:
            if arg.position_category:
                poss_positions.extend(frame.positions.filter(categories=arg.position_category,
                                                             arguments=arg.argument).distinct().all())
                poss_positions.extend(frame.positions.filter(categories=arg.position_category,
                                                             arguments__realizations__argument=arg.argument).distinct().all())
            else:
                poss_positions.extend(frame.positions.filter(arguments=arg.argument).
                                                             exclude(categories__control=False).distinct().all())
                poss_positions.extend(frame.positions.filter(arguments__realizations__argument=arg.argument).
                                                             exclude(categories__control=False).distinct().all())
    return poss_positions

def check_sie(frame):
    """Tell whether ``frame`` is reflexive.

    A frame counts as reflexive when it has the reflexivity characteristic
    with the reflexive-marker value, or when any of its positions contains an
    argument whose text representation is 'refl'.
    """
    if frame.characteristics.filter(value__value=u'się', type=u'ZWROTNOŚĆ').exists():
        return True
    return frame.positions.filter(arguments__text_rep='refl').exists()

def check_combinations(somelists, args_to_match):
    """Tell whether one distinct element can be picked from every list.

    Every combination taking one element from each list in ``somelists`` is
    tried.  The function returns True as soon as a combination consists of
    pairwise different elements and has at least ``args_to_match`` of them.
    An empty ``somelists`` (whose only combination is the empty tuple) yields
    False.  Elements must be hashable.
    """
    return any(
        len(set(combo)) == len(combo) >= args_to_match
        for combo in itertools.product(*somelists)
        if combo
    )

def validate_B_frames(lemma_obj):
    """Return the B-frames of the lemma that no regular frame can account for.

    A regular frame matches a B-frame when:
      * it has the B-frame aspect and negativity characteristics,
      * it is reflexive (see check_sie) if the B-frame is,
      * it has at least as many positions as the B-frame has arguments,
      * every B-frame argument has candidate positions (match_arg_poss), and
      * those candidates admit an assignment of distinct positions
        (check_combinations).
    """
    mismatched_b_frames = []
    for b_frame in lemma_obj.B_frames.all():
        for frame in lemma_obj.frames.all():
            if not frame.characteristics.filter(id=b_frame.aspect.id).exists():
                continue
            if not frame.characteristics.filter(id=b_frame.negativity.id).exists():
                continue
            if b_frame.reflex and not check_sie(frame):
                continue
            if frame.positions.count() < b_frame.arguments.count():
                continue

            candidates_per_arg = []
            for b_arg in b_frame.arguments.all():
                candidates = match_arg_poss(b_arg, frame)
                if len(candidates) == 0:
                    # An argument with no candidate position rules this frame out.
                    break
                candidates_per_arg.append(candidates)
            else:
                # Every argument has candidates; check they can be distinct.
                if check_combinations(candidates_per_arg, b_frame.arguments.count()):
                    break
        else:
            # No frame matched (the frame loop ended without a break).
            mismatched_b_frames.append(b_frame)
    return mismatched_b_frames

### COORDINATION #####

def find_similar_frames(frames):
    """Find pairs of frames that are candidates for merging by coordination.

    Two frames qualify when their reflexivity, aspect, negativity and
    predicativity characteristics are equal and check_max_args_coor reports at
    least 20 occurrences for them.

    Returns a list of dicts with keys 'frames' (the pair, as a tuple) and
    'occurrences', sorted by 'occurrences' in descending order.
    """
    compared_types = (u'ZWROTNOŚĆ', u'ASPEKT', u'NEGATYWNOŚĆ', u'PREDYKATYWNOŚĆ')
    merge_candidates = []
    if len(frames) > 1:
        for first, second in itertools.combinations(frames, 2):
            same_characteristics = all(
                first.characteristics.get(type=char_type) ==
                second.characteristics.get(type=char_type)
                for char_type in compared_types)
            if not same_characteristics:
                continue
            occurrences = check_max_args_coor(first, second)
            if occurrences >= 20:
                merge_candidates.append({'frames': (first, second),
                                         'occurrences': occurrences})
    merge_candidates.sort(key=operator.itemgetter('occurrences'), reverse=True)
    return merge_candidates

def check_max_args_coor(frame1, frame2):
    """Return the best co-occurrence count for the single differing position.

    The result is non-zero only when both frames have the same number of
    positions, differ (by position text representation) in exactly one
    position each, and those two positions carry the same number of
    categories, with the union of the two category sets counting the same.

    In that case, for every pair of phrase types (one from each differing
    position) the 'occurrences' of all positions containing both phrase types
    are summed, and the largest such sum is returned.  Otherwise 0.
    """
    if frame1.positions.count() != frame2.positions.count():
        return 0

    text_reps1 = [pos.text_rep for pos in frame1.positions.all()]
    text_reps2 = [pos.text_rep for pos in frame2.positions.all()]
    only_in_1 = frame1.positions.exclude(text_rep__in=text_reps2)
    only_in_2 = frame2.positions.exclude(text_rep__in=text_reps1)
    if only_in_1.count() != 1 or only_in_2.count() != 1:
        return 0

    diff_pos1 = only_in_1.all()[0]
    diff_pos2 = only_in_2.all()[0]
    if diff_pos1.categories.count() != diff_pos2.categories.count():
        return 0
    merged_categories = diff_pos1.categories.all() | diff_pos2.categories.all()
    if merged_categories.count() != diff_pos1.categories.count():
        return 0

    best = 0
    for phrase_type1 in diff_pos1.arguments.all():
        for phrase_type2 in diff_pos2.arguments.all():
            shared = Position.objects.filter(arguments=phrase_type1).filter(arguments=phrase_type2)
            total = shared.aggregate(Sum('occurrences'))['occurrences__sum']
            # The sum is None when no position holds both phrase types.
            if total and total > best:
                best = total
    return best

def check_frames_diff(frame1, frame2):
    """Return how often the differing positions of two frames co-occur.

    The result is non-zero only when both frames have the same number of
    positions, differ (by primary key) in exactly one position each, and those
    two positions carry the same number of categories, with the union of the
    two category sets counting the same.

    In that case the 'occurrences' of all positions that contain every
    argument of both differing positions are summed.

    Always returns a number: when no position matches, the database sum is
    NULL and 0 is returned instead of None, consistently with the other
    non-matching cases.
    """
    if frame1.positions.count() != frame2.positions.count():
        return 0

    only_in_1 = frame1.positions.exclude(pk__in=frame2.positions.all())
    only_in_2 = frame2.positions.exclude(pk__in=frame1.positions.all())
    if only_in_1.count() != 1 or only_in_2.count() != 1:
        return 0

    # Fetch each differing position once instead of re-querying it.
    diff_pos1 = only_in_1.all()[0]
    diff_pos2 = only_in_2.all()[0]
    if diff_pos1.categories.count() != diff_pos2.categories.count():
        return 0
    merged_categories = diff_pos1.categories.all() | diff_pos2.categories.all()
    if merged_categories.count() != diff_pos1.categories.count():
        return 0

    arguments_sum = diff_pos1.arguments.all() | diff_pos2.arguments.all()
    matching_positions = Position.objects.all()
    for arg in arguments_sum.distinct().all():
        # Chained filters: a position must contain every argument.
        matching_positions = matching_positions.filter(arguments=arg)
    total = matching_positions.aggregate(Sum('occurrences'))['occurrences__sum']
    return total or 0

###################### validation of related entries (non-verbal) #######################################

def get_deriv_miss_frames_message(lemma):
    """Build the user message listing frames missing w.r.t. derivationally related entries.

    The message has a header line, then for each related entry its name and
    part-of-speech name followed by one line per missing frame, and ends with
    a blank line.  Returns an empty string when nothing is missing.
    """
    missing_groups = get_deriv_miss_frames(lemma)
    if not missing_groups:
        return ''

    chunks = [u'Edytowane hasło nie posiada następujących schematów z powiązanych derywacyjnie haseł:\n']
    for group in missing_groups:
        chunks.append(u'  %s (%s):\n' % (group['entry'].name,
                                         group['entry'].pos.name))
        for frame in group['frames']:
            chunks.append(u'\t- %s\n' % (frame.text_rep))
    chunks.append('\n')
    return u''.join(chunks)

def get_deriv_miss_frames(lemma):
    """Collect frames of derivationally related lemmas that ``lemma`` lacks.

    Each frame of a related lemma is converted (frame_conversion) from the
    related entry's part of speech to this entry's part of speech.  When the
    conversion yields something and none of the converted frames exists among
    the lemma frames (deriv_frame_exists), the original frame is recorded.

    Returns a list of dicts {'entry': <related entry>, 'frames': [...]},
    as maintained by add_missing_deriv_frame.
    """
    entry = lemma.entry_obj
    missing_frames = []
    for related_lemma in get_deriv_related_lemmas(entry):
        for frame in get_deriv_frames_to_check(related_lemma):
            converted = frame_conversion(frame,
                                         related_lemma.entry_obj.pos,
                                         entry.pos)
            if len(converted) == 0:
                continue
            if deriv_frame_exists(lemma.frames.all(), converted):
                continue
            add_missing_deriv_frame(missing_frames, frame, related_lemma.entry_obj)
    return missing_frames

def get_deriv_related_lemmas(entry):
    """Return the current (``old=False``) lemmas of entries related to ``entry``.

    Related entries are visited in order of their name; entries without a
    current lemma are skipped.
    """
    current_lemmas = []
    for related_entry in entry.related_entries.order_by('name'):
        try:
            lemma = Lemma.objects.get(entry_obj=related_entry, old=False)
        except Lemma.DoesNotExist:
            continue
        current_lemmas.append(lemma)
    return current_lemmas

def get_deriv_frames_to_check(lemma):
    """Pick the frames of ``lemma`` to compare against a related entry.

    Frames are ordered by text representation.  Frames having the reflexive
    value of the reflexivity characteristic are left out, unless that would
    leave nothing, in which case all frames are returned.
    """
    ordered_frames = lemma.frames.order_by('text_rep')
    non_reflexive = [frame for frame in ordered_frames
                     if not frame.char_exists(u'ZWROTNOŚĆ', u'się')]
    return non_reflexive if len(non_reflexive) != 0 else ordered_frames

def deriv_frame_exists(lemma_frames, converted_frames):
    """Tell whether any converted frame is found among ``lemma_frames``.

    Each converted frame is tested with subframe_exists; checking stops at the
    first hit.
    """
    return any(subframe_exists(lemma_frames, converted)
               for converted in converted_frames)

def add_missing_deriv_frame(missing_frames, frame, deriv_rel_entry):
    """Record ``frame`` as missing under its related entry (in place).

    ``missing_frames`` is a list of dicts {'entry': ..., 'frames': [...]}.
    The frame is appended to the first group whose 'entry' equals
    ``deriv_rel_entry``; if there is none, a new group is added at the end.
    """
    for group in missing_frames:
        if group['entry'] == deriv_rel_entry:
            group['frames'].append(frame)
            return
    missing_frames.append({'entry': deriv_rel_entry,
                           'frames': [frame]})


######################## aspect-related entries #########################
def get_aspect_rel_lemmas(lemma_obj):
    """Return current lemmas that are aspect-related to ``lemma_obj``.

    The aspect relation group containing the lemma entry name is looked up and
    the current (``old=False``) lemmas of its other members are returned,
    restricted to the statuses listed below.

    Returns an empty list when the entry belongs to no aspect relation group,
    and an empty queryset when the group has no other members (previously
    that case raised TypeError from ``reduce`` on an empty sequence).
    """
    accepted_statuses = ['checked', 'ready',
                         'edit_f', 'checked_f', 'ready_f',
                         'edit_s', 'checked_s', 'ready_s']
    try:
        group = AspectRelationsGroup.objects.get(members__name=lemma_obj.entry)
    except AspectRelationsGroup.DoesNotExist:
        return []

    other_member_names = list(group.members.exclude(name=lemma_obj.entry)
                                           .values_list('name', flat=True))
    # ``__in`` lookups replace the hand-built OR chains of Q objects and are
    # well defined for an empty list of names.
    return (Lemma.objects.filter(old=False)
                         .filter(status__type__sym_name__in=accepted_statuses)
                         .filter(entry__in=other_member_names)
                         .all())

def get_all_test_missing_frames(test_frames, aspect_rel_lemmas):
    """Describe frames of aspect-related lemmas that are absent from ``test_frames``.

    For every lemma in ``aspect_rel_lemmas`` the frames reported by
    get_miss_test_frames are wrapped in dicts with:
      'frame'        - the missing frame,
      'aspect_form'  - a FrameAspectForm preset with the frame's own aspect
                       characteristic when its value is the default one,
                       otherwise with the first other non-default aspect
                       characteristic,
      'opinion_form' - a FrameOpinionForm preset with the opinion value the
                       pattern lemma gives to that frame.

    Returns a list of {'lemma': <entry name>, 'frames': [...]}; lemmas with
    nothing missing are omitted.
    """
    report = []
    for pattern_lemma in aspect_rel_lemmas:
        absent_frames = get_miss_test_frames(test_frames, pattern_lemma.frames)
        if len(absent_frames) == 0:
            continue

        frame_entries = []
        for frame in absent_frames:
            aspect_char = frame.characteristics.get(type=u'ASPEKT')
            if aspect_char.value.default:
                proposed_aspect = aspect_char
            else:
                other_aspects = Frame_Characteristic.objects.filter(type=u'ASPEKT')
                other_aspects = other_aspects.exclude(id=aspect_char.id)
                other_aspects = other_aspects.exclude(value__default=True)
                proposed_aspect = other_aspects[0]
            opinion_value = pattern_lemma.frame_opinions.get(frame=frame).value
            frame_entries.append({
                'frame': frame,
                'aspect_form': FrameAspectForm(aspect_val=proposed_aspect),
                'opinion_form': FrameOpinionForm(opinion_val=opinion_value),
            })
        report.append({'lemma': pattern_lemma.entry,
                       'frames': frame_entries})
    return report

def _pattern_positions_covered(pattern_positions, test_positions):
    """Tell whether every pattern position pairs with a distinct test position.

    ``pattern_positions`` is a list of (argument text set, category set)
    tuples; ``test_positions`` is a list of serialized positions (dicts with
    'arguments' and 'categories').  A test position fits a pattern position
    when it has exactly the same categories and contains all pattern
    arguments.  Among fitting positions the one with the fewest arguments is
    consumed, so it cannot be reused for another pattern position.
    """
    unmatched = list(test_positions)
    for pattern_args, pattern_cats in pattern_positions:
        smallest = None
        for position in unmatched:
            test_args = set(arg['text_rep'] for arg in position['arguments'])
            if pattern_args <= test_args and set(position['categories']) == pattern_cats:
                if (smallest is None or
                        len(smallest['arguments']) > len(position['arguments'])):
                    smallest = position
        if smallest is None:
            return False
        unmatched.remove(smallest)
    return True


def get_miss_test_frames(test_frames, pattern_frames):
    """Return the pattern frames that have no counterpart in ``test_frames``.

    ``test_frames`` are serialized frames: dicts whose 'characteristics' list
    is indexed by Frame_Char_Model priority order and whose 'positions' list
    holds dicts with 'arguments' (each with 'text_rep') and 'categories'.
    ``pattern_frames`` is a manager/queryset of frame objects.

    A test frame is a counterpart of a pattern frame when their reflexivity,
    negativity and predicativity values are equal (aspect is deliberately not
    compared) and every pattern position can be paired with a different test
    position, see _pattern_positions_covered.

    Fix over the previous version: the list of still-unmatched test positions
    was rebuilt for every pattern position, so removing a matched position had
    no effect and a single test position could satisfy several pattern
    positions.
    """
    # The characteristic order is the same for every comparison; resolve the
    # three indices once instead of querying inside the innermost condition.
    char_order = list(Frame_Char_Model.objects.order_by('priority')
                                              .values_list('model_name', flat=True))
    reflex_idx = char_order.index(u'ZWROTNOŚĆ')
    negativity_idx = char_order.index(u'NEGATYWNOŚĆ')
    pred_idx = char_order.index(u'PREDYKATYWNOŚĆ')

    missing_frames = []
    for pattern_frame in pattern_frames.all():
        reflex_val = pattern_frame.characteristics.get(type=u'ZWROTNOŚĆ').value.value
        negativity_val = pattern_frame.characteristics.get(type=u'NEGATYWNOŚĆ').value.value
        pred_val = pattern_frame.characteristics.get(type=u'PREDYKATYWNOŚĆ').value.value

        pattern_positions = None  # loaded on first use, then shared
        match_found = False
        for test_frame in test_frames:
            test_chars = test_frame['characteristics']
            if (reflex_val != test_chars[reflex_idx] or
                    negativity_val != test_chars[negativity_idx] or
                    pred_val != test_chars[pred_idx]):
                continue
            if pattern_positions is None:
                pattern_positions = [
                    (set(arg.text_rep for arg in pattern_pos.arguments.all()),
                     set(cat.category for cat in pattern_pos.categories.all()))
                    for pattern_pos in pattern_frame.positions.all()]
            if _pattern_positions_covered(pattern_positions, test_frame['positions']):
                match_found = True
                break
        if not match_found:
            missing_frames.append(pattern_frame)
    return missing_frames


def prep_check(arg):
    """Check that a prepositional phrase type uses a preposition/case pair known to MORFEUSZ2.

    Applies to phrase types starting with 'prep' (prepnp, prepncp, prepadjp,
    preplexnp); anything else is accepted.  The text between the first '('
    and the first ')' is split on commas: the last word of the first
    parameter is the preposition form and the second parameter is the case.

    * case 'postp' (used by prepadjp) is accepted without analysis,
    * case 'str' (used by prepadjp) is checked as 'nom' or 'acc',
    * otherwise the preposition form must have an interpretation tagged
      'prep' whose second tag field lists the case.
    """
    if not arg.startswith('prep'):
        return True

    params = arg[arg.find('(')+1:arg.find(')')].split(',')
    preposition = params[0].split()[-1]
    case_param = params[1]
    if case_param == 'postp':
        return True

    accepted_cases = ['nom', 'acc'] if case_param == 'str' else [case_param]
    for case in accepted_cases:
        for analysis in MORFEUSZ2.analyse(preposition.encode('utf8')):
            tag_fields = analysis.getTag(MORFEUSZ2).split(':')
            if tag_fields[0] == 'prep' and case in tag_fields[1].split('.'):
                return True
    return False

#################################### phraseology binded frames ############################
def validate_phraseology_binded_frames(lemma):
    """Return a message about phraseological frames the lemma may be missing.

    The result is an empty string when get_missing_binded_frames finds nothing.
    """
    absent_frames = get_missing_binded_frames(lemma)
    if len(absent_frames) > 0:
        return create_miss_binded_frames_msg_content(absent_frames)
    return ''

def get_missing_binded_frames(lemma):
    """List phraseological frame propositions of the entry that the lemma lacks.

    A proposition is missing when the lemma has no frame with exactly the
    proposition's positions (``lemma.contains_frame_with_exact_positions``).
    """
    entry = lemma.entry_obj
    return [proposition
            for proposition in entry.phraseologic_propositions.all()
            if not lemma.contains_frame_with_exact_positions(proposition.positions.all())]

def create_miss_binded_frames_msg_content(missing_frames):
    """Format the message listing possibly missing phraseological frames.

    The message is a header line, one line per frame text representation and
    a trailing blank line.
    """
    header = u'Sprawdź, czy hasło nie powinno posiadać następujących schematów frazeologicznych:\n'
    frame_lines = [u'\t- %s\n' % (frame.text_rep) for frame in missing_frames]
    return header + u''.join(frame_lines) + '\n'

####################### same positions validation #######################
def validate_same_positions_schemata(lemma):
    """Return a message about schemata containing a repeated position.

    The result is an empty string when get_same_positions_schemata finds none.
    """
    offending = get_same_positions_schemata(lemma)
    if len(offending) > 0:
        return same_positions_message(offending)
    return ''

def get_same_positions_schemata(lemma):
    """List the schemata of ``lemma`` in which some position occurs more than once.

    Positions are compared by their text representation; each offending
    schema is reported once.
    """
    return [schema for schema in lemma.frames.all()
            if any(schema.positions.filter(text_rep=pos.text_rep).count() > 1
                   for pos in schema.positions.all())]

def same_positions_message(same_positions_schemata):
    """Format the message listing schemata that repeat a position.

    The message is a header line, one '[id] text representation' line per
    schema and a trailing blank line.
    """
    header = u'W następujących schematach występuje więcej niż jedna identyczna pozycja:\n'
    schema_lines = [u'\t- [%d] %s\n' % (schema.id, schema.text_rep)
                    for schema in same_positions_schemata]
    return header + u''.join(schema_lines) + '\n'

####################### VALIDATION ############################
def get_napprv_examples(lemma):
    """Return the lemma's examples that still await approval.

    These are the ``nkjp_examples`` whose source requires confirmation and
    which are not approved.
    """
    return lemma.nkjp_examples.filter(source__confirmation_required=True,
                                      approved=False)

# VALIDATION 5: A schema must not contain two positions marked in the same way
#               (e.g. two subjects), unless the marking is (pred_)controllee(2).
def validate_rule_5(frame):
    """Return True when no position category is used by more than one position.

    The categories 'controllee', 'controllee2' and 'pred_controllee' are
    exempt from the check.
    """
    exempt_categories = ['controllee', 'controllee2', 'pred_controllee']
    checked_categories = PositionCategory.objects.exclude(category__in=exempt_categories).all()
    return not any(
        frame.positions.filter(categories__category=pos_cat.category).count() > 1
        for pos_cat in checked_categories)

def validate_examples_and_mark_errors(lemma, status_obj, selected_frame_id):
    """Serialize the lemma frames and flag those with unapproved own examples.

    Every frame is serialized with frameObjToSerializableDict.  When
    ``selected_frame_id`` is truthy only the frame with that id is validated.
    The check (VALIDATION 16: all own examples must be approved) runs only if
    ``status_obj`` is truthy and has ``check_examples`` set.

    Returns ``(serialized_frames, error)`` where ``error`` is True when at
    least one validated frame was marked.
    """
    found_error = False
    serialized_frames = []
    for frame_obj in lemma.frames.all():
        serialized = frameObjToSerializableDict(lemma, frame_obj, True)
        # The serialized frame is marked in place later on, so it can be
        # stored right away.
        serialized_frames.append(serialized)
        if selected_frame_id and frame_obj.id != selected_frame_id:
            continue
        # VALIDATION 16: have all own examples been approved
        if (status_obj and status_obj.check_examples and
                are_examples_approved(lemma, frame_obj, serialized)):
            found_error = True
    return serialized_frames, found_error

# VALIDATION 16: have all own examples been approved
def are_examples_approved(lemma, schema, serialized_schema):
    """Mark ``serialized_schema`` if the schema has unapproved own examples.

    NOTE: despite the name, the return value is the *error* flag: True when
    the schema has at least one example reported by get_napprv_examples,
    False when there is none.  On error the serialized schema gets
    'error' = True and a line appended to its 'tooltip'.
    """
    if not get_napprv_examples(lemma).filter(frame=schema).exists():
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat posiada niezatwierdzone przykłady własne.\n'
    return True

################### schemata validation for semantic statuses ##########
def validate_schemata_for_semantics_and_mark_errors(lemma, status, selected_schema_id):
    """Serialize the lemma schemata and run the semantic-status checks on them.

    Every schema is serialized with frameObjToSerializableDict.  When
    ``selected_schema_id`` is truthy only the schema with that id is checked
    (check_schema_for_semantics_and_mark_errors); the others are returned
    unmarked.

    Returns ``(serialized_schemata, error)``.
    """
    found_error = False
    serialized_schemata = []
    for schema_obj in lemma.frames.all():
        serialized = frameObjToSerializableDict(lemma, schema_obj, True)
        # Marking happens in place, so the entry can be stored before checking.
        serialized_schemata.append(serialized)
        if selected_schema_id and schema_obj.id != selected_schema_id:
            continue
        if check_schema_for_semantics_and_mark_errors(lemma, status, schema_obj, serialized):
            found_error = True
    return serialized_schemata, found_error

def check_schema_for_semantics_and_mark_errors(lemma, status, schema, serialized_schema):
    """Run the checks required for semantic statuses on one schema.

    ``status`` is accepted for signature parity with the other checkers and
    is not used here.  Returns True when an error was marked.
    """
    # VALIDATION 34: a schema with refl must not have examples that are not
    # attached to refl
    return bool(check_and_mark_examples_must_use_refl_rule(lemma, schema, serialized_schema))

###################### schemas validation ##############################
def validate_schemas_and_mark_errors(lemma, status, selected_frame_id):
    """Serialize the lemma schemata and run the full schema validation on them.

    Every schema is serialized with frameObjToSerializableDict.  When
    ``selected_frame_id`` is truthy only the schema with that id is validated
    (check_schema_and_mark_errors); the others are returned unmarked.

    Returns ``(serialized_frames, error)``.
    """
    found_error = False
    serialized_frames = []
    for frame_obj in lemma.frames.all():
        serialized = frameObjToSerializableDict(lemma, frame_obj, True)
        # Marking happens in place, so the entry can be stored before checking.
        serialized_frames.append(serialized)
        if selected_frame_id and frame_obj.id != selected_frame_id:
            continue
        if check_schema_and_mark_errors(lemma, status, frame_obj, serialized):
            found_error = True
    return serialized_frames, found_error

def check_schema_and_mark_errors(lemma, status, schema, serialized_schema):
    """Run every schema validation rule and report whether any of them failed.

    All rules are always executed (no short-circuiting) because each one marks
    its own message on ``serialized_schema``.  The rules about position
    functions run only when some PositionCategory is defined for the part of
    speech of the lemma entry.

    Returns True when at least one rule marked an error.
    """
    outcomes = []
    lemma_pos = lemma.entry_obj.pos
    if PositionCategory.objects.filter(poss=lemma_pos).exists():
        function_rules = (
            # VALIDATION 5: a schema must not contain two positions marked in
            # the same way (e.g. two subjects), unless the marking is
            # (pred_)controllee(2).
            check_and_mark_cant_have_same_functions_rule,
            # VALIDATION 7: a schema with a position marked as
            # (pred_)controllee/(pred_)controller must also contain another
            # position marked as (pred_)controller/(pred_)controllee with the
            # same index.  Hardcoded, to be moved to the database later.
            check_and_mark_controllee_require_controller_rule,
            # VALIDATION 19: positions must not be marked controllee2 and
            # controller2 unless controllee and controller were used first.
            check_and_mark_controllee1_must_be_used_first_rule,
            # VALIDATION 23: a schema with a refl argument must not have an
            # obj position.
            check_and_mark_refl_cant_be_in_schema_with_obj_rule,
            # VALIDATION 24: a schema with reflexivity 'sie' must not have an
            # obj position.
            check_and_mark_sie_cant_be_in_schema_with_obj_rule,
            # VALIDATION 25: a schema with a refl argument must not have
            # reflexivity 'sie'.
            check_and_mark_sie_cant_be_in_schema_with_refl_rule,
            # VALIDATION 32: a schema with a position marked obj must have
            # another position marked subj.
            check_and_mark_obj_and_subj_positions_must_coexist_rule,
        )
        for rule in function_rules:
            outcomes.append(rule(schema, serialized_schema))

    # A list literal evaluates every call, in order, before any() looks at them.
    outcomes.extend([
        # VALIDATION 13: every schema must have an opinion assigned
        check_and_mark_schema_must_have_opinion_rule(lemma, schema, serialized_schema),
        # VALIDATION 15: an entry must not have empty schemata
        check_and_mark_schemas_cant_be_empty_rule(schema, serialized_schema),
        # VALIDATION 16: have all own examples been approved
        check_and_mark_examples_approved_rule(lemma, status, schema, serialized_schema),
        # VALIDATION 21: every schema must have at least one example attached
        check_and_mark_schema_must_have_example_rule(lemma, schema, serialized_schema),
        # VALIDATION 29: a schema must not have inactive argument types
        check_and_mark_all_phrase_types_must_be_active_rule(schema, serialized_schema),
        # VALIDATION 33: at least one dependent in the schema must be
        # non-lexicalized
        check_and_mark_at_least_one_sec_elem_must_be_nlexicalized_rule(schema, serialized_schema),
        # VALIDATION 34: a schema with refl must not have examples that are
        # not attached to refl
        check_and_mark_examples_must_use_refl_rule(lemma, schema, serialized_schema),
        # validation of positions
        check_positions_and_mark_errors(lemma, schema, serialized_schema),
    ])
    return any(outcomes)

def check_and_mark_cant_have_same_functions_rule(schema, serialized_schema):
    """Mark the schema if two of its positions share a position category.

    The categories 'controllee', 'controllee2' and 'pred_controllee' are
    exempt.  The message is added once, no matter how many categories repeat.
    Returns True when the error was marked.
    """
    exempt_categories = ['controllee', 'controllee2', 'pred_controllee']
    checked_categories = PositionCategory.objects.exclude(category__in=exempt_categories).all()
    has_duplicate = any(
        schema.positions.filter(categories__category=pos_cat.category).count() > 1
        for pos_cat in checked_categories)
    if not has_duplicate:
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'W schemacie występują co najmniej dwie pozycje oznaczone w ten sam sposób (reguła nie obejmuje oznaczenia (pred_)controllee).\n'
    return True

def check_and_mark_controllee_require_controller_rule(schema, serialized_schema):
    """Mark the schema if a controller/controllee marking lacks its counterpart.

    For each of the pairs controller/controllee, controller2/controllee2 and
    pred_controller/pred_controllee, a position with one marking requires a
    position with the other.  Returns True when the error was marked.
    """
    def has_category(name):
        return schema.positions.filter(categories__category=name).exists()

    # (marking present, marking it requires)
    requirements = (
        (u'controllee', u'controller'),
        (u'controllee2', u'controller2'),
        (u'controller', u'controllee'),
        (u'controller2', u'controllee2'),
        (u'pred_controller', u'pred_controllee'),
        (u'pred_controllee', u'pred_controller'),
    )
    unpaired = any(has_category(present) and not has_category(required)
                   for present, required in requirements)
    if not unpaired:
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat zawierający pozycję oznaczoną jako (pred_)controllee/(pred_)controller musi jednocześnie zawierać inną pozycję oznaczoną jako (pred_)controller/(pred_)controllee o tym samym indeksie.\n'
    return True

def check_and_mark_controllee1_must_be_used_first_rule(schema, serialized_schema):
    """Mark the schema if it uses controller2/controllee2 without controller/controllee.

    Returns True when the error was marked.
    """
    def has_category(name):
        return schema.positions.filter(categories__category=name).exists()

    if not (has_category(u'controller2') or has_category(u'controllee2')):
        return False
    if has_category(u'controller') or has_category(u'controllee'):
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat posiada pozycję oznaczoną jako controller2/controllee2, ale nie posiada pozycji oznaczonej jako controller/controllee.\n'
    return True

def check_and_mark_refl_cant_be_in_schema_with_obj_rule(schema, serialized_schema):
    """Mark the schema if it has both an 'obj' position and a 'refl' argument type.

    Returns True when the error was marked.
    """
    if not schema.positions.filter(categories__category=u'obj').exists():
        return False
    if not schema.positions.filter(arguments__type=u'refl').exists():
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat zawierający typ frazy refl nie może posiadać pozycji oznaczonej jako obj.\n'
    return True

def check_and_mark_sie_cant_be_in_schema_with_obj_rule(schema, serialized_schema):
    """Mark the schema if it has an 'obj' position and the reflexive-marker reflexivity value.

    Returns True when the error was marked.
    """
    if not schema.positions.filter(categories__category=u'obj').exists():
        return False
    if not schema.characteristics.filter(type=u'ZWROTNOŚĆ', value__value=u'się').exists():
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat o zwrotności "się" nie może posiadać pozycji oznaczonej jako obj.\n'
    return True

def check_and_mark_sie_cant_be_in_schema_with_refl_rule(schema, serialized_schema):
    """Mark the schema if it has a 'refl' argument type and the reflexive-marker reflexivity value.

    Returns True when the error was marked.
    """
    if not schema.positions.filter(arguments__type=u'refl').exists():
        return False
    if not schema.characteristics.filter(type=u'ZWROTNOŚĆ', value__value=u'się').exists():
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat o zwrotności "się" nie może zawierać typu frazy refl.\n'
    return True

def check_and_mark_obj_and_subj_positions_must_coexist_rule(schema, serialized_schema):
    """Mark the schema if it has an 'obj' position but no 'subj' position.

    Returns True when the error was marked.
    """
    if not schema.positions.filter(categories__category=u'obj').exists():
        return False
    if schema.positions.filter(categories__category=u'subj').exists():
        return False
    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat posiadający pozycję oznaczoną jako obj musi posiadać pozycję oznaczoną jako subj.\n'
    return True

def check_and_mark_schema_must_have_opinion_rule(lemma, schema, serialized_schema):
    """Flag a schema for which the lemma has no frame opinion.

    On violation sets serialized_schema['error'] to True and appends a
    message to serialized_schema['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if lemma.frame_opinions.filter(frame=schema).exists():
        return False

    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat nie posiada oceny.\n'
    return True

def check_and_mark_schemas_cant_be_empty_rule(schema, serialized_schema):
    """Flag a schema that has no positions at all.

    On violation sets serialized_schema['error'] to True and appends a
    message to serialized_schema['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    # Only emptiness matters here, so ask the database whether any row
    # exists instead of counting every position.
    if schema.positions.exists():
        return False

    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat jest pusty.\n'
    return True

def check_and_mark_examples_approved_rule(lemma, status, schema, serialized_schema):
    """Run the example-approval check when the status asks for it.

    The check is skipped (returning False) when status is falsy or when
    status.check_examples is falsy. Otherwise the result of
    are_examples_approved(lemma, schema, serialized_schema) decides the
    outcome: a truthy result is reported as an error.

    NOTE(review): the helper's name suggests "True means approved", yet a
    truthy result is treated as an error here - confirm the helper's
    return convention.

    Returns True when an error is reported, False otherwise.
    """
    if not status or not status.check_examples:
        return False
    return True if are_examples_approved(lemma, schema, serialized_schema) else False

def check_and_mark_schema_must_have_example_rule(lemma, schema, serialized_schema):
    """Flag a schema to which none of the lemma's NKJP examples is linked.

    On violation sets serialized_schema['error'] to True and appends a
    message to serialized_schema['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if lemma.nkjp_examples.filter(frame=schema).exists():
        return False

    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat nie posiada żadnego dowiązanego przykładu.\n'
    return True

def check_and_mark_all_phrase_types_must_be_active_rule(schema, serialized_schema):
    """Flag a schema for which schema.has_inactive_arguments() is truthy.

    On violation sets serialized_schema['error'] to True and appends a
    message to serialized_schema['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if not schema.has_inactive_arguments():
        return False

    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Schemat zawiera nieaktywne typy fraz.\n'
    return True

def check_and_mark_at_least_one_sec_elem_must_be_nlexicalized_rule(schema, serialized_schema):
    """Flag a schema for which schema.is_fully_lexicalized() is truthy.

    On violation sets serialized_schema['error'] to True and appends a
    message to serialized_schema['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if not schema.is_fully_lexicalized():
        return False

    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Przynajmniej jeden podrzędnik w schemacie musi być niezleksykalizowany.\n'
    return True

def check_and_mark_examples_must_use_refl_rule(lemma, schema, serialized_schema):
    """Flag a refl schema that has a linked example not pinned to refl.

    The rule applies only when some position of the schema holds an
    argument whose text_rep is 'refl'. Then every lemma example linked to
    the schema must satisfy example.pinned_to('refl').

    On violation sets serialized_schema['error'] to True and appends a
    single message to serialized_schema['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if not schema.positions.filter(arguments__text_rep='refl').exists():
        return False

    linked_examples = lemma.nkjp_examples.filter(frame=schema)
    # all() stops at the first example that is not pinned to refl.
    if all(example.pinned_to('refl') for example in linked_examples):
        return False

    serialized_schema['error'] = True
    serialized_schema['tooltip'] += u'Wszystkie przykłady dopięte do schematu muszą wykorzystywać typ frazy refl.\n'
    return True

###################### positions validation ##############################
def check_positions_and_mark_errors(lemma, schema, serialized_schema):
    """Run every position-level validation rule over a schema's positions.

    The schema's positions (ordered by sortPositions) are paired with the
    entries of serialized_schema['positions']; each rule marks the matching
    serialized entry itself when it is violated. Every applicable rule is
    run for every position, even after an error was already found, so that
    all problems get marked.

    Returns True when at least one rule reported an error, False otherwise.
    """
    paired_positions = list(zip(sortPositions(schema.positions.all()),
                                serialized_schema['positions']))
    if not paired_positions:
        return False

    # The lemma's part of speech is the same for every position, so this
    # lookup is done once rather than once per position.
    pos_has_position_categories = PositionCategory.objects.filter(
        poss=lemma.entry_obj.pos).exists()

    error = False
    for sorted_position, serialized_position in paired_positions:
        position = sorted_position['position']
        if pos_has_position_categories:
            outcomes = [
                # VALIDATION 6: a (pred_)controller position with a given index
                # must not also be marked as (pred_)controllee with the same index.
                # Hard-coded, unfortunately; to be moved to the database later.
                check_and_mark_controller_not_controllee_rule(position, serialized_position),
                # VALIDATION 8: the pred case must not occur in a position that
                # is not also marked as pred_controllee. Hard-coded.
                check_and_mark_pred_require_controllee_rule(position, serialized_position),
                # VALIDATION 9: refl must not share a position with any other
                # argument. Hard-coded.
                check_and_mark_refl_must_be_lonely_rule(position, serialized_position),
                # VALIDATION 20: "E" must not share a position with any other
                # argument and must be in a "subj" position. Hard-coded.
                check_and_mark_E_must_be_lonely_and_subj_rule(position, serialized_position),
                # VALIDATION 26: an argument with the inst case must not be
                # marked as subj.
                check_and_mark_inst_cant_be_subj_rule(position, serialized_position),
                # VALIDATION 27: a position marked as subj must have the same
                # case in all of its arguments.
                check_and_mark_subj_position_need_same_case_rule(position, serialized_position),
            ]
        else:
            outcomes = [
                # VALIDATION 28: the position must not have any function.
                check_and_mark_position_cant_have_function_rule(position, serialized_position),
            ]
        # VALIDATION 31: if xp(mod) is the only phrase type in a position, the
        # position should not be marked as (pred_)controllee. Hard-coded.
        outcomes.append(
            check_and_mark_lonely_xp_mod_cant_be_controllee_rule(position, serialized_position))
        # Phrase type (argument) validation.
        outcomes.append(
            check_arguments_and_mark_errors(lemma, position, serialized_position))
        if any(outcomes):
            error = True
    return error

def check_and_mark_controller_not_controllee_rule(position, serialized_position):
    """Flag a position marked as both controller and controllee of one index.

    The pairs checked are controller/controllee, controller2/controllee2
    and pred_controller/pred_controllee.

    On violation sets serialized_position['error'] to True and appends a
    message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    conflicting_pairs = (
        (u'controller', u'controllee'),
        (u'controller2', u'controllee2'),
        (u'pred_controller', u'pred_controllee'),
    )
    for controller_name, controllee_name in conflicting_pairs:
        if (position.categories.filter(category=controller_name).exists() and
                position.categories.filter(category=controllee_name).exists()):
            serialized_position['error'] = True
            serialized_position['tooltip'] += u'Pozycja jest jednocześnie oznaczona jako (pred_)controller i (pred_)controllee o tym samym indeksie.\n'
            return True
    return False

def check_and_mark_pred_require_controllee_rule(position, serialized_position):
    """Flag a position that has the pred case without being pred_controllee.

    On violation sets serialized_position['error'] to True and appends a
    message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if position.categories.filter(category='pred_controllee').exists():
        return False
    uses_pred_case = position.arguments.filter(
        atributes__type=u'PRZYPADEK',
        atributes__values__parameter__type__name=u'pred').exists()
    if not uses_pred_case:
        return False

    serialized_position['error'] = True
    serialized_position['tooltip'] += u'Pozycja posiada przypadek pred nie posiadając funkcji pred_controllee.\n'
    return True

def check_and_mark_refl_must_be_lonely_rule(position, serialized_position):
    """Flag a position where refl occurs together with other arguments.

    On violation sets serialized_position['error'] to True and appends a
    message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    arguments = position.arguments
    if not arguments.filter(type=u'refl').exists():
        return False
    if not arguments.count() > 1:
        return False

    serialized_position['error'] = True
    serialized_position['tooltip'] += u'Pozycja posiada typ frazy refl występujący wraz z innymi typami fraz.\n'
    return True

def check_and_mark_E_must_be_lonely_and_subj_rule(position, serialized_position):
    """Flag a position where E is not alone or is not in a subj position.

    The rule applies only when the position holds an argument of type 'E'.
    It is satisfied when that position has at most one argument and is
    marked as subj.

    On violation sets serialized_position['error'] to True and appends a
    message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if not position.arguments.filter(type='E').exists():
        return False
    if (position.arguments.count() <= 1 and
            position.categories.filter(category='subj').exists()):
        return False

    serialized_position['error'] = True
    serialized_position['tooltip'] += u'Pozycja posiada typ frazy E wraz z innymi typami fraz i/lub nie jest oznaczona jako subj.\n'
    return True

def check_and_mark_inst_cant_be_subj_rule(position, serialized_position):
    """Flag a subj position that contains an argument with the inst case.

    Only a subj category with control=False makes the rule applicable.

    On violation sets serialized_position['error'] to True and appends a
    message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if not position.categories.filter(category=u'subj', control=False).exists():
        return False
    uses_inst_case = position.arguments.filter(
        atributes__type=u'PRZYPADEK',
        atributes__values__parameter__type__name=u'inst').exists()
    if not uses_inst_case:
        return False

    serialized_position['error'] = True
    serialized_position['tooltip'] += u'Pozycja oznaczona jako subj nie może zawierać przypadka inst.\n'
    return True

def check_and_mark_subj_position_need_same_case_rule(position, serialized_position):
    """Flag a subj position whose arguments do not all share one case.

    Only a subj category with control=False makes the rule applicable, and
    only arguments that carry a PRZYPADEK (case) attribute are compared.
    The case attribute of the first such argument is the reference every
    other one has to equal.

    On violation sets serialized_position['error'] to True and appends a
    single message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if not position.categories.filter(category=u'subj', control=False).exists():
        return False

    arguments_with_case = position.arguments.filter(atributes__type=u'PRZYPADEK').distinct()
    if not arguments_with_case.exists():
        return False

    reference_case = arguments_with_case.all()[0].atributes.get(type=u'PRZYPADEK')
    # any() stops at the first argument whose case differs from the reference.
    cases_differ = any(argument.atributes.get(type=u'PRZYPADEK') != reference_case
                       for argument in arguments_with_case.all())
    if not cases_differ:
        return False

    serialized_position['error'] = True
    serialized_position['tooltip'] += u'Pozycja oznaczona jako subj musi mieć jednakowy przypadek we wszystkich typach fraz.\n'
    return True

def check_and_mark_position_cant_have_function_rule(position, serialized_position):
    """Flag a position that has any category (function) assigned.

    On violation sets serialized_position['error'] to True and appends a
    message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if not position.categories.exists():
        return False

    serialized_position['error'] = True
    serialized_position['tooltip'] += u'Pozycja nie może posiadać żadnej funkcji.\n'
    return True

def check_and_mark_lonely_xp_mod_cant_be_controllee_rule(position, serialized_position):
    """Flag a controllee position whose only argument is xp(mod).

    The rule is violated when the position is marked as controllee,
    controllee2 or pred_controllee, holds exactly one argument, and holds
    an xp argument with the 'mod' selection mode.

    On violation sets serialized_position['error'] to True and appends a
    message to serialized_position['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    controllee_names = [u'controllee', u'controllee2', u'pred_controllee']
    if not position.categories.filter(category__in=controllee_names).exists():
        return False
    if position.arguments.count() != 1:
        return False
    if not position.arguments.filter(type=u'xp', atributes__selection_mode__name='mod').exists():
        return False

    serialized_position['error'] = True
    serialized_position['tooltip'] += u'Typ frazy xp(mod) nie może występować samodzielnie w pozycji oznaczonej jako (pred_)controllee.\n'
    return True

###################### arguments validation ##############################
def check_arguments_and_mark_errors(lemma, position, serialized_position):
    """Run every argument-level validation rule over a position's arguments.

    The position's arguments (ordered by sortArguments) are paired with the
    entries of serialized_position['arguments']; each rule marks the
    matching serialized entry itself when it is violated. All rules run for
    every argument, even after an error was already found.

    Returns True when at least one rule reported an error, False otherwise.
    """
    found_error = False
    paired_arguments = zip(sortArguments(position.arguments.all()),
                           serialized_position['arguments'])
    for argument, serialized_argument in paired_arguments:
        # The list literal evaluates every rule, in this order, so each one
        # gets the chance to mark the serialized argument.
        outcomes = [
            # VALIDATION 10: when an entry is saved, xp must have a concrete
            # realization (not "_"). Hard-coded.
            check_and_mark_xp_must_have_type_rule(argument, serialized_argument),
            # VALIDATION 12: np, prepnp, ncp and prepncp must not have the
            # postp case. Heavily hard-coded.
            check_and_mark_np_prepnp_cp_ncp_prepncp_cant_have_postp_rule(argument, serialized_argument),
            # VALIDATION 17: the case assigned by a preposition must be among
            # the Morfeusz interpretations.
            check_and_mark_prep_case_pair_must_be_in_morfeusz_rule(argument, serialized_argument),
            # VALIDATION 22: the schema contains phrase types that cannot occur
            # for the given part of speech.
            check_and_mark_phrase_types_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument),
            # VALIDATION 23: the schema contains attributes that cannot occur
            # for the given part of speech.
            check_and_mark_attributes_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument),
            # VALIDATION 30: the argument must have a number of attributes
            # consistent with its model.
            check_and_mark_attributes_count_must_be_agreed_with_phrase_type_model_rule(argument, serialized_argument),
            # VALIDATION 22 (number reused in the original comments): the
            # preposition 'jako' may only occur with the 'str' case.
            # Heavily hard-coded.
            check_and_mark_jako_prep_can_be_str_only_rule(argument, serialized_argument),
        ]
        if any(outcomes):
            found_error = True
    return found_error

def check_and_mark_xp_must_have_type_rule(argument, serialized_argument):
    """Flag an xp argument whose attribute has the '_' selection mode.

    On violation sets serialized_argument['error'] to True and appends a
    message to serialized_argument['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if argument.type != 'xp':
        return False
    if not argument.atributes.filter(selection_mode__name='_').exists():
        return False

    serialized_argument['error'] = True
    serialized_argument['tooltip'] += u'Xp musi posiadać określoną kategorię.\n'
    return True

def check_and_mark_np_prepnp_cp_ncp_prepncp_cant_have_postp_rule(argument, serialized_argument):
    """Flag an np, prepnp, ncp or prepncp argument that has the postp case.

    Arguments of any other type are never flagged by this rule.

    On violation sets serialized_argument['error'] to True and appends a
    message to serialized_argument['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if argument.type not in ('np', 'prepnp', 'ncp', 'prepncp'):
        return False
    has_postp_case = argument.atributes.filter(
        type=u'PRZYPADEK', values__parameter__type__name=u'postp').exists()
    if not has_postp_case:
        return False

    serialized_argument['error'] = True
    serialized_argument['tooltip'] += u'Typ frazy nie może posiadać przypadka postp.\n'
    return True

def check_and_mark_prep_case_pair_must_be_in_morfeusz_rule(argument, serialized_argument):
    """Flag an argument whose text representation fails prep_check().

    On violation sets serialized_argument['error'] to True and appends a
    message to serialized_argument['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    if prep_check(argument.text_rep):
        return False

    serialized_argument['error'] = True
    serialized_argument['tooltip'] += u'Typ frazy nie uzgadnia przypadka z przyimkiem.\n'
    return True

def check_and_mark_phrase_types_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument):
    """Flag an argument whose phrase type model does not list the lemma's POS.

    The Argument_Model is looked up by the argument's type and its poss
    relation is checked for the part of speech of lemma.entry_obj.

    On violation sets serialized_argument['error'] to True and appends a
    message naming the part of speech to serialized_argument['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    phrase_type_model = Argument_Model.objects.get(arg_model_name=argument.type)
    pos = lemma.entry_obj.pos
    if phrase_type_model.poss.filter(id=pos.id).exists():
        return False

    serialized_argument['error'] = True
    serialized_argument['tooltip'] += u'Typ frazy nie może wystąpić dla części mowy %s.\n' % pos.name
    return True

def check_and_mark_attributes_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument):
    """Flag an argument for which has_wrong_pos_values() reports a mismatch.

    The check is made against the part of speech of lemma.entry_obj.

    On violation sets serialized_argument['error'] to True and appends a
    message naming the part of speech to serialized_argument['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    pos = lemma.entry_obj.pos
    if not has_wrong_pos_values(argument, pos):
        return False

    serialized_argument['error'] = True
    serialized_argument['tooltip'] += u'Typ frazy posiada atrybuty nie mogące występować dla części mowy %s.\n' % pos.name
    return True

def has_wrong_pos_values(argument, pos):
    """Tell whether any parameter value of the argument is not allowed for pos.

    For every attribute of the argument, the values whose type has the
    sym_name 'parameter' are inspected; a value is wrong when the poss
    relation of its parameter type does not contain the given pos.

    Returns True as soon as the first wrong value is found, False when
    there is none.
    """
    return any(
        not value.parameter.type.poss.filter(pk=pos.pk).exists()
        for attribute in argument.atributes.all()
        for value in attribute.values.filter(type__sym_name=u'parameter').all()
    )

def check_and_mark_attributes_count_must_be_agreed_with_phrase_type_model_rule(argument, serialized_argument):
    """Flag an argument whose attribute count differs from its model's.

    The expected count is the length of get_attribute_models(argument);
    the actual count comes from argument.atributes.count().

    On violation sets serialized_argument['error'] to True and appends a
    message to serialized_argument['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    expected_count = len(get_attribute_models(argument))
    if expected_count == argument.atributes.count():
        return False

    serialized_argument['error'] = True
    serialized_argument['tooltip'] += u'Typ frazy ma niezgodną liczbę atrybutów ze swoim modelem.\n'
    return True

def check_and_mark_jako_prep_can_be_str_only_rule(argument, serialized_argument):
    """Flag an argument that uses the preposition 'jako' without the str case.

    On violation sets serialized_argument['error'] to True and appends a
    message to serialized_argument['tooltip'].

    Returns True when the rule is violated, False otherwise.
    """
    attributes = argument.atributes
    if not attributes.filter(type=u'PRZYIMEK', values__parameter__type__name=u'jako').exists():
        return False
    if attributes.filter(type=u'PRZYPADEK', values__parameter__type__name=u'str').exists():
        return False

    serialized_argument['error'] = True
    serialized_argument['tooltip'] += u'Przyimek jako może występować jedynie z przypadkiem str.\n'
    return True