#-*- coding:utf-8 -*- import copy import itertools import operator from django.db.models import Sum, Q from common.js_to_obj import frameObjToSerializableDict from dictionary.common_func import subframe_exists from dictionary.convert_frames import frame_conversion from dictionary.forms import FrameAspectForm, FrameOpinionForm from dictionary.models import Argument_Model, AspectRelationsGroup, Frame_Char_Model, \ Frame_Characteristic, Lemma, Position, PositionCategory, \ get_attribute_models, sortArguments, sortPositions from settings import MORFEUSZ2 def get_wrong_aspect_frames(lemma, frames): wrong_aspect_frames = [] for frame in frames: if not check_aspect(lemma, frame): wrong_aspect_frames.append(frame) return wrong_aspect_frames def check_aspect(lemma, frame): good_aspect = False inf_present = False frame_aspect_obj = frame.characteristics.get(type=u'ASPEKT') frame_aspect = frame_aspect_obj.value.value interps = MORFEUSZ2.analyse(lemma.entry.encode('utf8')) if frame_aspect != '_': for interp in interps: tagstr = interp.getTag(MORFEUSZ2) tag_parts = tagstr.split(':') pos = tag_parts[0] if pos == 'inf': aspects = tag_parts[1].split('.') inf_present = True if frame_aspect in aspects: good_aspect = True break if good_aspect or not inf_present: good_aspect = True return good_aspect def get_missing_aspects(lemma): missing_aspects = [] aspect_model = Frame_Char_Model.objects.get(model_name=u'ASPEKT') any_aspect_val = aspect_model.frame_char_values.get(value='_') if not lemma.frames.filter(characteristics__value=any_aspect_val).exists(): possible_aspects = get_possible_aspects(lemma) for aspect in possible_aspects: if not lemma.frames.filter(characteristics__value__value=aspect).exists(): missing_aspects.append(aspect) return missing_aspects def get_missing_aspects_msg(lemma): message_content = '' missing_aspects = get_missing_aspects(lemma) if missing_aspects: message_content = u'W haśle brakuje schematów o aspekcie: %s.\n\n' % u', '.join(missing_aspects) return 
message_content def get_possible_aspects(lemma): possible_aspects = [] interps = MORFEUSZ2.analyse(lemma.entry.encode('utf8')) for interp in interps: tagstr = interp.getTag(MORFEUSZ2) tag_parts = tagstr.split(':') pos = tag_parts[0] if pos == 'inf': for aspect in tag_parts[1].split('.'): if aspect not in possible_aspects: possible_aspects.append(aspect) return possible_aspects def match_arg_poss(arg_poss, frame): poss_positions = [] for arg in arg_poss.possible_args.all(): if "_" in arg.argument.text_rep: regex = ur"^%s$" % arg.argument.text_rep.replace("_", ".+?").replace("(", "\(").replace(")", "\)").replace("[", "\[").replace("]", "\]") if arg.position_category: poss_positions.extend(frame.positions.filter(categories=arg.position_category, arguments__text_rep__regex=regex).distinct().all()) poss_positions.extend(frame.positions.filter(categories=arg.position_category, arguments__realizations__argument__text_rep__regex=regex).distinct().all()) else: poss_positions.extend(frame.positions.filter(arguments__text_rep__regex=regex). exclude(categories__control=False).distinct().all()) poss_positions.extend(frame.positions.filter(arguments__realizations__argument__text_rep__regex=regex). exclude(categories__control=False).distinct().all()) else: if arg.position_category: poss_positions.extend(frame.positions.filter(categories=arg.position_category, arguments=arg.argument).distinct().all()) poss_positions.extend(frame.positions.filter(categories=arg.position_category, arguments__realizations__argument=arg.argument).distinct().all()) else: poss_positions.extend(frame.positions.filter(arguments=arg.argument). exclude(categories__control=False).distinct().all()) poss_positions.extend(frame.positions.filter(arguments__realizations__argument=arg.argument). 
exclude(categories__control=False).distinct().all()) return poss_positions def check_sie(frame): sie = False if (frame.characteristics.filter(value__value=u'się', type=u'ZWROTNOŚĆ').exists() or frame.positions.filter(arguments__text_rep='refl').exists()): sie = True return sie def check_combinations(somelists, args_to_match): for element in itertools.product(*somelists): if not element: continue if (len(element) == len(set(element)) and len(set(element)) >= args_to_match): return True return False def validate_B_frames(lemma_obj): mismatched_b_frames = [] for b_frame in lemma_obj.B_frames.all(): match = False for frame in lemma_obj.frames.all(): not_this_frame = False somelists = [] if (not frame.characteristics.filter(id=b_frame.aspect.id).exists() or not frame.characteristics.filter(id=b_frame.negativity.id).exists()): continue if b_frame.reflex and not check_sie(frame): continue if frame.positions.count() < b_frame.arguments.count(): continue for b_arg in b_frame.arguments.all(): poss_positions = match_arg_poss(b_arg, frame) if len(poss_positions) == 0: not_this_frame = True break else: somelists.append(poss_positions) if not_this_frame: continue match = check_combinations(somelists, b_frame.arguments.count()) if match: break if not match: mismatched_b_frames.append(b_frame) return mismatched_b_frames ### KOORDYNACJA ##### def find_similar_frames(frames): frames_to_merge = [] if len(frames) > 1: combinations = itertools.combinations(frames, 2) for comb in combinations: if (comb[0].characteristics.get(type=u'ZWROTNOŚĆ') == comb[1].characteristics.get(type=u'ZWROTNOŚĆ') and comb[0].characteristics.get(type=u'ASPEKT') == comb[1].characteristics.get(type=u'ASPEKT') and comb[0].characteristics.get(type=u'NEGATYWNOŚĆ') == comb[1].characteristics.get(type=u'NEGATYWNOŚĆ') and comb[0].characteristics.get(type=u'PREDYKATYWNOŚĆ') == comb[1].characteristics.get(type=u'PREDYKATYWNOŚĆ')): occurrences = check_max_args_coor(comb[0], comb[1]) if occurrences >= 20: 
frames_to_merge.append({'frames': comb, 'occurrences': occurrences}) frames_to_merge = sorted(frames_to_merge, key=operator.itemgetter('occurrences'), reverse=True) return frames_to_merge def check_max_args_coor(frame1, frame2): max_occurr = 0 if frame1.positions.count() == frame2.positions.count(): frame1_poss_text_reps = [pos.text_rep for pos in frame1.positions.all()] frame2_poss_text_reps = [pos.text_rep for pos in frame2.positions.all()] pos_diff1 = frame1.positions.exclude(text_rep__in=frame2_poss_text_reps) pos_diff2 = frame2.positions.exclude(text_rep__in=frame1_poss_text_reps) if(pos_diff1.count() == 1 and pos_diff2.count() == 1 and pos_diff1.all()[0].categories.count() == pos_diff2.all()[0].categories.count() and (pos_diff1.all()[0].categories.all() | pos_diff2.all()[0].categories.all()).count() == pos_diff1.all()[0].categories.count()): for phrase_type1 in pos_diff1.all()[0].arguments.all(): for phrase_type2 in pos_diff2.all()[0].arguments.all(): matching_positions = Position.objects.filter(arguments=phrase_type1).filter(arguments=phrase_type2) occurr = matching_positions.aggregate(Sum('occurrences'))['occurrences__sum'] if occurr and occurr > max_occurr: max_occurr = occurr return max_occurr def check_frames_diff(frame1, frame2): occurr = 0 pos_diff1 = frame1.positions.exclude(pk__in=frame2.positions.all()) pos_diff2 = frame2.positions.exclude(pk__in=frame1.positions.all()) if(frame1.positions.count() == frame2.positions.count() and pos_diff1.count() == 1 and pos_diff2.count() == 1 and pos_diff1.all()[0].categories.count() == pos_diff2.all()[0].categories.count() and (pos_diff1.all()[0].categories.all() | pos_diff2.all()[0].categories.all()).count() == pos_diff1.all()[0].categories.count()): arguments_sum = (pos_diff1.all()[0].arguments.all() | pos_diff2.all()[0].arguments.all()) matching_positions = Position.objects.all() for arg in arguments_sum.distinct().all(): matching_positions = matching_positions.filter(arguments=arg) occurr = 
matching_positions.aggregate(Sum('occurrences'))['occurrences__sum'] if occurr: return occurr return occurr ###################### walidacja powiazanych hasel (nieczasownikowe) ####################################### def get_deriv_miss_frames_message(lemma): message_content = '' missing_frames = get_deriv_miss_frames(lemma) if missing_frames: message_content = u'Edytowane hasło nie posiada następujących schematów z powiązanych derywacyjnie haseł:\n' for miss_frame in missing_frames: message_content += u' %s (%s):\n' % (miss_frame['entry'].name, miss_frame['entry'].pos.name) for frame in miss_frame['frames']: message_content += u'\t- %s\n' % (frame.text_rep) message_content += '\n' return message_content def get_deriv_miss_frames(lemma): entry = lemma.entry_obj deriv_rel_lemmas = get_deriv_related_lemmas(entry) missing_frames = [] for deriv_rel_lemma in deriv_rel_lemmas: deriv_frames = get_deriv_frames_to_check(deriv_rel_lemma) for frame in deriv_frames: converted_frames = frame_conversion(frame, deriv_rel_lemma.entry_obj.pos, entry.pos) if (len(converted_frames) != 0 and not deriv_frame_exists(lemma.frames.all(), converted_frames)): add_missing_deriv_frame(missing_frames, frame, deriv_rel_lemma.entry_obj) return missing_frames def get_deriv_related_lemmas(entry): deriv_related_lemmas = [] for rel_entry in entry.rel_entries.order_by('name'): try: rel_lemma = Lemma.objects.get(entry_obj=rel_entry, old=False) deriv_related_lemmas.append(rel_lemma) except Lemma.DoesNotExist: pass return deriv_related_lemmas def get_deriv_frames_to_check(lemma): lemma_frames = lemma.frames.order_by('text_rep') frames = [frame for frame in lemma_frames if not frame.char_exists(u'ZWROTNOŚĆ', u'się')] if len(frames) == 0: frames = lemma_frames return frames def deriv_frame_exists(lemma_frames, converted_frames): frame_exists = False for conv_frame in converted_frames: if subframe_exists(lemma_frames, conv_frame): frame_exists = True break return frame_exists def 
add_missing_deriv_frame(missing_frames, frame, deriv_rel_entry): miss_frame = next((miss_frame for miss_frame in missing_frames if miss_frame['entry'] == deriv_rel_entry), None) if miss_frame: miss_frame['frames'].append(frame) else: miss_frame = {'entry': deriv_rel_entry, 'frames': [frame]} missing_frames.append(miss_frame) ######################## powiazane aspektowo hasla ######################### def get_aspect_rel_lemmas(lemma_obj): aspect_rel_lemmas = [] try: aspect_rel_members = AspectRelationsGroup.objects.get(members__name=lemma_obj.entry).members.exclude(name=lemma_obj.entry) q_aspect_rel_members = [] q_statuses = [Q(status__type__sym_name='checked'), Q(status__type__sym_name='ready'), Q(status__type__sym_name='edit_f'), Q(status__type__sym_name='checked_f'), Q(status__type__sym_name='ready_f'), Q(status__type__sym_name='edit_s'), Q(status__type__sym_name='checked_s'), Q(status__type__sym_name='ready_s')] for member in aspect_rel_members.all(): if member.name != lemma_obj.entry: q_aspect_rel_members.append(Q(entry=member.name)) aspect_rel_lemmas = Lemma.objects.filter(old=False).filter(reduce(operator.or_, q_statuses)).filter(reduce(operator.or_, q_aspect_rel_members)).all() except AspectRelationsGroup.DoesNotExist: pass return aspect_rel_lemmas def get_all_test_missing_frames(test_frames, aspect_rel_lemmas): missing_frames = [] for pattern_lemma in aspect_rel_lemmas: frames_to_add = get_miss_test_frames(test_frames, pattern_lemma.frames) if len(frames_to_add) > 0: frames_dict_ls = [] for frame in frames_to_add: frame_aspect = frame.characteristics.get(type=u'ASPEKT') if frame_aspect.value.default: opposite_aspect = frame_aspect else: opposite_aspect = Frame_Characteristic.objects.filter(type=u'ASPEKT').exclude(id=frame_aspect.id).exclude(value__default=True)[0] frame_opinion_val = pattern_lemma.frame_opinions.get(frame=frame).value frames_dict_ls.append({'frame' : frame, 'aspect_form' : FrameAspectForm(aspect_val=opposite_aspect), 'opinion_form': 
FrameOpinionForm(opinion_val=frame_opinion_val)}) missing_frames.append({'lemma' : pattern_lemma.entry, 'frames' : frames_dict_ls}) return missing_frames def get_miss_test_frames(test_frames, pattern_frames): missing_frames = [] for pattern_frame in pattern_frames.all(): match_found = False reflex_val = pattern_frame.characteristics.get(type=u'ZWROTNOŚĆ').value.value negativity_val = pattern_frame.characteristics.get(type=u'NEGATYWNOŚĆ').value.value pred_val = pattern_frame.characteristics.get(type=u'PREDYKATYWNOŚĆ').value.value for test_frame in test_frames: wrong_frame = False if(reflex_val == test_frame['characteristics'][list(Frame_Char_Model.objects.order_by('priority').values_list('model_name', flat=True)).index(u'ZWROTNOŚĆ')] and negativity_val == test_frame['characteristics'][list(Frame_Char_Model.objects.order_by('priority').values_list('model_name', flat=True)).index(u'NEGATYWNOŚĆ')] and pred_val == test_frame['characteristics'][list(Frame_Char_Model.objects.order_by('priority').values_list('model_name', flat=True)).index(u'PREDYKATYWNOŚĆ')]): for pattern_pos in pattern_frame.positions.all(): unmatched_poss = copy.deepcopy(test_frame['positions']) smallest_poss = None for position in unmatched_poss: test_args = set([arg['text_rep'] for arg in position['arguments']]) pattern_args = set([arg.text_rep for arg in pattern_pos.arguments.all()]) test_cats = set(position['categories']) pattern_cats = set([cat.category for cat in pattern_pos.categories.all()]) if len(pattern_args - test_args) == 0 and test_cats == pattern_cats: if (not smallest_poss or len(smallest_poss['arguments']) > len(position['arguments'])): smallest_poss = position if not smallest_poss: wrong_frame = True else: unmatched_poss.remove(smallest_poss) else: wrong_frame = True if not wrong_frame: match_found = True break if not match_found: missing_frames.append(pattern_frame) return missing_frames def prep_check(arg): # prepnp, prepncp, prepadjp, preplexnp #if arg.find('prep') != -1 and not 
arg.startswith('comprepnp('): if arg.startswith('prep'): params = arg[arg.find('(')+1:arg.find(')')].split(',') pform = params[0].split()[-1] pcase = params[1] # postp is used by prepadjp if pcase != 'postp': # str is used by prepadjp if pcase == 'str': pcase = ['nom', 'acc'] else: pcase = [pcase] for case in pcase: for interp in MORFEUSZ2.analyse(pform.encode('utf8')): tagstr = interp.getTag(MORFEUSZ2) tag_parts = tagstr.split(':') pos = tag_parts[0] if pos == 'prep' and case in tag_parts[1].split('.'): return True return False return True #################################### phraseology binded frames ############################ def validate_phraseology_binded_frames(lemma): msg_content = '' miss_frames = get_missing_binded_frames(lemma) if len(miss_frames) > 0: msg_content = create_miss_binded_frames_msg_content(miss_frames) return msg_content def get_missing_binded_frames(lemma): miss_frames = [] entry = lemma.entry_obj for frame_proposition in entry.phraseologic_propositions.all(): if not lemma.contains_frame_with_exact_positions(frame_proposition.positions.all()): miss_frames.append(frame_proposition) return miss_frames def create_miss_binded_frames_msg_content(missing_frames): message_content = u'Sprawdź, czy hasło nie powinno posiadać następujących schematów frazeologicznych:\n' for miss_frame in missing_frames: message_content += u'\t- %s\n' % (miss_frame.text_rep) message_content += '\n' return message_content ####################### same positions validation ####################### def validate_same_positions_schemata(lemma): msg_content = '' same_positions_schemata = get_same_positions_schemata(lemma) if len(same_positions_schemata) > 0: msg_content = same_positions_message(same_positions_schemata) return msg_content def get_same_positions_schemata(lemma): same_positions_schemata = [] for schema in lemma.frames.all(): for pos in schema.positions.all(): if schema.positions.filter(text_rep=pos.text_rep).count() > 1: same_positions_schemata.append(schema) 
break return same_positions_schemata def same_positions_message(same_positions_schemata): message_content = u'W następujących schematach występuje więcej niż jedna identyczna pozycja:\n' for schema in same_positions_schemata: message_content += u'\t- [%d] %s\n' % (schema.id, schema.text_rep) message_content += '\n' return message_content ####################### WALIDACJA ############################ def get_napprv_examples(lemma): nApprovedExamples = lemma.nkjp_examples.filter(source__confirmation_required=True, approved=False) return nApprovedExamples # WALIDACJA 5: Rama nie moze zawierac dwoch pozycji oznaczonych w taki sam sposob # (np. dwoch podmiotow). Chyba, ze jest to oznaczenie controllee. def validate_rule_5(frame): for pos_cat in PositionCategory.objects.exclude(category__startswith='controllee').all(): if frame.positions.filter(categories__category=pos_cat.category).count() > 1: return False return True def validate_examples_and_mark_errors(lemma, status_obj, selected_frame_id): error = False serialized_frames = [] for frame_obj in lemma.frames.all(): serialized_frame = frameObjToSerializableDict(lemma, frame_obj, True) if selected_frame_id and frame_obj.id != selected_frame_id: serialized_frames.append(serialized_frame) continue # WALIDACJA 16: czy wszystkie przyklady wlasne zostaly zatwierdzone if status_obj and status_obj.check_examples: if are_examples_approved(lemma, frame_obj, serialized_frame): error = True serialized_frames.append(serialized_frame) return serialized_frames, error # WALIDACJA 16: czy wszystkie przyklady wlasne zostaly zatwierdzone def are_examples_approved(lemma, schema, serialized_schema): error = False if get_napprv_examples(lemma).filter(frame=schema).exists(): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat posiada niezatwierdzone przykłady własne.\n' error = True return error ################### schemata validation for semantic statuses ########## def 
validate_schemata_for_semantics_and_mark_errors(lemma, status, selected_schema_id): error = False serialized_schemata = [] for schema_obj in lemma.frames.all(): serialized_schema = frameObjToSerializableDict(lemma, schema_obj, True) if selected_schema_id and schema_obj.id != selected_schema_id: serialized_schemata.append(serialized_schema) continue if check_schema_for_semantics_and_mark_errors(lemma, status, schema_obj, serialized_schema): error = True serialized_schemata.append(serialized_schema) return serialized_schemata, error def check_schema_for_semantics_and_mark_errors(lemma, status, schema, serialized_schema): error = False # WALIDACJA 34: Pod schematem z refl nie mogą być przykłady niepodpięte pod refl if check_and_mark_examples_must_use_refl_rule(lemma, schema, serialized_schema): error = True return error ###################### schemas validation ############################## def validate_schemas_and_mark_errors(lemma, status, selected_frame_id): error = False serialized_frames = [] for frame_obj in lemma.frames.all(): serialized_frame = frameObjToSerializableDict(lemma, frame_obj, True) if selected_frame_id and frame_obj.id != selected_frame_id: serialized_frames.append(serialized_frame) continue if check_schema_and_mark_errors(lemma, status, frame_obj, serialized_frame): error = True serialized_frames.append(serialized_frame) return serialized_frames, error def check_schema_and_mark_errors(lemma, status, schema, serialized_schema): error = False lemma_pos = lemma.entry_obj.pos if PositionCategory.objects.filter(poss=lemma_pos).exists(): # WALIDACJA 5: Schemat nie moze zawierac dwoch pozycji oznaczonych w taki sam sposob # (np. dwoch podmiotow). Chyba, ze jest to oznaczenie controllee. if check_and_mark_cant_have_same_functions_rule(schema, serialized_schema): error = True # WALIDACJA 7: Rama zawierajaca pozycje oznaczona jako controllee/controller musi jednoczesnie # zawierac inna pozycje oznaczona jako controller/controllee o tym samym indeksie. 
# z hardcodami niestety, do poprawy na bazie, kiedy tempo zmaleje :( if check_and_mark_controllee_require_controller_rule(schema, serialized_schema): error = True # WALIDACJA 19: Pozycje nie mogą być oznaczane jako controllee2 i controller2, # jeśli wcześniej nie wykorzystano oznaczeń controllee i controller if check_and_mark_controllee1_must_be_used_first_rule(schema, serialized_schema): error = True # WALIDACJA 23: Schemat posiadajacy argument refl nie moze posiadac pozycji obj if check_and_mark_refl_cant_be_in_schema_with_obj_rule(schema, serialized_schema): error = True # WALIDACJA 24: Schemat o sienności 'sie' nie moze posiadac pozycji obj if check_and_mark_sie_cant_be_in_schema_with_obj_rule(schema, serialized_schema): error = True # WALIDACJA 25: Schemat posiadajacy argument refl nie moze miec zwrotnosci sie if check_and_mark_sie_cant_be_in_schema_with_refl_rule(schema, serialized_schema): error = True # WALIDACJA 32: Schemat posiadajacy pozycje oznaczona jako obj musi posiadac inna pozycje oznaczona jako # subj if check_and_mark_obj_and_subj_positions_must_coexist_rule(schema, serialized_schema): error = True # WALIDACJA 13: Kazdy schemat musi posiadac wyznaczona ocene if check_and_mark_schema_must_have_opinion_rule(lemma, schema, serialized_schema): error = True # WALIDACJA 15: Haslo nie moze posiadac pustych schematow if check_and_mark_schemas_cant_be_empty_rule(schema, serialized_schema): error = True # WALIDACJA 16: czy wszystkie przyklady wlasne zostaly zatwierdzone if check_and_mark_examples_approved_rule(lemma, status, schema, serialized_schema): error = True # WALIDACJA 21: Kazdy schemat musi posiadac dowiazany co najmniej jeden przyklad if check_and_mark_schema_must_have_example_rule(lemma, schema, serialized_schema): error = True # WALIDACJA 29: Schemat nie moze posiadac nieaktywnych typow argumentow if check_and_mark_all_phrase_types_must_be_active_rule(schema, serialized_schema): error = True # WALIDACJA 33: Przynajmniej jeden podrzednik w 
schemacie musi byc niezleksykalizowany if check_and_mark_at_least_one_sec_elem_must_be_nlexicalized_rule(schema, serialized_schema): error = True # WALIDACJA 34: Pod ramką z refl nie mogą być przykłady niepodpięte pod refl if check_and_mark_examples_must_use_refl_rule(lemma, schema, serialized_schema): error = True # walidacja pozycji if check_positions_and_mark_errors(lemma, schema, serialized_schema): error = True return error def check_and_mark_cant_have_same_functions_rule(schema, serialized_schema): error = False for pos_cat in PositionCategory.objects.exclude(category__startswith='controllee').all(): if schema.positions.filter(categories__category=pos_cat.category).count() > 1: serialized_schema['error'] = True serialized_schema['tooltip'] += u'W schemacie występują co najmniej dwie pozycje oznaczone w ten sam sposób (reguła nie obejmuje oznaczenia controllee).\n' error = True break return error def check_and_mark_controllee_require_controller_rule(schema, serialized_schema): error = False if((schema.positions.filter(categories__category=u'controllee').exists() and not schema.positions.filter(categories__category=u'controller').exists()) or (schema.positions.filter(categories__category=u'controllee2').exists() and not schema.positions.filter(categories__category=u'controller2').exists()) or (schema.positions.filter(categories__category=u'controller').exists() and not schema.positions.filter(categories__category=u'controllee').exists()) or (schema.positions.filter(categories__category=u'controller2').exists() and not schema.positions.filter(categories__category=u'controllee2').exists())): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat zawierający pozycję oznaczoną jako controllee/controller musi jednocześnie zawierać inną pozycję oznaczoną jako controller/controllee o tym samym indeksie.\n' error = True return error def check_and_mark_controllee1_must_be_used_first_rule(schema, serialized_schema): error = False 
if((schema.positions.filter(categories__category=u'controller2').exists() or schema.positions.filter(categories__category=u'controllee2').exists()) and not (schema.positions.filter(categories__category=u'controller').exists() or schema.positions.filter(categories__category=u'controllee').exists())): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat posiada pozycję oznaczoną jako controller2/controllee2, ale nie posiada pozycji oznaczonej jako controller/controllee.\n' error = True return error def check_and_mark_refl_cant_be_in_schema_with_obj_rule(schema, serialized_schema): error = False if (schema.positions.filter(categories__category=u'obj').exists() and schema.positions.filter(arguments__type=u'refl').exists()): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat zawierający typ frazy refl nie może posiadać pozycji oznaczonej jako obj.\n' error = True return error def check_and_mark_sie_cant_be_in_schema_with_obj_rule(schema, serialized_schema): error = False if (schema.positions.filter(categories__category=u'obj').exists() and schema.characteristics.filter(type=u'ZWROTNOŚĆ', value__value=u'się').exists()): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat o zwrotności "się" nie może posiadać pozycji oznaczonej jako obj.\n' error = True return error def check_and_mark_sie_cant_be_in_schema_with_refl_rule(schema, serialized_schema): error = False if(schema.positions.filter(arguments__type=u'refl').exists() and schema.characteristics.filter(type=u'ZWROTNOŚĆ', value__value=u'się').exists()): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat o zwrotności "się" nie może zawierać typu frazy refl.\n' error = True return error def check_and_mark_obj_and_subj_positions_must_coexist_rule(schema, serialized_schema): error = False if(schema.positions.filter(categories__category=u'obj').exists() and not schema.positions.filter(categories__category=u'subj').exists()): 
serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat posiadający pozycję oznaczoną jako obj musi posiadać pozycję oznaczoną jako subj.\n' error = True return error def check_and_mark_schema_must_have_opinion_rule(lemma, schema, serialized_schema): error = False if not lemma.frame_opinions.filter(frame=schema).exists(): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat nie posiada oceny.\n' error = True return error def check_and_mark_schemas_cant_be_empty_rule(schema, serialized_schema): error = False if schema.positions.count() == 0: serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat jest pusty.\n' error = True return error def check_and_mark_examples_approved_rule(lemma, status, schema, serialized_schema): error = False if status and status.check_examples: if are_examples_approved(lemma, schema, serialized_schema): error = True return error def check_and_mark_schema_must_have_example_rule(lemma, schema, serialized_schema): error = False if not lemma.nkjp_examples.filter(frame=schema).exists(): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat nie posiada żadnego dowiązanego przykładu.\n' error = True return error def check_and_mark_all_phrase_types_must_be_active_rule(schema, serialized_schema): error = False if schema.has_inactive_arguments(): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Schemat zawiera nieaktywne typy fraz.\n' error = True return error def check_and_mark_at_least_one_sec_elem_must_be_nlexicalized_rule(schema, serialized_schema): error = False if schema.is_fully_lexicalized(): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Przynajmniej jeden podrzędnik w schemacie musi być niezleksykalizowany.\n' error = True return error def check_and_mark_examples_must_use_refl_rule(lemma, schema, serialized_schema): error = False if schema.positions.filter(arguments__text_rep='refl').exists(): schema_examples = 
lemma.nkjp_examples.filter(frame=schema) for example in schema_examples: if not example.pinned_to('refl'): serialized_schema['error'] = True serialized_schema['tooltip'] += u'Wszystkie przykłady dopięte do schematu muszą wykorzystywać typ frazy refl.\n' error = True break return error ###################### positions validation ############################## def check_positions_and_mark_errors(lemma, schema, serialized_schema): error = False for position, serialized_position in zip(sortPositions(schema.positions.all()), serialized_schema['positions']): position = position['position'] lemma_pos = lemma.entry_obj.pos if PositionCategory.objects.filter(poss=lemma_pos).exists(): # WALIDACJA 6: Pozycje controller o danym indeksie nie moga byc jednoczesnie oznaczone jako controllee o tym samym indeksie. # z hardcodami niestety, do poprawy na bazie, kiedy tempo zmaleje :( if check_and_mark_controller_not_controllee_rule(position, serialized_position): error = True # WALIDACJA 8: Przypadek pred nie może wystąpić w pozycji NIEBĘDĄCEJ jednocześnie jako controllee. # niestety z hardcodami :( if check_and_mark_pred_require_controllee_rule(position, serialized_position): error = True # WALIDACJA 9: Niedopuszczone jest refl w jednej pozycji z jakimkolwiek innym argumentem. 
# niestety z hardcodami :( if check_and_mark_refl_must_be_lonely_rule(position, serialized_position): error = True # WALIDACJA 20: Niedopuszczone jest "E" w jednej pozycji z jakimkolwiek innym argumentem i # w pozycji nie bedacej "subj" # niestety z hardcodami :( if check_and_mark_E_must_be_lonely_and_subj_rule(position, serialized_position): error = True # WALIDACJA 26: Argument z przypadkiem inst nie moze byc oznaczony jako subj if check_and_mark_inst_cant_be_subj_rule(position, serialized_position): error = True # WALIDACJA 27: Pozycja oznaczona jako subj musi posiadac taki sam przypadek we wszystkich argumentach if check_and_mark_subj_position_need_same_case_rule(position, serialized_position): error = True else: # WALIDACJA 28: Pozycja nie moze posiadac zadnej funkcji if check_and_mark_position_cant_have_function_rule(position, serialized_position): error = True # WALIDACJA 31: Jeśli xp(mod) jest jedynym typem frazy w pozycji nie powinien być oznaczony jako controllee. # niestety z hardcodami :( if check_and_mark_lonely_xp_mod_cant_be_controllee_rule(position, serialized_position): error = True # Walidacja typów fraz if check_arguments_and_mark_errors(lemma, position, serialized_position): error = True return error
# NOTE(review): the line above is the tail of a definition that starts before
# this section; it is carried over verbatim and was not restyled.


def _flag_rule_violation(serialized, message):
    """Set the ``error`` flag on ``serialized`` and append ``message`` to its tooltip."""
    serialized['error'] = True
    serialized['tooltip'] += message


def check_and_mark_controller_not_controllee_rule(position, serialized_position):
    """Flag a position tagged as controller and controllee with the same index.

    Returns True (after marking ``serialized_position``) when the position
    carries both tags of one of the clashing pairs, False otherwise.
    """
    def has_category(name):
        return position.categories.filter(category=name).exists()

    clashing_pairs = ((u'controller', u'controllee'),
                      (u'controller2', u'controllee2'))
    for controller_tag, controllee_tag in clashing_pairs:
        if has_category(controller_tag) and has_category(controllee_tag):
            _flag_rule_violation(
                serialized_position,
                u'Pozycja jest jednocześnie oznaczona jako controller i controllee o tym samym indeksie.\n')
            return True
    return False


def check_and_mark_pred_require_controllee_rule(position, serialized_position):
    """Flag a position that has the ``pred`` case but no controllee category."""
    if position.categories.filter(category__startswith='controllee').exists():
        return False
    pred_arguments = position.arguments.filter(
        atributes__type=u'PRZYPADEK',
        atributes__values__parameter__type__name=u'pred')
    if not pred_arguments.exists():
        return False
    _flag_rule_violation(
        serialized_position,
        u'Pozycja posiada przypadek pred nie posiadając funkcji controllee.\n')
    return True


def check_and_mark_refl_must_be_lonely_rule(position, serialized_position):
    """Flag a position where ``refl`` occurs together with other phrase types."""
    if not position.arguments.filter(type=u'refl').exists():
        return False
    if not position.arguments.count() > 1:
        return False
    _flag_rule_violation(
        serialized_position,
        u'Pozycja posiada typ frazy refl występujący wraz z innymi typami fraz.\n')
    return True


def check_and_mark_E_must_be_lonely_and_subj_rule(position, serialized_position):
    """Flag a position where ``E`` is not alone and/or the position is not ``subj``."""
    if not position.arguments.filter(type='E').exists():
        return False
    is_alone = position.arguments.count() <= 1
    # The subj lookup is only reached when E is the sole argument.
    if is_alone and position.categories.filter(category='subj').exists():
        return False
    _flag_rule_violation(
        serialized_position,
        u'Pozycja posiada typ frazy E wraz z innymi typami fraz i/lub nie jest oznaczona jako subj.\n')
    return True


def check_and_mark_inst_cant_be_subj_rule(position, serialized_position):
    """Flag a ``subj`` position that contains an argument with the ``inst`` case."""
    if not position.categories.filter(category=u'subj', control=False).exists():
        return False
    inst_arguments = position.arguments.filter(
        atributes__type=u'PRZYPADEK',
        atributes__values__parameter__type__name=u'inst')
    if not inst_arguments.exists():
        return False
    _flag_rule_violation(
        serialized_position,
        u'Pozycja oznaczona jako subj nie może zawierać przypadka inst.\n')
    return True


def check_and_mark_subj_position_need_same_case_rule(position, serialized_position):
    """Flag a ``subj`` position whose case-bearing arguments differ in case."""
    if not position.categories.filter(category=u'subj', control=False).exists():
        return False
    arguments_with_case = position.arguments.filter(atributes__type=u'PRZYPADEK').distinct()
    if not arguments_with_case.exists():
        return False
    # The first argument's case attribute is the reference for all the others.
    reference_case = arguments_with_case.all()[0].atributes.get(type=u'PRZYPADEK')
    case_differs = any(
        argument.atributes.get(type=u'PRZYPADEK') != reference_case
        for argument in arguments_with_case.all())
    if not case_differs:
        return False
    _flag_rule_violation(
        serialized_position,
        u'Pozycja oznaczona jako subj musi mieć jednakowy przypadek we wszystkich typach fraz.\n')
    return True


def check_and_mark_position_cant_have_function_rule(position, serialized_position):
    """Flag a position that has any category (function) assigned."""
    if not position.categories.exists():
        return False
    _flag_rule_violation(serialized_position,
                         u'Pozycja nie może posiadać żadnej funkcji.\n')
    return True


def check_and_mark_lonely_xp_mod_cant_be_controllee_rule(position, serialized_position):
    """Flag a controllee position whose only argument is ``xp`` with mode ``mod``."""
    if not position.categories.filter(category__startswith=u'controllee').exists():
        return False
    if position.arguments.count() != 1:
        return False
    xp_mod_arguments = position.arguments.filter(
        type=u'xp', atributes__selection_mode__name='mod')
    if not xp_mod_arguments.exists():
        return False
    _flag_rule_violation(
        serialized_position,
        u'Typ frazy xp(mod) nie może występować samodzielnie w pozycji oznaczonej jako controllee.\n')
    return True


###################### arguments validation ##############################
def check_arguments_and_mark_errors(lemma, position, serialized_position):
    """Run every argument-level rule on each argument of ``position``.

    Arguments (ordered by ``sortArguments``) are paired with the entries of
    ``serialized_position['arguments']``. Every rule is evaluated for every
    pair so that all tooltips get filled in. Returns True when at least one
    rule reported a violation.
    """
    found_error = False
    ordered_arguments = sortArguments(position.arguments.all())
    pairs = zip(ordered_arguments, serialized_position['arguments'])
    for argument, serialized_argument in pairs:
        # A list (not a generator) on purpose: each rule must run, since it
        # also marks the serialized argument.
        verdicts = [
            # VALIDATION 10: when saving an entry, xp must be given a concrete
            # realization (not "_"). Hard-coded, unfortunately.
            check_and_mark_xp_must_have_type_rule(argument, serialized_argument),
            # VALIDATION 12: np, prepnp, ncp and prepncp are not allowed with
            # the postp case. Heavily hard-coded, unfortunately.
            check_and_mark_np_prepnp_cp_ncp_prepncp_cant_have_postp_rule(argument, serialized_argument),
            # VALIDATION 17: the case assigned by a preposition must be among
            # the Morfeusz interpretations.
            check_and_mark_prep_case_pair_must_be_in_morfeusz_rule(argument, serialized_argument),
            # VALIDATION 22: the schema contains phrase types that cannot occur
            # for the given part of speech.
            check_and_mark_phrase_types_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument),
            # VALIDATION 23: the schema contains attributes that cannot occur
            # for the given part of speech.
            check_and_mark_attributes_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument),
            # VALIDATION 30: the argument must have as many attributes as its
            # model prescribes.
            check_and_mark_attributes_count_must_be_agreed_with_phrase_type_model_rule(argument, serialized_argument),
            # VALIDATION 22: the preposition 'jako' may occur only with the
            # 'str' case. Heavily hard-coded, unfortunately.
            # NOTE(review): number 22 is reused for this rule - confirm the
            # intended rule number.
            check_and_mark_jako_prep_can_be_str_only_rule(argument, serialized_argument),
        ]
        if any(verdicts):
            found_error = True
    return found_error


def check_and_mark_xp_must_have_type_rule(argument, serialized_argument):
    """Flag an ``xp`` argument whose selection mode is still the placeholder ``_``."""
    if argument.type != 'xp':
        return False
    if not argument.atributes.filter(selection_mode__name='_').exists():
        return False
    _flag_rule_violation(serialized_argument,
                         u'Xp musi posiadać określoną kategorię.\n')
    return True


def check_and_mark_np_prepnp_cp_ncp_prepncp_cant_have_postp_rule(argument, serialized_argument):
    """Flag an np/prepnp/ncp/prepncp argument that carries the ``postp`` case."""
    restricted_types = ('np', 'prepnp', 'ncp', 'prepncp')
    if argument.type not in restricted_types:
        return False
    postp_attributes = argument.atributes.filter(
        type=u'PRZYPADEK', values__parameter__type__name=u'postp')
    if not postp_attributes.exists():
        return False
    _flag_rule_violation(serialized_argument,
                         u'Typ frazy nie może posiadać przypadka postp.\n')
    return True


def check_and_mark_prep_case_pair_must_be_in_morfeusz_rule(argument, serialized_argument):
    """Flag an argument whose text representation is rejected by ``prep_check``."""
    if prep_check(argument.text_rep):
        return False
    _flag_rule_violation(serialized_argument,
                         u'Typ frazy nie uzgadnia przypadka z przyimkiem.\n')
    return True


def check_and_mark_phrase_types_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument):
    """Flag an argument whose phrase type model does not list the lemma's part of speech."""
    phrase_type_model = Argument_Model.objects.get(arg_model_name=argument.type)
    pos = lemma.entry_obj.pos
    if phrase_type_model.poss.filter(id=pos.id).exists():
        return False
    _flag_rule_violation(
        serialized_argument,
        u'Typ frazy nie może wystąpić dla części mowy %s.\n' % pos.name)
    return True


def check_and_mark_attributes_must_be_accepted_by_pos_rule(lemma, argument, serialized_argument):
    """Flag an argument with attribute values not allowed for the lemma's part of speech."""
    pos = lemma.entry_obj.pos
    if not has_wrong_pos_values(argument, pos):
        return False
    _flag_rule_violation(
        serialized_argument,
        u'Typ frazy posiada atrybuty nie mogące występować dla części mowy %s.\n' % pos.name)
    return True


def has_wrong_pos_values(argument, pos):
    """Return True if any parameter-type attribute value of ``argument`` does not list ``pos``."""
    return any(
        not value.parameter.type.poss.filter(pk=pos.pk).exists()
        for attribute in argument.atributes.all()
        for value in attribute.values.filter(type__sym_name=u'parameter').all())


def check_and_mark_attributes_count_must_be_agreed_with_phrase_type_model_rule(argument, serialized_argument):
    """Flag an argument whose attribute count differs from the number of its attribute models."""
    expected_count = len(get_attribute_models(argument))
    actual_count = argument.atributes.count()
    if expected_count == actual_count:
        return False
    _flag_rule_violation(
        serialized_argument,
        u'Typ frazy ma niezgodną liczbę atrybutów ze swoim modelem.\n')
    return True


def check_and_mark_jako_prep_can_be_str_only_rule(argument, serialized_argument):
    """Flag an argument using the preposition ``jako`` without the ``str`` case."""
    jako_prepositions = argument.atributes.filter(
        type=u'PRZYIMEK', values__parameter__type__name=u'jako')
    if not jako_prepositions.exists():
        return False
    str_cases = argument.atributes.filter(
        type=u'PRZYPADEK', values__parameter__type__name=u'str')
    if str_cases.exists():
        return False
    _flag_rule_violation(
        serialized_argument,
        u'Przyimek jako może występować jedynie z przypadkiem str.\n')
    return True