From 7c0a78887cfb7c3f89ded4166495ad23085050cc Mon Sep 17 00:00:00 2001
From: bniton <bartek.niton@gmail.com>
Date: Thu, 2 Aug 2018 11:43:59 +0200
Subject: [PATCH] Added new control types.

---
 dictionary/management/commands/add_new_control_types.py       |  33 +++++
 dictionary/management/commands/change_control2pred_control.py | 379 ++++++++++++++++++++++
 dictionary/management/commands/check_text_reps.py             | 104 ++++++++
 dictionary/models.py                                          |  19 ++++---
 4 files changed, 528 insertions(+), 7 deletions(-)
 create mode 100644 dictionary/management/commands/add_new_control_types.py
 create mode 100644 dictionary/management/commands/change_control2pred_control.py
 create mode 100644 dictionary/management/commands/check_text_reps.py

diff --git a/dictionary/management/commands/add_new_control_types.py b/dictionary/management/commands/add_new_control_types.py
new file mode 100644
index 0000000..e7dfd12
--- /dev/null
+++ b/dictionary/management/commands/add_new_control_types.py
@@ -0,0 +1,33 @@
+# -*- coding:utf-8 -*-
+"""Management command adding the predicative control position categories."""
+
+from django.core.management.base import BaseCommand
+
+from dictionary.models import POS, PositionCategory
+
+# (category name, priority) of every control category this command creates.
+NEW_CONTROL_CATEGORIES = (
+    ('pred_controller', 70),
+    ('pred_controllee', 80),
+)
+
+
+class Command(BaseCommand):
+    help = 'Adds the pred_controller and pred_controllee position categories.'
+
+    def handle(self, **options):
+        add_new_controll_types()
+
+
+def add_new_controll_types():
+    """Create both pred_* control categories and attach every non-noun POS.
+
+    A category matching name, control flag and priority is reused if present.
+    """
+    # Evaluate the POS queryset once instead of once per category.
+    non_noun_poss = [pos for pos in POS.objects.all() if pos.tag != 'noun']
+
+    for category_name, priority in NEW_CONTROL_CATEGORIES:
+        category, _created = PositionCategory.objects.get_or_create(
+            category=category_name, control=True, priority=priority)
+        category.poss.add(*non_noun_poss)
diff --git a/dictionary/management/commands/change_control2pred_control.py b/dictionary/management/commands/change_control2pred_control.py
new file mode 100644
index 0000000..3fa3bd3
--- /dev/null
+++ b/dictionary/management/commands/change_control2pred_control.py
@@ -0,0 +1,379 @@
+# -*- coding:utf-8 -*-
+"""One-off migration of control positions to the predicative control categories."""
+
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand
+
+from dictionary.models import Change, Frame, Frame_Opinion, Lemma, Position, PositionCategory, \
+    get_or_create_nkjp_arg_selection, get_or_create_nkjp_example, get_ready_statuses, sortArguments, \
+    sortFrameChars, sortPosCatsAsStrTab, sortPositions
+from dictionary.saving import create_argument_ref, create_operation, create_phrase_type_ref, \
+    reconnect_examples, update_connections
+from semantics.models import LexicalUnitExamples
+
+
+class Command(BaseCommand):
+    """Replace controller/controllee position categories with pred_* ones."""
+
+    def handle(self, **options):
+        """List schemata needing manual review, then convert the remaining ones."""
+        self.print_schema4manual_change()
+        self.change_control2pred_control()
+
+    def print_schema4manual_change(self):
+        """Print (lemma, schema pk) once per schema that needs a manual change."""
+        ready_statuses = get_ready_statuses()
+        for lemma in Lemma.objects.filter(old=False, status__in=ready_statuses).order_by('entry_obj__name'):
+            for schema in lemma.frames.all():
+                for position in schema.positions.all():
+                    if self.position_need_manual_change(position):
+                        print (lemma, schema.pk)
+                        break
+
+    def position_need_manual_change(self, position):
+        """Return True for a controllee position realised by an infp phrase type."""
+        if position.categories.filter(category='controllee').exists():
+            for phrase_type in position.arguments.all():
+                if self.phrase_type_is_infp(phrase_type):
+                    return True
+        return False
+
+    def phrase_type_is_infp(self, phrase_type):
+        """Return True for an infp, or a lex whose 'TYP FRAZY' attribute is an infp."""
+        if (phrase_type.type == 'infp'):
+            return True
+        if (phrase_type.type == 'lex' and
+                phrase_type.atributes.get(type='TYP FRAZY').values.all()[0].argument.type == 'infp'):
+            return True
+        return False
+
+    def change_control2pred_control(self):
+        """Save a new version of every ready lemma that has schemata to convert."""
+        ready_statuses = get_ready_statuses()
+        for lemma in Lemma.objects.filter(old=False, status__in=ready_statuses).order_by('entry_obj__name'):
+            print (lemma)
+            changes = {'schemata': [],
+                       'schemata2change': []}
+            for schema in lemma.frames.all():
+                if self.schema_need_auto_change(schema):
+                    changes['schemata2change'].append(schema)
+                else:
+                    changes['schemata'].append(schema)
+
+            if changes['schemata2change']:
+                self.save_new_lemma_version(lemma, changes)
+
+    def schema_need_auto_change(self, schema):
+        """Return True when any position of *schema* can be converted automatically."""
+        for position in schema.positions.all():
+            if self.controllee_need_auto_change(position):
+                return True
+        return False
+
+    def controllee_need_auto_change(self, position):
+        """Return True for a controllee* position with no infp phrase type."""
+        auto_change = False
+        if position.categories.filter(category__startswith='controllee').exists():
+            auto_change = True
+            for phrase_type in position.arguments.all():
+                if self.phrase_type_is_infp(phrase_type):
+                    auto_change = False
+        return auto_change
+
+    def save_new_lemma_version(self, old_lemma, changes):
+        """Retire *old_lemma* and save a copy carrying the converted schemata.
+
+        ``changes`` maps 'schemata' to frames kept as they are and
+        'schemata2change' to frames whose control positions are converted.
+        """
+        # NOTE(review): the acting user is hard-coded and must exist in the database.
+        admin_user = User.objects.get(username='bniton')
+
+        old_lemma.old = True
+        old_lemma.save()
+
+        # create the new version of the entry
+        new_lemma = Lemma(entry=old_lemma.entry_obj.name,
+                          entry_obj=old_lemma.entry_obj,
+                          owner=old_lemma.owner,
+                          phraseologist=old_lemma.phraseologist,
+                          semanticist=old_lemma.semanticist,
+                          vocabulary=old_lemma.vocabulary,
+                          status=old_lemma.status,
+                          old=False,
+                          frequency_1M=old_lemma.frequency_1M,
+                          frequency_300M=old_lemma.frequency_300M)
+        new_lemma.save()
+
+        # create the change record for the change-tracking system
+        if (old_lemma.owner):
+            lemma_change = Change(user=admin_user, entry=old_lemma, act_owner=old_lemma.owner)
+            lemma_change.save()
+        else:
+            lemma_change = Change(user=admin_user, entry=old_lemma)
+            lemma_change.save()
+
+        # copy the old versions for change tracking and add the new one
+        for version in old_lemma.old_versions.all():
+            new_lemma.old_versions.add(version)
+        new_lemma.old_versions.add(lemma_change)
+
+        # copy the status change history
+        for status_change in old_lemma.status_history.all():
+            new_lemma.status_history.add(status_change)
+
+        # copy messages
+        for message in old_lemma.messages.all():
+            new_lemma.messages.add(message)
+
+        # copy old frames
+        for old_frame in old_lemma.old_frames.all():
+            new_lemma.old_frames.add(old_frame)
+
+        # copy Skladnica frames
+        for skladnica_frame in old_lemma.skladnica_frames.all():
+            new_lemma.skladnica_frames.add(skladnica_frame)
+
+        # copy B frames
+        for B_frame in old_lemma.B_frames.all():
+            new_lemma.B_frames.add(B_frame)
+
+        # copy schema opinions
+        for schema_opinion in old_lemma.frame_opinions.all():
+            new_lemma.frame_opinions.add(schema_opinion)
+
+        # copy examples that do not match any schema
+        for example in old_lemma.lemma_nkjp_examples.all():
+            new_lemma.lemma_nkjp_examples.add(example)
+
+        # add the unchanged schemata
+        for schema in changes['schemata']:
+            new_lemma.frames.add(schema)
+
+        # create the new schemata and attach them to the verb
+        schemata_conversions = []
+        for old_schema in changes['schemata2change']:
+            new_schema, positions_changes = self.get_or_create_new_schema(old_schema)
+
+            # move the frame opinion over to the new schema
+            try:
+                old_opinion = old_lemma.frame_opinions.get(frame=old_schema)
+                opinion_value = old_opinion.value
+                new_lemma.frame_opinions.remove(old_opinion)
+                try:
+                    new_opinion = Frame_Opinion.objects.get(frame=new_schema,
+                                                            value=opinion_value)
+                except Frame_Opinion.DoesNotExist:
+                    new_opinion = Frame_Opinion(frame=new_schema,
+                                                value=opinion_value)
+                    new_opinion.save()
+                new_lemma.frame_opinions.add(new_opinion)
+            except Frame_Opinion.DoesNotExist:
+                pass
+
+            new_lemma.frames.add(new_schema)
+            schemata_conversions.append({'old_schema': old_schema,
+                                         'new_schema': new_schema,
+                                         'positions_changes': positions_changes})
+
+        # carry the semantics over
+        sem_reconnect_operations = self.get_semantic_operations(new_lemma, schemata_conversions)
+        update_connections(new_lemma.id, sem_reconnect_operations, admin_user)
+
+        # copy/add examples to the schemata
+        examples_operations = []
+        for old_example in old_lemma.nkjp_examples.all():
+            if new_lemma.frames.filter(pk=old_example.frame.pk).exists():
+                new_lemma.nkjp_examples.add(old_example)
+            else:
+                conversion = next(conv for conv in schemata_conversions if conv['old_schema'] == old_example.frame)
+
+                argument_selections = []
+                for old_arg_selection in old_example.arguments.all():
+
+                    position_conversion = next((pos_conv for pos_conv in conversion['positions_changes']
+                                                if pos_conv['from'].pk == old_arg_selection.position.pk), None)
+
+                    if position_conversion:
+                        new_arg_selection, xx = get_or_create_nkjp_arg_selection(position_conversion['to'],
+                                                                                 old_arg_selection.arguments.all())
+                        argument_selections.append(new_arg_selection)
+                    else:
+                        argument_selections.append(old_arg_selection)
+
+                # check whether such an NKJP_Example object already exists
+                new_example, xx = get_or_create_nkjp_example(conversion['new_schema'], argument_selections,
+                                                             old_example.sentence, old_example.source,
+                                                             old_example.comment, old_example.opinion,
+                                                             old_example.approvers.all(),
+                                                             old_example.approved, old_example.semantic)
+                new_lemma.nkjp_examples.add(new_example)
+
+                # reconnect examples in semantic layer
+                for frame in new_lemma.entry_obj.visible_frames():  # or actual ??
+                    for lu in frame.lexical_units.all():
+                        if LexicalUnitExamples.objects.filter(lexical_unit=lu, example=old_example).exists():
+                            examples_operations.append(self.disconnect_example_operation(lu, old_example))
+                            examples_operations.append(self.connect_example_operation(lu, new_example))
+
+        reconnect_examples(new_lemma, examples_operations)
+
+    def get_or_create_new_schema(self, old_schema):
+        """Return (schema, positions_changes) for the converted *old_schema*.
+
+        positions_changes lists {'from': old, 'to': new} position pairs.
+        """
+        positions = []
+        positions_changes = []
+        for position in old_schema.positions.all():
+            if self.position_need_category_change(position):
+                new_position = self.get_or_create_new_position(position)
+                positions.append(new_position)
+                positions_changes.append({'from': position, 'to': new_position})
+            else:
+                positions.append(position)
+
+        sorted_positions = []
+        sorted_positions_dict = sortPositions(positions)
+        for position_dict in sorted_positions_dict:
+            sorted_positions.append(position_dict['position'])
+
+        sorted_positions_strs = []
+        for position in sorted_positions:
+            sorted_positions_strs.append(position.text_rep)
+
+        sorted_schema_chars = sortFrameChars(old_schema.characteristics.all())
+        sorted_schema_chars_strs = [char.value.value for char in sorted_schema_chars]
+
+        text_rep = u'%s:%s' % (':'.join(sorted_schema_chars_strs),
+                               '+'.join(sorted_positions_strs))
+
+        try:
+            new_schema = Frame.objects.get(text_rep=text_rep)
+        except Frame.DoesNotExist:
+            new_schema = Frame(text_rep=text_rep)
+            new_schema.save()
+
+            last_pos_obj = None
+            pos_obj_count = 0
+            for pos_obj in sorted_positions:
+                same_pos_db = Position.objects.filter(text_rep=pos_obj.text_rep).order_by('id')
+                if not last_pos_obj or last_pos_obj.text_rep != pos_obj.text_rep:
+                    pos_obj_count = 1
+                    new_schema.positions.add(same_pos_db[0])
+                else:
+                    pos_obj_count = pos_obj_count + 1
+                    if pos_obj_count <= len(same_pos_db):
+                        same_pos_obj = same_pos_db[pos_obj_count - 1]
+                        new_schema.positions.add(same_pos_obj)
+                    else:
+                        same_pos_obj = Position(text_rep=pos_obj.text_rep)
+                        same_pos_obj.save()
+                        for category in pos_obj.categories.all():
+                            same_pos_obj.categories.add(category)
+                        for arg in pos_obj.arguments.all():
+                            same_pos_obj.arguments.add(arg)
+                        new_schema.positions.add(same_pos_obj)
+                last_pos_obj = pos_obj
+            for schema_char in old_schema.characteristics.all():
+                new_schema.characteristics.add(schema_char)
+            if new_schema.has_phraseologic_arguments():
+                new_schema.phraseologic = True
+            new_schema.save()
+
+        return new_schema, positions_changes
+
+    def position_need_category_change(self, position):
+        """Return True when *position* carries a controllee or controller category."""
+        if position.categories.filter(category__in=['controllee', 'controller']).exists():
+            return True
+        return False
+
+    def get_or_create_new_position(self, old_position):
+        """Return the position with control categories replaced by pred_* ones."""
+        categories_strs = []
+        for category in old_position.categories.all():
+            if category.category == 'controllee':
+                categories_strs.append('pred_controllee')
+            elif category.category == 'controller':
+                categories_strs.append('pred_controller')
+            else:
+                categories_strs.append(category.category)
+
+        sorted_categories_strs = sortPosCatsAsStrTab(categories_strs)
+        sorted_arguments = sortArguments(old_position.arguments.all())
+
+        args_strs = []
+        for arg in sorted_arguments:
+            args_strs.append(arg.text_rep)
+
+        pos_text_rep = '%s{%s}' % (','.join(sorted_categories_strs), ';'.join(args_strs))
+
+        try:
+            new_position = Position.objects.get(text_rep=pos_text_rep)
+        except Position.DoesNotExist:
+            new_position = Position(text_rep=pos_text_rep)
+            new_position.save()
+
+            for category_name in sorted_categories_strs:
+                category = PositionCategory.objects.get(category=category_name)
+                new_position.categories.add(category)
+
+            for arg in old_position.arguments.all():
+                new_position.arguments.add(arg)
+
+        return new_position
+
+    def get_semantic_operations(self, lemma, schemata_conversions):
+        """Collect reconnect operations for all schema conversions of *lemma*."""
+        operations = []
+
+        frames = lemma.entry_obj.visible_frames()  # or actual??
+
+        for conv in schemata_conversions:
+            schema_operations = self.get_reconnect_operations(frames, conv)
+            operations.extend(schema_operations)
+
+        return operations
+
+
+    def get_reconnect_operations(self, frames, conversion):
+        """Build disconnect/connect operations moving realizations to the new schema."""
+        operations = []
+
+        for frame in frames:
+            for compl in frame.complements.all():
+                arg_ref = create_argument_ref(frame, compl)
+                for rel in compl.realizations.all():
+                    schema_change = False
+                    position_change = None
+                    if rel.frame.pk == conversion['old_schema'].pk:
+                        schema_change = True
+                        for change in conversion['positions_changes']:
+                            if change['from'].pk == rel.position.pk:
+                                position_change = change
+                    if schema_change:
+                        old_phrase_type_ref = create_phrase_type_ref(rel.frame, rel.position,
+                                                                     rel.argument, rel.alternation)
+                        if position_change:
+                            new_phrase_type_ref = create_phrase_type_ref(conversion['new_schema'],
+                                                                         position_change['to'],
+                                                                         rel.argument,
+                                                                         rel.alternation)
+                        else:
+                            new_phrase_type_ref = create_phrase_type_ref(conversion['new_schema'],
+                                                                         rel.position,
+                                                                         rel.argument,
+                                                                         rel.alternation)
+                        if new_phrase_type_ref != old_phrase_type_ref:
+                            operations.append(create_operation('disconnect', arg_ref, old_phrase_type_ref))
+                            operations.append(create_operation('connect', arg_ref, new_phrase_type_ref))
+        return operations
+
+    def disconnect_example_operation(self, lu, example):
+        """Build the operation detaching *example* from lexical unit *lu*."""
+        return {'operation': 'remove_example', 'unit': lu.id, 'example': example.id}
+
+    def connect_example_operation(self, lu, example):
+        """Build the operation attaching *example* to lexical unit *lu*."""
+        return {'operation': 'add_example', 'unit': lu.id, 'example': example.id}
diff --git a/dictionary/management/commands/check_text_reps.py b/dictionary/management/commands/check_text_reps.py
new file mode 100644
index 0000000..c2f41e9
--- /dev/null
+++ b/dictionary/management/commands/check_text_reps.py
@@ -0,0 +1,104 @@
+# -*- coding:utf-8 -*-
+"""Report objects whose stored text_rep differs from the recomputed one."""
+
+import codecs
+import os
+
+from django.core.management.base import BaseCommand
+from django.db.models import Count
+
+from dictionary.common_func import frame_data_to_text_rep, \
+    position_data_to_text_rep
+from dictionary.models import Argument, Argument_Model, Frame, \
+    Position, sortatributes, AttributeParameter
+from settings import PROJECT_PATH
+
+WRONG_PARAMETERS_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_parameters_po_reperacji_20180801.txt')
+WRONG_ARGUMENTS_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_arguments_po_reperacji_20180801.txt')
+WRONG_POSITIONS_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_positions_po_reperacji_20180801.txt')
+WRONG_FRAMES_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_frames_po_reperacji_20180801.txt')
+
+class Command(BaseCommand):
+    args = 'none'
+    help = 'Looking for wrong text_reps.'
+
+    def handle(self, **options):
+        check_attr_parameters()
+        check_arguments_text_reps()
+        check_positions_text_reps()
+        check_frames_text_reps()
+
+
+def check_attr_parameters():
+    """Write parameters sharing type and subparameters with another one to WRONG_PARAMETERS_PATH."""
+    print 'Checking parameters.'
+    # The with block closes the report even when a query raises.
+    with codecs.open(WRONG_PARAMETERS_PATH, 'wt', 'utf-8') as wrong_parameters_file:
+        for attr_param in AttributeParameter.objects.all():
+            print attr_param
+            possible_param_objs = AttributeParameter.objects.annotate(subparams_count=Count('subparameters')).filter(
+                subparams_count=attr_param.subparameters.count())
+            for subparam in attr_param.subparameters.all():
+                possible_param_objs = possible_param_objs.filter(subparameters=subparam)
+            # possible_param_objs = possible_param_objs.distinct()
+            possible_param_objs = possible_param_objs.filter(type=attr_param.type)
+            if possible_param_objs.count() > 1:
+                # One entry per line, like the other reports.
+                wrong_parameters_file.write(u'%s\n' % unicode(attr_param))
+
+
+def check_arguments_text_reps():
+    """Write arguments whose text_rep differs from the recomputed one to WRONG_ARGUMENTS_PATH."""
+    print 'Checking arguments.'
+    with codecs.open(WRONG_ARGUMENTS_PATH, 'wt', 'utf-8') as wrong_arguments_file:
+        for argument in Argument.objects.all():
+            print argument
+            proper_text_rep = arg_data_to_text_rep(argument)
+            if proper_text_rep != argument.text_rep:
+                wrong_arguments_file.write(u'%s --> proper: %s\n' % (argument.text_rep, proper_text_rep))
+
+
+def check_positions_text_reps():
+    """Write positions whose text_rep differs from the recomputed one to WRONG_POSITIONS_PATH."""
+    print 'Checking positions.'
+    with codecs.open(WRONG_POSITIONS_PATH, 'wt', 'utf-8') as wrong_positions_file:
+        for position in Position.objects.all():
+            print position
+            proper_text_rep = position_data_to_text_rep(position.categories, position.arguments)
+            if proper_text_rep != position.text_rep:
+                wrong_positions_file.write(u'%s --> proper: %s\n' % (position.text_rep, proper_text_rep))
+
+
+def check_frames_text_reps():
+    """Write frames with a wrong text_rep, plus their lemmas, opinions and examples, to WRONG_FRAMES_PATH."""
+    print 'Checking frames.'
+    with codecs.open(WRONG_FRAMES_PATH, 'wt', 'utf-8') as wrong_frames_file:
+        for frame in Frame.objects.all():
+            print frame
+            proper_text_rep = frame_data_to_text_rep(frame.characteristics, frame.positions)
+            if proper_text_rep != frame.text_rep:
+                wrong_frames_file.write(u'%s --> proper: %s\n' % (frame.text_rep, proper_text_rep))
+                for lemma in frame.lemmas.all():
+                    if lemma.old:
+                        wrong_frames_file.write(u'\t\told: %s\n' % (lemma.entry))
+                    else:
+                        wrong_frames_file.write(u'\t\tnew: %s\n' % (lemma.entry))
+                for opinion in frame.opinions.all():
+                    wrong_frames_file.write(u'\t\topinion: %s\n' % (opinion.frame.text_rep))
+                for example in frame.nkjp_examples.all():
+                    wrong_frames_file.write(u'\t\texample: %s\n' % (example.sentence))
+
+
+def arg_data_to_text_rep(argument):
+    """Return the text representation recomputed from *argument*'s type and sorted attributes."""
+    sorted_attributes = sortatributes(argument)
+    arg_model = Argument_Model.objects.get(arg_model_name=argument.type)
+    # sorted_attributes = sort_arg_attributes(arg_model, attributes)
+    attributes_text_reps = [unicode(attr) for attr in sorted_attributes]
+    if len(sorted_attributes) == 0:
+        arg_text_rep = argument.type
+    elif arg_model.hide_type:
+        arg_text_rep = u'%s' % (','.join(attributes_text_reps))
+    else:
+        arg_text_rep = u'%s(%s)' % (argument.type, ','.join(attributes_text_reps))
+    return arg_text_rep
diff --git a/dictionary/models.py b/dictionary/models.py
index fc83c48..902a41d 100644
--- a/dictionary/models.py
+++ b/dictionary/models.py
@@ -500,12 +500,13 @@ class NKJP_Example(Model):
     )
 
 def get_or_create_nkjp_example(frame, arguments, sentence, source,
-                               comment, opinion, approvers, approved):
+                               comment, opinion, approvers, approved, semantic):
     created = False
     example = None
     possible_examples = NKJP_Example.objects.filter(frame=frame, sentence=sentence,
                                                     source=source, comment=comment,
-                                                    opinion=opinion, approved=approved)
+                                                    opinion=opinion, approved=approved,
+                                                    semantic=semantic)
     if possible_examples.exists():
         for arg_sel in arguments:
             possible_examples = possible_examples.filter(arguments=arg_sel)
@@ -521,27 +522,31 @@ def get_or_create_nkjp_example(frame, arguments, sentence, source,
                 example = create_nkjp_example(frame=frame, arguments=arguments,
                                               sentence=sentence, source=source,
                                               comment=comment, opinion=opinion,
-                                              approvers=approvers, approved=approved)
+                                              approvers=approvers, approved=approved,
+                                              semantic=semantic)
                 created = True
         else:
             example = create_nkjp_example(frame=frame, arguments=arguments,
                                           sentence=sentence, source=source,
                                           comment=comment, opinion=opinion,
-                                          approvers=approvers, approved=approved)
+                                          approvers=approvers, approved=approved,
+                                          semantic=semantic)
             created = True
     else:
         example = create_nkjp_example(frame=frame, arguments=arguments,
                                       sentence=sentence, source=source,
                                       comment=comment, opinion=opinion,
-                                      approvers=approvers, approved=approved)
+                                      approvers=approvers, approved=approved,
+                                      semantic=semantic)
         created = True
     return example, created
 
 def create_nkjp_example(frame, arguments, sentence, source,
-                        comment, opinion, approvers, approved):
+                        comment, opinion, approvers, approved, semantic):
     example = NKJP_Example(frame=frame, sentence=sentence,
                            source=source, comment=comment,
-                           opinion=opinion, approved=approved)
+                           opinion=opinion, approved=approved,
+                           semantic=semantic)
     example.save()
     example.arguments.add(*arguments)
     example.approvers.add(*approvers)
--
libgit2 0.22.2