diff --git a/LICENSE b/LICENSE
index a9c712a..3620dc1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,7 +1,7 @@
 Slowal, a web tool designed for creating, editing and browsing valence dictionaries.
 http://zil.ipipan.waw.pl/Slowal
 
-Copyright (c) 2012-2016 by Institute of Computer Science, Polish Academy of Sciences (IPI PAN)
+Copyright (c) 2012-2018 by Institute of Computer Science, Polish Academy of Sciences (IPI PAN)
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/dictionary/ajax_vocabulary_management.py b/dictionary/ajax_vocabulary_management.py
index c5b57e1..726898d 100644
--- a/dictionary/ajax_vocabulary_management.py
+++ b/dictionary/ajax_vocabulary_management.py
@@ -121,7 +121,7 @@ def create_copyrights_str(dictionary_file, frame_opinions_pks,
     license_clause.extend([u"% http://zil.ipipan.waw.pl/Walenty",
                            u"%% version: %s" % date.strftime(month + ' %d, %Y'),
                            u"%",
-                           u"% © Copyright 2012–2017 by the Institute of Computer Science, Polish",
+                           u"% © Copyright 2012–2018 by the Institute of Computer Science, Polish",
                            u"% Academy of Sciences (IPI PAN)",
                            u"%",
                            u"% This work is distributed under a CC BY-SA license:",
diff --git a/dictionary/management/commands/add_new_control_types.py b/dictionary/management/commands/add_new_control_types.py
new file mode 100644
index 0000000..e7dfd12
--- /dev/null
+++ b/dictionary/management/commands/add_new_control_types.py
@@ -0,0 +1,30 @@
+# -*- coding:utf-8 -*-
+
+from django.core.management.base import BaseCommand
+
+from dictionary.models import POS, PositionCategory
+
+
+class Command(BaseCommand):
+
+    def handle(self, **options):
+        add_new_controll_types()
+
+
+def add_new_controll_types():
+    """Create the pred_controller/pred_controllee categories and attach every non-noun POS to them."""
+    poss = POS.objects.all()
+
+    pred_controller, xx = PositionCategory.objects.get_or_create(category='pred_controller',
+                                                                 control=True,
+                                                                 priority=70)
+    for pos in poss:
+        if pos.tag != 'noun':
+            pred_controller.poss.add(pos)
+
+    pred_controllee, xx = PositionCategory.objects.get_or_create(category='pred_controllee',
+                                                                 control=True,
+                                                                 priority=80)
+    for pos in poss:
+        if pos.tag != 'noun':
+            pred_controllee.poss.add(pos)
diff --git a/dictionary/management/commands/change_control2pred_control.py b/dictionary/management/commands/change_control2pred_control.py
new file mode 100644
index 0000000..3fa3bd3
--- /dev/null
+++ b/dictionary/management/commands/change_control2pred_control.py
@@ -0,0 +1,369 @@
+# -*- coding:utf-8 -*-
+
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand
+
+from dictionary.models import Change, Frame, Frame_Opinion, Lemma, Position, PositionCategory, \
+    get_or_create_nkjp_arg_selection, get_or_create_nkjp_example, get_ready_statuses, sortArguments, \
+    sortFrameChars, sortPosCatsAsStrTab, sortPositions
+from dictionary.saving import create_argument_ref, create_operation, create_phrase_type_ref, \
+    reconnect_examples, update_connections
+from semantics.models import LexicalUnitExamples
+
+
+class Command(BaseCommand):
+
+    def handle(self, **options):
+        """List schemata that need manual review, then convert the remaining control positions."""
+        self.print_schema4manual_change()
+        self.change_control2pred_control()
+
+    def print_schema4manual_change(self):
+        """Print each ready, current lemma together with the pk of every schema needing manual change."""
+        ready_statuses = get_ready_statuses()
+        for lemma in Lemma.objects.filter(old=False, status__in=ready_statuses).order_by('entry_obj__name'):
+            for schema in lemma.frames.all():
+                for position in schema.positions.all():
+                    if self.position_need_manual_change(position):
+                        print (lemma, schema.pk)
+                        break
+
+    def position_need_manual_change(self, position):
+        """Return True when the position is a 'controllee' holding an infp phrase type."""
+        if position.categories.filter(category='controllee').exists():
+            for phrase_type in position.arguments.all():
+                if self.phrase_type_is_infp(phrase_type):
+                    return True
+        return False
+
+    def phrase_type_is_infp(self, phrase_type):
+        """Return True for an infp phrase type or a lex phrase type built on infp."""
+        if (phrase_type.type == 'infp'):
+            return True
+        if (phrase_type.type == 'lex' and
+                phrase_type.atributes.get(type='TYP FRAZY').values.all()[0].argument.type == 'infp'):
+            return True
+        return False
+
+    def change_control2pred_control(self):
+        """Save a new lemma version for every ready lemma that has schemata to convert."""
+        ready_statuses = get_ready_statuses()
+        for lemma in Lemma.objects.filter(old=False, status__in=ready_statuses).order_by('entry_obj__name'):
+            print (lemma)
+            changes = {'schemata': [],
+                       'schemata2change': []}
+            for schema in lemma.frames.all():
+                if self.schema_need_auto_change(schema):
+                    changes['schemata2change'].append(schema)
+                else:
+                    changes['schemata'].append(schema)
+
+            if changes['schemata2change']:
+                self.save_new_lemma_version(lemma, changes)
+
+    def schema_need_auto_change(self, schema):
+        """Return True when any position of the schema can be converted automatically."""
+        for position in schema.positions.all():
+            if self.controllee_need_auto_change(position):
+                return True
+        return False
+
+    def controllee_need_auto_change(self, position):
+        """Return True for a controllee* position without any infp phrase type."""
+        auto_change = False
+        if position.categories.filter(category__startswith='controllee').exists():
+            auto_change = True
+            for phrase_type in position.arguments.all():
+                if self.phrase_type_is_infp(phrase_type):
+                    auto_change = False
+        return auto_change
+
+    def save_new_lemma_version(self, old_lemma, changes):
+        """Retire old_lemma and build its successor with the converted schemata, semantics and examples."""
+        admin_user = User.objects.get(username='bniton')
+
+        old_lemma.old = True
+        old_lemma.save()
+
+        # create a new version of the entry
+        new_lemma = Lemma(entry=old_lemma.entry_obj.name,
+                          entry_obj=old_lemma.entry_obj,
+                          owner=old_lemma.owner,
+                          phraseologist=old_lemma.phraseologist,
+                          semanticist=old_lemma.semanticist,
+                          vocabulary=old_lemma.vocabulary,
+                          status=old_lemma.status,
+                          old=False,
+                          frequency_1M=old_lemma.frequency_1M,
+                          frequency_300M=old_lemma.frequency_300M)
+        new_lemma.save()
+
+        # create a change record for the change-tracking system
+        if (old_lemma.owner):
+            lemma_change = Change(user=admin_user, entry=old_lemma, act_owner=old_lemma.owner)
+            lemma_change.save()
+        else:
+            lemma_change = Change(user=admin_user, entry=old_lemma)
+            lemma_change.save()
+
+        # carry over old versions for change tracking and add the new one
+        for version in old_lemma.old_versions.all():
+            new_lemma.old_versions.add(version)
+        new_lemma.old_versions.add(lemma_change)
+
+        # carry over the status change history
+        for status_change in old_lemma.status_history.all():
+            new_lemma.status_history.add(status_change)
+
+        # carry over messages
+        for message in old_lemma.messages.all():
+            new_lemma.messages.add(message)
+
+        # carry over old frames
+        for old_frame in old_lemma.old_frames.all():
+            new_lemma.old_frames.add(old_frame)
+
+        # carry over Skladnica frames
+        for skladnica_frame in old_lemma.skladnica_frames.all():
+            new_lemma.skladnica_frames.add(skladnica_frame)
+
+        # carry over B frames
+        for B_frame in old_lemma.B_frames.all():
+            new_lemma.B_frames.add(B_frame)
+
+        # carry over schema opinions
+        for schema_opinion in old_lemma.frame_opinions.all():
+            new_lemma.frame_opinions.add(schema_opinion)
+
+        # carry over examples that match no schema
+        for example in old_lemma.lemma_nkjp_examples.all():
+            new_lemma.lemma_nkjp_examples.add(example)
+
+        # add the unchanged schemata
+        for schema in changes['schemata']:
+            new_lemma.frames.add(schema)
+
+        # create the new schemata and attach them to the entry
+        schemata_conversions = []
+        for old_schema in changes['schemata2change']:
+            new_schema, positions_changes = self.get_or_create_new_schema(old_schema)
+
+            # move the frame opinion over to the new schema
+            try:
+                old_opinion = old_lemma.frame_opinions.get(frame=old_schema)
+                opinion_value = old_opinion.value
+                new_lemma.frame_opinions.remove(old_opinion)
+                try:
+                    new_opinion = Frame_Opinion.objects.get(frame=new_schema,
+                                                            value=opinion_value)
+                except Frame_Opinion.DoesNotExist:
+                    new_opinion = Frame_Opinion(frame=new_schema,
+                                                value=opinion_value)
+                    new_opinion.save()
+                new_lemma.frame_opinions.add(new_opinion)
+            except Frame_Opinion.DoesNotExist:
+                pass
+
+            new_lemma.frames.add(new_schema)
+            schemata_conversions.append({'old_schema': old_schema,
+                                         'new_schema': new_schema,
+                                         'positions_changes': positions_changes})
+
+        # carry over the semantic layer
+        sem_reconnect_operations = self.get_semantic_operations(new_lemma, schemata_conversions)
+        update_connections(new_lemma.id, sem_reconnect_operations, admin_user)
+
+        # carry over / add new examples for the schemata
+        examples_operations = []
+        for old_example in old_lemma.nkjp_examples.all():
+            if new_lemma.frames.filter(pk=old_example.frame.pk).exists():
+                new_lemma.nkjp_examples.add(old_example)
+            else:
+                conversion = next(conv for conv in schemata_conversions if conv['old_schema'] == old_example.frame)
+
+                argument_selections = []
+                for old_arg_selection in old_example.arguments.all():
+
+                    position_conversion = next((pos_conv for pos_conv in conversion['positions_changes']
+                                                if pos_conv['from'].pk == old_arg_selection.position.pk), None)
+
+                    if position_conversion:
+                        new_arg_selection, xx = get_or_create_nkjp_arg_selection(position_conversion['to'],
+                                                                                 old_arg_selection.arguments.all())
+                        argument_selections.append(new_arg_selection)
+                    else:
+                        argument_selections.append(old_arg_selection)
+
+                # reuse the matching NKJP_Example object if it already exists
+                new_example, xx = get_or_create_nkjp_example(conversion['new_schema'], argument_selections,
+                                                             old_example.sentence, old_example.source,
+                                                             old_example.comment, old_example.opinion,
+                                                             old_example.approvers.all(),
+                                                             old_example.approved, old_example.semantic)
+                new_lemma.nkjp_examples.add(new_example)
+
+                # reconnect examples in semantic layer
+                for frame in new_lemma.entry_obj.visible_frames():  # TODO: or should this use actual frames?
+                    for lu in frame.lexical_units.all():
+                        if LexicalUnitExamples.objects.filter(lexical_unit=lu, example=old_example).exists():
+                            examples_operations.append(self.disconnect_example_operation(lu, old_example))
+                            examples_operations.append(self.connect_example_operation(lu, new_example))
+
+        reconnect_examples(new_lemma, examples_operations)
+
+    def get_or_create_new_schema(self, old_schema):
+        """Return (schema, positions_changes) for old_schema with control categories renamed to pred_*."""
+        positions = []
+        positions_changes = []
+        for position in old_schema.positions.all():
+            if self.position_need_auto_change(position):
+                new_position = self.get_or_create_new_position(position)
+                positions.append(new_position)
+                positions_changes.append({'from': position, 'to': new_position})
+            else:
+                positions.append(position)
+
+        sorted_positions = []
+        sorted_positions_dict = sortPositions(positions)
+        for position_dict in sorted_positions_dict:
+            sorted_positions.append(position_dict['position'])
+
+        sorted_positions_strs = []
+        for position in sorted_positions:
+            sorted_positions_strs.append(position.text_rep)
+
+        sorted_schema_chars = sortFrameChars(old_schema.characteristics.all())
+        sorted_schema_chars_strs = [char.value.value for char in sorted_schema_chars]
+
+        text_rep = u'%s:%s' % (':'.join(sorted_schema_chars_strs),
+                               '+'.join(sorted_positions_strs))
+
+        try:
+            new_schema = Frame.objects.get(text_rep=text_rep)
+        except Frame.DoesNotExist:
+            new_schema = Frame(text_rep=text_rep)
+            new_schema.save()
+
+            last_pos_obj = None
+            pos_obj_count = 0
+            for pos_obj in sorted_positions:
+                same_pos_db = Position.objects.filter(text_rep=pos_obj.text_rep).order_by('id')
+                if not last_pos_obj or last_pos_obj.text_rep != pos_obj.text_rep:
+                    pos_obj_count = 1
+                    new_schema.positions.add(same_pos_db[0])
+                else:
+                    pos_obj_count = pos_obj_count + 1
+                    if pos_obj_count <= len(same_pos_db):
+                        same_pos_obj = same_pos_db[pos_obj_count - 1]
+                        new_schema.positions.add(same_pos_obj)
+                    else:
+                        same_pos_obj = Position(text_rep=pos_obj.text_rep)
+                        same_pos_obj.save()
+                        for category in pos_obj.categories.all():
+                            same_pos_obj.categories.add(category)
+                        for arg in pos_obj.arguments.all():
+                            same_pos_obj.arguments.add(arg)
+                        new_schema.positions.add(same_pos_obj)
+                last_pos_obj = pos_obj
+            for schema_char in old_schema.characteristics.all():
+                new_schema.characteristics.add(schema_char)
+            if new_schema.has_phraseologic_arguments():
+                new_schema.phraseologic = True
+                new_schema.save()
+
+        return new_schema, positions_changes
+
+    def position_need_auto_change(self, position):
+        """Return True when the position carries a 'controllee' or 'controller' category."""
+        if position.categories.filter(category__in=['controllee', 'controller']).exists():
+            return True
+        return False
+
+    def get_or_create_new_position(self, old_position):
+        """Return the pred_controllee/pred_controller counterpart of old_position, creating it if missing."""
+        categories_strs = []
+        for category in old_position.categories.all():
+            if category.category == 'controllee':
+                categories_strs.append('pred_controllee')
+            elif category.category == 'controller':
+                categories_strs.append('pred_controller')
+            else:
+                categories_strs.append(category.category)
+
+        sorted_categories_strs = sortPosCatsAsStrTab(categories_strs)
+        sorted_arguments = sortArguments(old_position.arguments.all())
+
+        args_strs = []
+        for arg in sorted_arguments:
+            args_strs.append(arg.text_rep)
+
+        pos_text_rep = '%s{%s}' % (','.join(sorted_categories_strs), ';'.join(args_strs))
+
+        try:
+            new_position = Position.objects.get(text_rep=pos_text_rep)
+        except Position.DoesNotExist:
+            new_position = Position(text_rep=pos_text_rep)
+            new_position.save()
+
+            for category_name in sorted_categories_strs:
+                category = PositionCategory.objects.get(category=category_name)
+                new_position.categories.add(category)
+
+            for arg in old_position.arguments.all():
+                new_position.arguments.add(arg)
+
+        return new_position
+
+    def get_semantic_operations(self, lemma, schemata_conversions):
+        """Collect the reconnect operations of all schema conversions for the lemma's visible frames."""
+        operations = []
+
+        frames = lemma.entry_obj.visible_frames()  # TODO: or should this use actual frames?
+
+        for conv in schemata_conversions:
+            schema_operations = self.get_reconnect_operations(frames, conv)
+            operations.extend(schema_operations)
+
+        return operations
+
+
+    def get_reconnect_operations(self, frames, conversion):
+        """Build disconnect/connect operation pairs for realizations that pointed at the old schema."""
+        operations = []
+
+        for frame in frames:
+            for compl in frame.complements.all():
+                arg_ref = create_argument_ref(frame, compl)
+                for rel in compl.realizations.all():
+                    schema_change = False
+                    position_change = None
+                    if rel.frame.pk == conversion['old_schema'].pk:
+                        schema_change = True
+                        for change in conversion['positions_changes']:
+                            if change['from'].pk == rel.position.pk:
+                                position_change = change
+                    if schema_change:
+                        old_phrase_type_ref = create_phrase_type_ref(rel.frame, rel.position,
+                                                                     rel.argument, rel.alternation)
+                        if position_change:
+                            new_phrase_type_ref = create_phrase_type_ref(conversion['new_schema'],
+                                                                         position_change['to'],
+                                                                         rel.argument,
+                                                                         rel.alternation)
+                        else:
+                            new_phrase_type_ref = create_phrase_type_ref(conversion['new_schema'],
+                                                                         rel.position,
+                                                                         rel.argument,
+                                                                         rel.alternation)
+                        if new_phrase_type_ref != old_phrase_type_ref:
+                            operations.append(create_operation('disconnect', arg_ref, old_phrase_type_ref))
+                            operations.append(create_operation('connect', arg_ref, new_phrase_type_ref))
+        return operations
+
+    def disconnect_example_operation(self, lu, example):
+        """Describe removing the example from the lexical unit."""
+        return {'operation': 'remove_example', 'unit': lu.id, 'example': example.id}
+
+    def connect_example_operation(self, lu, example):
+        """Describe adding the example to the lexical unit."""
+        return {'operation': 'add_example', 'unit': lu.id, 'example': example.id}
diff --git a/dictionary/management/commands/check_text_reps.py b/dictionary/management/commands/check_text_reps.py
new file mode 100644
index 0000000..c2f41e9
--- /dev/null
+++ b/dictionary/management/commands/check_text_reps.py
@@ -0,0 +1,113 @@
+# -*- coding:utf-8 -*-
+
+import codecs
+import os
+
+from django.core.management.base import BaseCommand
+from django.db.models import Count
+
+from dictionary.common_func import frame_data_to_text_rep, \
+    position_data_to_text_rep
+from dictionary.models import Argument, Argument_Model, Frame, \
+    Position, sortatributes, AttributeParameter
+from settings import PROJECT_PATH
+
+WRONG_PARAMETERS_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_parameters_po_reperacji_20180801.txt')
+WRONG_ARGUMENTS_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_arguments_po_reperacji_20180801.txt')
+WRONG_POSITIONS_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_positions_po_reperacji_20180801.txt')
+WRONG_FRAMES_PATH = os.path.join(PROJECT_PATH, 'data', 'wrong', 'wrong_frames_po_reperacji_20180801.txt')
+
+class Command(BaseCommand):
+    args = 'none'
+    help = 'Looking for wrong text_reps.'
+
+    def handle(self, **options):
+        check_attr_parameters()
+        check_arguments_text_reps()
+        check_positions_text_reps()
+        check_frames_text_reps()
+
+
+def check_attr_parameters():
+    """Report every AttributeParameter that shares its type and subparameters with another one."""
+    print 'Checking parameters.'
+    wrong_parameters_file = codecs.open(WRONG_PARAMETERS_PATH, 'wt', 'utf-8')
+    try:
+        for attr_param in AttributeParameter.objects.all():
+            print attr_param
+            possible_param_objs = AttributeParameter.objects.annotate(subparams_count=Count('subparameters')).filter(
+                subparams_count=attr_param.subparameters.count())
+            for subparam in attr_param.subparameters.all():
+                possible_param_objs = possible_param_objs.filter(subparameters=subparam)
+            # possible_param_objs = possible_param_objs.distinct()
+            possible_param_objs = possible_param_objs.filter(type=attr_param.type)
+            if possible_param_objs.count() > 1:
+                wrong_parameters_file.write(u'%s\n' % unicode(attr_param))
+    finally:
+        wrong_parameters_file.close()
+
+
+def check_arguments_text_reps():
+    """Report every Argument whose stored text_rep differs from the recomputed one."""
+    print 'Checking arguments.'
+    wrong_arguments_file = codecs.open(WRONG_ARGUMENTS_PATH, 'wt', 'utf-8')
+    try:
+        for argument in Argument.objects.all():
+            print argument
+            proper_text_rep = arg_data_to_text_rep(argument)
+            if proper_text_rep != argument.text_rep:
+                wrong_arguments_file.write(u'%s --> proper: %s\n' % (argument.text_rep, proper_text_rep))
+    finally:
+        wrong_arguments_file.close()
+
+
+def check_positions_text_reps():
+    """Report every Position whose stored text_rep differs from the recomputed one."""
+    print 'Checking positions.'
+    wrong_positions_file = codecs.open(WRONG_POSITIONS_PATH, 'wt', 'utf-8')
+    try:
+        for position in Position.objects.all():
+            print position
+            proper_text_rep = position_data_to_text_rep(position.categories, position.arguments)
+            if proper_text_rep != position.text_rep:
+                wrong_positions_file.write(u'%s --> proper: %s\n' % (position.text_rep, proper_text_rep))
+    finally:
+        wrong_positions_file.close()
+
+
+def check_frames_text_reps():
+    """Report every Frame with a wrong text_rep along with its lemmas, opinions and examples."""
+    print 'Checking frames.'
+    wrong_frames_file = codecs.open(WRONG_FRAMES_PATH, 'wt', 'utf-8')
+    try:
+        for frame in Frame.objects.all():
+            print frame
+            proper_text_rep = frame_data_to_text_rep(frame.characteristics, frame.positions)
+            if proper_text_rep != frame.text_rep:
+                wrong_frames_file.write(u'%s --> proper: %s\n' % (frame.text_rep, proper_text_rep))
+                for lemma in frame.lemmas.all():
+                    if lemma.old:
+                        wrong_frames_file.write(u'\t\told: %s\n' % (lemma.entry))
+                    else:
+                        wrong_frames_file.write(u'\t\tnew: %s\n' % (lemma.entry))
+                for opinion in frame.opinions.all():
+                    wrong_frames_file.write(u'\t\topinion: %s\n' % (opinion.frame.text_rep))
+                for example in frame.nkjp_examples.all():
+                    wrong_frames_file.write(u'\t\texample: %s\n' % (example.sentence))
+    finally:
+        wrong_frames_file.close()
+
+
+def arg_data_to_text_rep(argument):
+    """Recompute the text_rep of an Argument from its type and sorted attributes."""
+    sorted_attributes = sortatributes(argument)
+    arg_model = Argument_Model.objects.get(arg_model_name=argument.type)
+    # sorted_attributes = sort_arg_attributes(arg_model, attributes)
+    attributes_text_reps = [unicode(attr) for attr in sorted_attributes]
+    if len(sorted_attributes) == 0:
+        arg_text_rep = argument.type
+    elif arg_model.hide_type:
+        arg_text_rep = u'%s' % (','.join(attributes_text_reps))
+    else:
+        arg_text_rep = u'%s(%s)' % (argument.type, ','.join(attributes_text_reps))
+    return arg_text_rep
diff --git a/dictionary/management/commands/get_stats_from.py b/dictionary/management/commands/get_stats_from.py
index ea37bc6..e70a91d 100644
--- a/dictionary/management/commands/get_stats_from.py
+++ b/dictionary/management/commands/get_stats_from.py
@@ -11,7 +11,7 @@ from django.db.models import Count, Max
 from dictionary.models import get_ready_statuses
 
 
-STARTDATE = datetime.datetime(2017, 1, 1, 00, 00)
+STARTDATE = datetime.datetime(2016, 7, 1, 00, 00)
 
 
 class Command(BaseCommand):
@@ -113,8 +113,6 @@ def get_stats(pos):
             stats_dict[u'frames_with_shared'] += visible_frames.count()
             stats_dict[u'sem_lemmas'] += 1
 
-
-
     return stats_dict
 
 
diff --git a/dictionary/models.py b/dictionary/models.py
index 246a8aa..30d6d4b 100644
--- a/dictionary/models.py
+++ b/dictionary/models.py
@@ -501,12 +501,13 @@ class NKJP_Example(Model):
     )
 
 def get_or_create_nkjp_example(frame, arguments, sentence, source,
-                               comment, opinion, approvers, approved):
+                               comment, opinion, approvers, approved, semantic):
     created = False
     example = None
     possible_examples = NKJP_Example.objects.filter(frame=frame, sentence=sentence,
                                                     source=source, comment=comment,
-                                                    opinion=opinion, approved=approved)
+                                                    opinion=opinion, approved=approved,
+                                                    semantic=semantic)
     if possible_examples.exists():
         for arg_sel in arguments:
             possible_examples = possible_examples.filter(arguments=arg_sel)
@@ -522,27 +523,31 @@ def get_or_create_nkjp_example(frame, arguments, sentence, source,
                 example = create_nkjp_example(frame=frame, arguments=arguments,
                                               sentence=sentence, source=source,
                                               comment=comment, opinion=opinion,
-                                              approvers=approvers, approved=approved)
+                                              approvers=approvers, approved=approved,
+                                              semantic=semantic)
                 created = True
         else:
             example = create_nkjp_example(frame=frame, arguments=arguments,
                                           sentence=sentence, source=source,
                                           comment=comment, opinion=opinion,
-                                          approvers=approvers, approved=approved)
+                                          approvers=approvers, approved=approved,
+                                          semantic=semantic)
             created = True
     else:
         example = create_nkjp_example(frame=frame, arguments=arguments,
                                       sentence=sentence, source=source,
                                       comment=comment, opinion=opinion,
-                                      approvers=approvers, approved=approved)
+                                      approvers=approvers, approved=approved,
+                                      semantic=semantic)
         created = True
     return example, created
 
 def create_nkjp_example(frame, arguments, sentence, source,
-                        comment, opinion, approvers, approved):
+                        comment, opinion, approvers, approved, semantic):
     example = NKJP_Example(frame=frame, sentence=sentence,
                            source=source, comment=comment,
-                           opinion=opinion, approved=approved)
+                           opinion=opinion, approved=approved,
+                           semantic=semantic)
     example.save()
     example.arguments.add(*arguments)
     example.approvers.add(*approvers)
diff --git a/dictionary/teixml.py b/dictionary/teixml.py
index 7307534..524ecc9 100644
--- a/dictionary/teixml.py
+++ b/dictionary/teixml.py
@@ -65,7 +65,7 @@ def write_license_elem(parent_elem):
     licence.attrib['target'] = u'http://creativecommons.org/licenses/by-sa/4.0/'
 
     p = etree.SubElement(licence, 'p')
-    p.text = u'(C) Copyright 2012–2017 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'
+    p.text = u'(C) Copyright 2012–2018 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'
 
     p = etree.SubElement(licence, 'p')
     p.text = u'This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/'
diff --git a/semantics/management/commands/adjectives_todo.py b/semantics/management/commands/adjectives_todo.py
index 7b3a634..895f4f4 100644
--- a/semantics/management/commands/adjectives_todo.py
+++ b/semantics/management/commands/adjectives_todo.py
@@ -1,37 +1,34 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 
-import sys, os, codecs
-
 from django.core.management.base import BaseCommand
 from django.core.exceptions import ObjectDoesNotExist
 
 from dictionary.models import Entry, POS
-from wordnet.models import LexicalUnit
-from settings import PROJECT_PATH
+
+
+REL_POS = 'noun'
+REL_STATUS = '(S) sprawdzone'
+
 
 class Command(BaseCommand):
     args = 'none'
     help = ''
 
     def handle(self, **options):
-        nouns_todo()
+        adj_todo()
+
 
-def nouns_todo():
+def adj_todo():
     adj = POS.objects.get(tag='adj')
-    verb = POS.objects.get(tag='verb')
     entries = Entry.objects.filter(pos=adj).order_by('name')
     for entry in entries:
         try:
-            temp = entry.actual_lemma()
+            entry.actual_lemma()
         except ObjectDoesNotExist:
             continue
-        rel_entries = entry.rel_entries.filter(pos=verb)
+
+        rel_entries = entry.rel_entries.filter(pos__tag=REL_POS)
         for rel_entry in rel_entries:
-            try:
-                temp = entry.actual_lemma()
-            except ObjectDoesNotExist:
-                continue
-            if rel_entry.actual_lemma().status.priority == 100:
+            if rel_entry.actual_lemma().status.status == REL_STATUS:
                 print entry.name, ' ', entry.actual_lemma().status.status, '\t->\t', rel_entry.name, ' ', rel_entry.actual_lemma().status.status
-