# check_text_reps.py 4.44 KB
# -*- coding:utf-8 -*-

import codecs
import os

from django.core.management.base import BaseCommand
from django.db.models import Count

from dictionary.common_func import frame_data_to_text_rep, \
    position_data_to_text_rep
from dictionary.models import Argument, Argument_Model, Frame, \
    Position, sortatributes, AttributeParameter
from settings import PROJECT_PATH

# Report files written by the checks below; all of them live in
# <PROJECT_PATH>/data/wrong/.
_WRONG_DATA_DIR = os.path.join(PROJECT_PATH, 'data', 'wrong')

WRONG_PARAMETERS_PATH = os.path.join(
    _WRONG_DATA_DIR, 'wrong_parameters_po_reperacji_20180801.txt')
WRONG_ARGUMENTS_PATH = os.path.join(
    _WRONG_DATA_DIR, 'wrong_arguments_po_reperacji_20180801.txt')
WRONG_POSITIONS_PATH = os.path.join(
    _WRONG_DATA_DIR, 'wrong_positions_po_reperacji_20180801.txt')
WRONG_FRAMES_PATH = os.path.join(
    _WRONG_DATA_DIR, 'wrong_frames_po_reperacji_20180801.txt')

class Command(BaseCommand):
    """Management command that runs all text_rep consistency checks."""

    args = 'none'
    help = 'Looking for wrong text_reps.'

    def handle(self, **options):
        """Run the checks in order: parameters, arguments, positions, frames.

        ``options`` is accepted for the management-command interface but is
        not used.
        """
        checks = (
            check_attr_parameters,
            check_arguments_text_reps,
            check_positions_text_reps,
            check_frames_text_reps,
        )
        for run_check in checks:
            run_check()


def check_attr_parameters():
    print 'Checking parameters.'
    # try:
    wrong_parameters_file = codecs.open(WRONG_PARAMETERS_PATH, 'wt', 'utf-8')
    for attr_param in AttributeParameter.objects.all():
        print attr_param
        possible_param_objs = AttributeParameter.objects.annotate(subparams_count=Count('subparameters')).filter(
            subparams_count=attr_param.subparameters.count())
        for subparam in attr_param.subparameters.all():
            possible_param_objs = possible_param_objs.filter(subparameters=subparam)
        # possible_param_objs = possible_param_objs.distinct()
        possible_param_objs = possible_param_objs.filter(type=attr_param.type)
        if possible_param_objs.count() > 1:
            wrong_parameters_file.write(u'%s' % unicode(attr_param))
            # finally:
    wrong_parameters_file.close()


def check_arguments_text_reps():
    print 'Checking arguments.'
    # try:
    wrong_arguments_file = codecs.open(WRONG_ARGUMENTS_PATH, 'wt', 'utf-8')
    for argument in Argument.objects.all():
        print argument
        proper_text_rep = arg_data_to_text_rep(argument)
        if proper_text_rep != argument.text_rep:
            wrong_arguments_file.write(u'%s --> proper: %s\n' % (argument.text_rep, proper_text_rep))
            # finally:
    wrong_arguments_file.close()


def check_positions_text_reps():
    print 'Checking positions.'
    # try:
    wrong_positions_file = codecs.open(WRONG_POSITIONS_PATH, 'wt', 'utf-8')
    for position in Position.objects.all():
        print position
        proper_text_rep = position_data_to_text_rep(position.categories, position.arguments)
        if proper_text_rep != position.text_rep:
            wrong_positions_file.write(u'%s --> proper: %s\n' % (position.text_rep, proper_text_rep))
            # finally:
    wrong_positions_file.close()


def check_frames_text_reps():
    print 'Checking frames.'
    # try:
    wrong_frames_file = codecs.open(WRONG_FRAMES_PATH, 'wt', 'utf-8')
    for frame in Frame.objects.all():
        print frame
        proper_text_rep = frame_data_to_text_rep(frame.characteristics, frame.positions)
        if proper_text_rep != frame.text_rep:
            wrong_frames_file.write(u'%s --> proper: %s\n' % (frame.text_rep, proper_text_rep))
            for lemma in frame.lemmas.all():
                if lemma.old:
                    wrong_frames_file.write(u'\t\told: %s\n' % (lemma.entry))
                else:
                    wrong_frames_file.write('u\t\tnew: %s\n' % (lemma.entry))
            for opinion in frame.opinions.all():
                wrong_frames_file.write(u'\t\topinion: %s\n' % (opinion.frame.text_rep))
            for example in frame.nkjp_examples.all():
                wrong_frames_file.write(u'\t\texample: %s\n' % (example.sentence))
                # finally:
    wrong_frames_file.close()


def arg_data_to_text_rep(argument):
    """Build the text representation of ``argument`` from its data.

    The attributes come from sortatributes(argument) and the Argument_Model
    is looked up by ``argument.type``. The result is:

    * ``argument.type`` alone when there are no attributes,
    * the comma-joined attributes when the model has ``hide_type`` set,
    * ``type(attr1,attr2,...)`` otherwise.
    """
    attrs = sortatributes(argument)
    model = Argument_Model.objects.get(arg_model_name=argument.type)
    attr_texts = [unicode(attr) for attr in attrs]

    if len(attrs) == 0:
        return argument.type

    joined_attrs = ','.join(attr_texts)
    if model.hide_type:
        return u'%s' % (joined_attrs)
    return u'%s(%s)' % (argument.type, joined_attrs)