# fix_homonym.py
#-*- coding:utf-8 -*-

from django.core.management.base import BaseCommand
from django.db.models import Count
from common.util import no_history
from dictionary.management.commands.import_data import bulk_create
from dictionary.models import Lexeme, HomonymNumber

class Command(BaseCommand):
    """Management command that recomputes homonym numbers.

    Runs ``fix_homonym`` and then ``fix_homonym_morfeusz``. Any options
    passed in by the command framework are accepted but not used.
    """
    help = 'fixes homonym numbers'
    args = 'none'

    def handle(self, **options):
        # Run each fix-up routine in turn: the general renumbering first,
        # then the Morfeusz-variant rebuild.
        for fix in (fix_homonym, fix_homonym_morfeusz):
            fix()


def fix_homonym():
    """Renumber ``homonym_number`` within each group of homonymous lexemes.

    A group is every Lexeme sharing the same ``entry`` and
    ``part_of_speech``. Only groups with more than one member are touched;
    their lexemes are numbered 1, 2, 3, ... in ascending primary-key order
    and saved one by one.
    """
    # NOTE(review): presumably disables history tracking for the saves
    # below -- confirm against common.util.no_history.
    no_history()
    duplicated_pairs = (
        Lexeme.objects.values('entry', 'part_of_speech')
        .annotate(count=Count('pk'))
        .filter(count__gt=1)
        # Clear any ordering (e.g. a Meta.ordering on Lexeme, if one is
        # declared): ordering fields are otherwise added to GROUP BY and
        # would split the groups, hiding real duplicates.
        .order_by())
    for pair in duplicated_pairs:
        lexemes = Lexeme.objects.filter(
            entry=pair['entry'],
            part_of_speech=pair['part_of_speech']).order_by('pk')
        # Numbering starts at 1, following primary-key order.
        for number, lexeme in enumerate(lexemes, 1):
            lexeme.homonym_number = number
            lexeme.save()

def fix_homonym_morfeusz():
    """Rebuild all HomonymNumber rows of the 'Morfeusz' variant.

    Existing 'Morfeusz' rows are deleted first. Then, for each collection
    of part-of-speech ids in ``HomonymNumber.MORFEUSZ_LETTERS``, every
    ``entry`` that occurs on more than one Lexeme within that collection
    gets its lexemes numbered 1, 2, 3, ... in ascending primary-key order.
    The new rows are collected in memory and written at the end through
    the project's ``bulk_create`` helper.
    """
    # NOTE(review): presumably disables history tracking -- confirm against
    # common.util.no_history.
    no_history()
    HomonymNumber.objects.filter(variant_id='Morfeusz').delete()
    hn_list = []
    # .values() rather than the Python 2-only .itervalues(): iteration is
    # the same on Python 2 and it keeps working on Python 3.
    for pos_list in HomonymNumber.MORFEUSZ_LETTERS.values():
        lexemes = Lexeme.objects.filter(part_of_speech_id__in=pos_list)
        # .order_by() clears any ordering (e.g. a Meta.ordering on Lexeme,
        # if one is declared) so that it cannot be added to GROUP BY and
        # split the per-entry groups.
        homonym_entries = (
            lexemes.values('entry')
            .annotate(count=Count('pk'))
            .filter(count__gt=1)
            .order_by())
        for homonym_entry in homonym_entries:
            homonyms = Lexeme.objects.filter(
                part_of_speech_id__in=pos_list,
                entry=homonym_entry['entry']).order_by('pk')
            # Numbering starts at 1, following primary-key order.
            for i, lexeme in enumerate(homonyms, 1):
                hn_list.append(HomonymNumber(
                    variant_id='Morfeusz', lexeme=lexeme, number=i))
    bulk_create(HomonymNumber, hn_list)