# wrong_derivatives.py (2.65 KB)
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
from django.db.transaction import atomic

from common.util import uniprint, no_history
from dictionary.auto_derivatives import ppas_data, pact_data, create_derivative
from dictionary.models import Lexeme, LexemeAttributeValue, Ending, \
    LexemeForm
from patterns.models import Pattern


class Command(BaseCommand):
    """Django management command that delegates to ``wrong_derivatives``."""

    help = "Convert derivatives so they don't use verb patterns"

    def handle(self, *args, **options):
        """Run the command; positional arguments and options are not used."""
        wrong_derivatives()

def _attribute_value(attribute_name, value):
    """Fetch the one LexemeAttributeValue with this attribute name and value.

    Uses ``objects.get``, so a missing or ambiguous row raises.
    """
    return LexemeAttributeValue.objects.get(
        attribute__name=attribute_name, value=value)


# These lookups hit the database when the module is imported.
INTRANS = _attribute_value(u'przechodniość', 'iT')
IMPROPER = _attribute_value(u'właściwy', 'Q')
DK = _attribute_value(u'aspekt', 'dk')
NDK = _attribute_value(u'aspekt', 'ndk')

def _pattern_forms(lexeme, label_symbol, suffix):
    """Build (form, entry) pairs from a lexeme's inflection patterns.

    For each inflection pattern of ``lexeme`` and each ending of that
    pattern whose base form label has the symbol ``label_symbol``, the form
    is the pattern's root, followed by the ending string and ``suffix``.
    """
    pairs = set()
    for lip in lexeme.lexemeinflectionpattern_set.all():
        matching_endings = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol=label_symbol)
        for ending in matching_endings:
            pairs.add((lip.root + ending.string + suffix, lexeme.entry))
    return pairs


@atomic
def wrong_derivatives():
    """Print candidate participle forms that are absent from LexemeForm.

    Works on verbs (``part_of_speech='v'``) owned by the 'SGJP' vocabulary,
    grouped by the INTRANS, IMPROPER, DK and NDK attribute values:

    * INTRANS or IMPROPER verbs: all forms of the derivatives created from
      ``ppas_data``;
    * DK or IMPROPER verbs: all forms of the derivatives created from
      ``pact_data``, plus root + ending (base form label '3') + 'c';
    * NDK or IMPROPER verbs: root + ending (base form label "6'") + 'szy'.

    Each collected (form, entry) pair whose form is not among the existing
    ``LexemeForm.form`` values is printed as ``form, entry``, in sorted
    order. The whole function runs under ``@atomic``.
    """
    no_history()
    sgjp_verbs = Lexeme.objects.filter(
        part_of_speech='v', owner_vocabulary='SGJP')
    intransitive = sgjp_verbs.filter(lexemeattributevalue=INTRANS)
    improper = sgjp_verbs.filter(lexemeattributevalue=IMPROPER)
    perfective = sgjp_verbs.filter(lexemeattributevalue=DK)
    imperfective = sgjp_verbs.filter(lexemeattributevalue=NDK)

    # NOTE(review): OR-ing two filters over the same relation may yield a
    # lexeme more than once -- confirm whether .distinct() is wanted here.
    candidates = set()

    for lexeme in intransitive | improper:
        for data in ppas_data(lexeme):
            derivative = create_derivative(
                lexeme, data['pos'], data['entry'], data['index'],
                pl=data['pl'])
            candidates.update(
                (form, lexeme.entry) for form in derivative.all_forms())

    for lexeme in perfective | improper:
        for data in pact_data(lexeme):
            derivative = create_derivative(
                lexeme, data['pos'], data['entry'], data['index'])
            candidates.update(
                (form, lexeme.entry) for form in derivative.all_forms())
        candidates |= _pattern_forms(lexeme, '3', u'c')

    for lexeme in imperfective | improper:
        candidates |= _pattern_forms(lexeme, "6'", u'szy')

    # Fetched only after the loops above, so it reflects the database state
    # at that point.
    known_forms = set(LexemeForm.objects.values_list('form', flat=True))
    missing = [pair for pair in candidates if pair[0] not in known_forms]
    missing.sort()
    for pair in missing:
        uniprint('%s, %s' % pair)