# fix_morfologik.py
#-*- coding:utf-8 -*-

import sys
from django.core.management.base import BaseCommand, CommandError
from common.util import no_history, debug
from dictionary.models import Lexeme, Pattern, Vocabulary, \
    Qualifier, InflectionCharacteristic


class Command(BaseCommand):
    """Management command that applies the Morfologik import fix-ups."""

    help = 'Fixes various issues with Morfologik import'
    args = 'none'

    def handle(self, **options):
        """Run every fix-up; the received options are not used."""
        fix_morfologik()


# Module-level state shared by the fix-up functions in this file.
# NOTE: these statements run when the module is imported, so importing it
# already performs the Vocabulary lookup (Django's get() raises if no single
# matching row exists).
morfologik = Vocabulary.objects.get(id='Morfologik')
# Result of owned_lexemes_pk() on that vocabulary -- presumably the primary
# keys of the lexemes it owns (TODO confirm). Not referenced by any function
# in the visible part of this file.
morf = morfologik.owned_lexemes_pk()
# Starting point for every lexeme query made by the functions in this file.
existing = Lexeme.objects # deleted lexemes are filtered out automatically


def sgtant_qualifiers():
    """Tag lexemes commented as 'singulare tantum' with a qualifier.

    The 'zwykle lp' qualifier is fetched from (or created in) the Morfologik
    vocabulary and added to every lexeme whose comment contains the text
    'singulare tantum'.
    """
    singularia = existing.filter(comment__contains='singulare tantum')
    # get_or_create returns (object, created); only the object is needed.
    qualifier, _ = Qualifier.objects.get_or_create(
        label='zwykle lp', vocabulary=morfologik)
    for lexeme in singularia:
        lexeme.qualifiers.add(qualifier)


def cand_ndm():
    """Switch selected lexemes from status 'desc' to status 'cand'.

    A lexeme is selected when it has status 'desc' and an inflection pattern
    named '0000', 'P00' or 'ndm'. For 'ndm', lexemes whose part of speech
    symbol is 'adv' are left untouched.

    The pattern names are processed one after another, and each query is
    built and run only after the previous pass has saved its changes.
    """
    # (pattern name, whether lexemes with part of speech 'adv' are skipped)
    passes = (
        ('0000', False),
        ('P00', False),
        ('ndm', True),
    )
    for pattern_name, skip_adverbs in passes:
        selected = existing.filter(
            status='desc',
            lexemeinflectionpattern__pattern__name=pattern_name)
        if skip_adverbs:
            selected = selected.exclude(part_of_speech__symbol='adv')
        for lexeme in selected:
            lexeme.status = 'cand'
            lexeme.save()


def fix_ow():
    """Set inflection characteristic 'm3' on capitalised nouns ending in 'ów'.

    Selects lexemes with source 'Morfologik' and part of speech 'subst' whose
    entry matches the regex below (an uppercase first letter and the ending
    'ów'). Every inflection pattern of each selected lexeme gets the 'm3'
    characteristic for 'subst' and is saved. The number of selected lexemes
    is then reported through debug().
    """
    capitalised_ow = existing.filter(
        entry__regex='^[A-ZĄĆĘŁŃÓŚŻŹ].*ów$',
        part_of_speech__symbol='subst',
        source='Morfologik')
    masculine3 = InflectionCharacteristic.objects.get(
        part_of_speech__symbol='subst', symbol='m3')
    for lexeme in capitalised_ow:
        for inflection in lexeme.lexemeinflectionpattern_set.all():
            inflection.inflection_characteristic = masculine3
            inflection.save()
    # Counted only after the loop, as in the original flow.
    fixed_total = capitalised_ow.count()
    debug(u'ów', u'%s poprawionych' % fixed_total)

# NOTE: does not check whether, after the change, the lexeme ends up
# identical to one from SGJP.
def fix_stwo():
    """Re-assign pattern and characteristic of '-stwo' lexemes using '0173o'.

    Selects lexemes with source 'Morfologik' whose entry ends with 'stwo' and
    which have an inflection pattern named '0173o'. Every inflection pattern
    of each selected lexeme is switched to pattern '0205' with the 'p1'
    characteristic for 'subst', then saved. The number of selected lexemes
    is reported through debug().
    """
    stwo_lexemes = existing.filter(
        source='Morfologik',
        entry__endswith='stwo',
        lexemeinflectionpattern__pattern__name='0173o')
    plural1 = InflectionCharacteristic.objects.get(
        part_of_speech__symbol='subst', symbol='p1')
    replacement_pattern = Pattern.objects.get(name='0205')
    for lexeme in stwo_lexemes:
        for inflection in lexeme.lexemeinflectionpattern_set.all():
            inflection.inflection_characteristic = plural1
            inflection.pattern = replacement_pattern
            inflection.save()
    # Counted only after the loop, as in the original flow.
    fixed_total = stwo_lexemes.count()
    debug(u'stwo', u'%s poprawionych' % fixed_total)


def fix_morfologik():
    """Run all Morfologik fix-ups in their fixed order.

    no_history() is called first (presumably it turns off change-history
    recording for the bulk edits -- confirm in common.util), followed by the
    individual fix-up steps.
    """
    no_history()
    steps = (sgtant_qualifiers, cand_ndm, fix_ow, fix_stwo)
    for step in steps:
        step()