#-*- coding:utf-8 -*-
# fix_surnames.py

import sys
from django.core.management.base import BaseCommand, CommandError
from common.util import no_history, debug
from dictionary.models import Lexeme, Vocabulary


class Command(BaseCommand):
    """Management command wrapper around :func:`fix_surnames`.

    The command takes no positional arguments; all of the work is delegated
    to the module-level ``fix_surnames`` function.
    """

    help = 'Fixes SGJP surnames which come from Morfologik as adjectives'
    args = 'none'

    def handle(self, **options):
        """Run the surname fix; the received options are not used."""
        fix_surnames()


def fix_surnames():
    """Replace Morfologik adjective-surnames with their SGJP noun homonyms.

    Candidates are lexemes owned by the 'Morfologik' vocabulary that are
    tagged as adjectives, start with a capital letter, and share their entry
    with some capitalized noun ('subst') lexeme.

    For every candidate ending in 'i' or 'y' the function looks up, among the
    lexemes owned by the 'SGJP' vocabulary, exactly one masculine noun
    (inflection characteristic 'm1') with the same entry and exactly one
    feminine noun ('f') whose entry is the candidate with its last letter
    replaced by 'a'.  When both are found unambiguously they are added to the
    Morfologik vocabulary (unless already there) and the adjective lexeme is
    deleted.  Every skipped or processed candidate is reported via
    ``debug``.

    Calls ``no_history()`` before touching any data (presumably switches off
    history tracking for the bulk edit -- confirm in common.util).
    """
    no_history()
    morfologik = Vocabulary.objects.get(id='Morfologik')
    sgjp_vocabulary = Vocabulary.objects.get(id='SGJP')
    morfologik_pks = morfologik.owned_lexemes_pk()
    sgjp_pks = sgjp_vocabulary.owned_lexemes_pk()

    capitalized = u'^[A-ZĄĆĘŁŃÓŚŻŹ]'
    # Not restricted to SGJP on purpose: the original author noted that
    # filtering these nouns by SGJP left nothing.
    capitalized_nouns = Lexeme.objects.filter(
        part_of_speech__symbol='subst', entry__regex=capitalized)
    morfologik_surnames = Lexeme.objects.filter(
        pk__in=morfologik_pks, part_of_speech__symbol='adj',
        entry__regex=capitalized)

    noun_entries = set(capitalized_nouns.values_list('entry', flat=True))
    surname_entries = set(morfologik_surnames.values_list('entry', flat=True))
    shared_entries = noun_entries & surname_entries

    for lexeme in morfologik_surnames.filter(entry__in=shared_entries):
        entry = lexeme.entry
        if not entry.endswith((u'i', u'y')):
            # Message: "is not a masculine surname".
            debug(entry, u'Nie jest nazwiskiem rodzaju męskiego')
            continue

        masculine = _sgjp_homonyms(sgjp_pks, entry, 'm1')
        feminine = _sgjp_homonyms(sgjp_pks, entry[:-1] + 'a', 'f')
        if not masculine or not feminine:
            # Message: "no homonym in SGJP".
            debug(entry, u'Brak homonimu w SGJP')
            continue
        if len(masculine) > 1 or len(feminine) > 1:
            # Message: "ambiguous homonyms in SGJP".
            debug(entry, u'Niejednoznaczne homonimy w SGJP')
            continue

        # Messages: "already added to Morfologik [m]/[f]".
        replacements = (
            (masculine[0], u'Już jest dopisany do Morfologika [m]'),
            (feminine[0], u'Już jest dopisany do Morfologika [f]'),
        )
        for homonym, already_added in replacements:
            if morfologik not in homonym.vocabularies.all():
                morfologik.add_lexeme(homonym)
            else:
                debug(entry, already_added)
        lexeme.delete()
        # Message: "done".
        debug(entry, u'Wykonano')


def _sgjp_homonyms(sgjp_pks, entry, characteristic):
    """Return at most two distinct SGJP nouns matching entry and characteristic.

    ``distinct()`` is required because the filter spans the multi-valued
    ``lexemeinflectionpattern`` relation: a lexeme with several matching
    inflection patterns would otherwise be returned once per pattern and be
    mistaken for an ambiguous homonym.  Two rows are enough for the caller to
    tell "none", "exactly one" and "more than one" apart with a single query.
    """
    matches = Lexeme.objects.filter(
        pk__in=sgjp_pks, entry=entry, part_of_speech__symbol='subst',
        lexemeinflectionpattern__inflection_characteristic__symbol=characteristic)
    return list(matches.distinct()[:2])