# fix_surnames.py 2.3 KB
#-*- coding:utf-8 -*-

import sys
from django.core.management.base import BaseCommand, CommandError
from common.util import no_history, debug
from dictionary.models import Lexeme, Vocabulary, LexemeAssociation

class Command(BaseCommand):
  """Management command that runs ``fix_surnames()``; it takes no arguments."""
  args = 'none'
  help = 'Fixes SGJP surnames which come from Morfologik as adjectives'

  def handle(self, *args, **options):
    """Run the surname fix.

    The signature mirrors ``BaseCommand.handle(*args, **options)`` so that
    stray positional arguments produce a clean ``CommandError`` instead of
    a ``TypeError`` traceback.

    Raises:
      CommandError: if any positional argument is supplied.
    """
    if args:
      raise CommandError('This command takes no arguments')
    fix_surnames()

def fix_surnames():
  """Replace Morfologik 'adj' surname lexemes with their SGJP 'subst' homonyms.

  Candidates are capitalised lexemes among Morfologik's ``owned_lexemes_pk()``
  with part of speech 'adj' whose entry is also the entry of some capitalised
  'subst' lexeme. For each candidate ending in 'i' or 'y':

  * look up the 'subst' lexeme with the same entry and inflection
    characteristic 'm1', and the 'subst' lexeme whose entry is the candidate
    with its last letter replaced by 'a' and inflection characteristic 'f',
    both restricted to SGJP's ``owned_lexemes_pk()``;
  * if exactly one of each exists, associate both with the Morfologik
    vocabulary (unless already associated) and delete the candidate.

  Every skipped or processed candidate is reported through ``debug``.
  """
  # NOTE(review): presumably switches off change-history tracking for this
  # bulk operation -- confirm against common.util.no_history.
  no_history()
  morfologik = Vocabulary.objects.get(id='Morfologik')
  SGJP = Vocabulary.objects.get(id='SGJP')
  morf = morfologik.owned_lexemes_pk()
  sgjp = SGJP.owned_lexemes_pk()
  capitalised = u'^[A-ZĄĆĘŁŃÓŚŻŹ]'
  existing = Lexeme.objects
  # Original author's note: filtering this query by SGJP leaves nothing,
  # so the noun side is deliberately not restricted to a vocabulary.
  sgjp_subst = existing.filter(
    part_of_speech__symbol='subst', entry__regex=capitalised)
  morf_surnames = existing.filter(
    pk__in=morf, part_of_speech__symbol='adj', entry__regex=capitalised)
  subst_entries = set(sgjp_subst.values_list('entry', flat=True))
  surnames_entries = set(morf_surnames.values_list('entry', flat=True))
  entries = subst_entries & surnames_entries
  for lexeme in morf_surnames.filter(entry__in=entries):
    # Keep the entry in a local: it is still reported after the delete below.
    entry = lexeme.entry
    if not entry.endswith(('i', 'y')):
      debug(entry, u'Nie jest nazwiskiem rodzaju męskiego')
      continue
    m = _sgjp_homonyms(sgjp, entry, 'm1')
    f = _sgjp_homonyms(sgjp, entry[:-1] + 'a', 'f')
    if not m or not f:
      debug(entry, u'Brak homonimu w SGJP')
    elif len(m) > 1 or len(f) > 1:
      debug(entry, u'Niejednoznaczne homonimy w SGJP')
    else:
      _associate_once(
        m[0], morfologik, entry, u'Już jest dopisany do Morfologika [m]')
      _associate_once(
        f[0], morfologik, entry, u'Już jest dopisany do Morfologika [f]')
      lexeme.delete()
      debug(entry, u'Wykonano')


def _sgjp_homonyms(sgjp_pks, entry, characteristic):
  """Return at most two distinct 'subst' lexemes spelled ``entry`` among ``sgjp_pks`` with the given inflection characteristic."""
  matches = Lexeme.objects.filter(
    pk__in=sgjp_pks, entry=entry, part_of_speech__symbol='subst',
    lexemeinflectionpattern__inflection_characteristic__entry=characteristic)
  # The filter joins through inflection patterns, so a lexeme with several
  # matching patterns would come back more than once and look ambiguous;
  # distinct() collapses those rows. Two rows are enough to tell
  # "none" / "exactly one" / "more than one" apart in a single query.
  return list(matches.distinct()[:2])


def _associate_once(lexeme, vocabulary, entry, already_message):
  """Link ``lexeme`` to ``vocabulary``, or report ``already_message`` for ``entry`` if the link already exists."""
  if vocabulary in lexeme.vocabularies.all():
    debug(entry, already_message)
  else:
    LexemeAssociation(lexeme=lexeme, vocabulary=vocabulary).save()