# fix_morfologik.py 2.28 KB
#-*- coding:utf-8 -*-

import sys
from django.core.management.base import BaseCommand, CommandError
from common.util import no_history, debug
from dictionary.models import Lexeme, Pattern, Vocabulary, LexemeAssociation, \
  Qualifier, InflectionCharacteristic

class Command(BaseCommand):
  """Management command that runs the Morfologik import clean-up."""
  args = 'none'
  help = 'Fixes various issues with Morfologik import'

  def handle(self, *args, **options):
    """Run every Morfologik fix via fix_morfologik().

    The signature accepts ``*args`` so that it matches
    ``BaseCommand.handle(*args, **options)``. The command takes no
    positional arguments; if any are supplied a CommandError is raised
    instead of letting the call fail with a TypeError.

    Raises:
      CommandError: if positional arguments were passed.
    """
    if args:
      raise CommandError('This command takes no arguments')
    fix_morfologik()

# Module-level state shared by the fix functions below.
# NOTE: these statements run as soon as the module is imported.

# The vocabulary whose id is 'Morfologik'.
morfologik = Vocabulary.objects.get(id='Morfologik')
# Result of owned_lexemes_pk() on that vocabulary -- presumably the primary
# keys of the lexemes it owns (TODO confirm against the model). Not referenced
# by the functions visible in this file.
morf = morfologik.owned_lexemes_pk()
existing = Lexeme.objects # deleted lexemes are filtered out automatically

def sgtant_qualifiers():
  """Attach the 'zwykle lp' qualifier to lexemes marked as singulare tantum.

  The qualifier is fetched from (or created in) the Morfologik vocabulary and
  added to every lexeme whose comment contains 'singulare tantum'.
  """
  # get_or_create returns (object, created); only the object is needed.
  qualifier = Qualifier.objects.get_or_create(
    label='zwykle lp', vocabulary=morfologik)[0]
  singularia = existing.filter(comment__contains='singulare tantum')
  for lexeme in singularia:
    lexeme.qualifiers.add(qualifier)

def cand_ndm():
  """Change status 'desc' to 'cand' for lexemes using selected patterns.

  Affected are lexemes with status 'desc' that have an inflection pattern
  named '0000' or 'P00', and those with pattern 'ndm' unless their part of
  speech symbol is 'adv'. Each lexeme is saved individually through save().
  """
  # Querysets are lazy, so each selection is only evaluated when its turn
  # comes in the loop below -- same order as three separate loops.
  selections = [
    existing.filter(
      status='desc', lexemeinflectionpattern__pattern__name=pattern_name)
    for pattern_name in ('0000', 'P00')
  ]
  selections.append(
    existing.filter(
      status='desc', lexemeinflectionpattern__pattern__name='ndm'
    ).exclude(part_of_speech__symbol='adv'))

  for lexemes in selections:
    for lexeme in lexemes:
      lexeme.status = 'cand'
      lexeme.save()

def fix_ow():
  """Assign the 'm3' inflection characteristic to capitalised '-ów' nouns.

  Selects lexemes with source 'Morfologik' and part of speech 'subst' whose
  entry starts with an upper-case letter and ends in 'ów', then sets the
  'm3' characteristic (for 'subst') on all of their inflection patterns,
  saving each one. The number of matched lexemes is reported via debug().
  """
  m3_characteristic = InflectionCharacteristic.objects.get(
    entry='m3', part_of_speech__symbol='subst')
  matching = existing.filter(
    source='Morfologik', part_of_speech__symbol='subst',
    entry__regex='^[A-ZĄĆĘŁŃÓŚŻŹ].*ów$')
  for lexeme in matching:
    for inflection in lexeme.lexemeinflectionpattern_set.all():
      inflection.inflection_characteristic = m3_characteristic
      inflection.save()
  fixed_count = matching.count()
  debug(u'ów', u'%s poprawionych' % fixed_count)

# note: does not check whether, after the change, the lexeme ends up
# identical to one from SGJP
def fix_stwo():
  """Move Morfologik '-stwo' lexemes from pattern '0173o' to '0205' / 'p1'.

  Selects lexemes with source 'Morfologik' whose entry ends in 'stwo' and
  which have an inflection pattern named '0173o'. For each of them, every
  inflection pattern gets the 'p1' characteristic (for 'subst') and pattern
  '0205', and is saved. The number of matched lexemes is reported via debug().
  """
  target_pattern = Pattern.objects.get(name='0205')
  p1_characteristic = InflectionCharacteristic.objects.get(
    entry='p1', part_of_speech__symbol='subst')
  matching = existing.filter(
    entry__endswith='stwo', source='Morfologik',
    lexemeinflectionpattern__pattern__name='0173o')
  for lexeme in matching:
    # NOTE(review): all inflection patterns of the lexeme are rewritten, not
    # only the '0173o' one -- confirm this is intended.
    for inflection in lexeme.lexemeinflectionpattern_set.all():
      inflection.inflection_characteristic = p1_characteristic
      inflection.pattern = target_pattern
      inflection.save()
  fixed_count = matching.count()
  debug(u'stwo', u'%s poprawionych' % fixed_count)

def fix_morfologik():
  """Run all Morfologik fixes in a fixed order.

  Calls no_history() first (presumably switches off change-history
  recording -- confirm in common.util), then each fix step in turn.
  """
  no_history()
  steps = (sgtant_qualifiers, cand_ndm, fix_ow, fix_stwo)
  for step in steps:
    step()