# uncertain_ppas.py 1.44 KB
#-*- coding:utf-8 -*-

from django.core.management.base import BaseCommand
from dictionary.models import Lexeme, Vocabulary

class Command(BaseCommand):
  """Management command that runs list_verbs() and takes no arguments."""

  # NOTE(review): both strings below are placeholders shown in the command's
  # usage/help output; left untouched here so that output does not change.
  args = 'none'
  help = 'blah'

  def handle(self, *args, **options):
    """Run the report.

    Accepts (and ignores) positional arguments so that the method matches
    the handle(*args, **options) hook the framework calls; without *args
    any positional argument forwarded by the framework raises TypeError.
    """
    list_verbs()

def get_derived(lexeme):
  """Return the set of derived-form kinds that *lexeme* has references to.

  Each of 'ger', 'pact' and 'ppas' is included when ``lexeme.refs_to``
  contains at least one row whose ``type__symbol`` equals 'ver' + kind
  (i.e. 'verger', 'verpact', 'verppas').
  """
  # exists() asks the database for a yes/no answer instead of loading every
  # matching row just to test the queryset for truthiness.
  return {
    pos
    for pos in ('ger', 'pact', 'ppas')
    if lexeme.refs_to.filter(type__symbol='ver' + pos).exists()
  }


def list_verbs():
  morfologik = Vocabulary.objects.get(id='Morfologik')
  SGJP = Vocabulary.objects.get(id='SGJP')
  morf = morfologik.owned_lexemes_pk()
  sgjp = SGJP.owned_lexemes_pk()
  verbs = Lexeme.objects.filter(part_of_speech__symbol='v')
  sgjp_verbs = verbs.filter(pk__in=sgjp)
  morf_verbs = verbs.filter(pk__in=morf)
  sgjp_entries = set(sgjp_verbs.values_list('entry', flat=True))
  morf_entries = set(morf_verbs.values_list('entry', flat=True))
  common_entries = sgjp_entries & morf_entries
  lexemes = morf_verbs.filter(entry__in=common_entries)
  for lexeme in lexemes:
    homonyms = verbs.filter(pk__in=sgjp, entry=lexeme.entry)
    for hom in homonyms:
      if set(lexeme.patterns.all()) == set(hom.patterns.all()):
        m_der = get_derived(lexeme)
        s_der = get_derived(hom)
        if (m_der.issuperset(s_der) and m_der - s_der == {'ppas'}
            or s_der.issuperset(m_der) and s_der - m_der == {'ppas'}):
          print lexeme.entry.encode('utf-8'),
          print lexeme.pk, hom.pk, 'morf' if 'ppas' in m_der else 'sgjp'