# uncertain_ppas.py 1.64 KB
#-*- coding:utf-8 -*-

import sys
from django.core.management.base import BaseCommand, CommandError
from common.util import debug
from dictionary.models import Lexeme, Vocabulary, LexemeAssociation

class Command(BaseCommand):
  """Management command whose only action is to call list_verbs()."""
  args = 'none'
  help = 'blah'

  def handle(self, *args, **options):
    """Run the report.

    The signature mirrors BaseCommand.handle(*args, **options) so that the
    framework may pass positional arguments without raising TypeError.
    Neither the positional arguments nor the options are used.
    """
    list_verbs()

def get_derived(lexeme):
  """Return the derivative kinds for which `lexeme` has a live reference.

  For each suffix in ('ger', 'pact', 'ppas') the lexeme's `refs_to` manager
  is filtered for rows whose type symbol is 'ver' + suffix and whose target
  lexeme is not flagged as deleted.

  Args:
    lexeme: object exposing a `refs_to` manager with a Django-style
      `filter()` method.

  Returns:
    A set containing every suffix for which at least one such row exists.
  """
  # exists() asks the database a yes/no question instead of loading every
  # matching row just to truth-test the queryset.
  return set(
    pos for pos in ('ger', 'pact', 'ppas')
    if lexeme.refs_to.filter(
      type__symbol='ver' + pos, to_lexeme__deleted=False).exists())


def list_verbs():
  morfologik = Vocabulary.objects.get(id='Morfologik')
  SGJP = Vocabulary.objects.get(id='SGJP')
  morf = morfologik.owned_lexemes_pk()
  sgjp = SGJP.owned_lexemes_pk()
  existing = Lexeme.objects.filter(deleted=False)
  sgjp_verbs = existing.filter(pk__in=sgjp, part_of_speech__symbol='v')
  morf_verbs = existing.filter(pk__in=morf, part_of_speech__symbol='v')
  sgjp_entries = set(sgjp_verbs.values_list('entry', flat=True))
  morf_entries = set(morf_verbs.values_list('entry', flat=True))
  common_entries = sgjp_entries & morf_entries
  lexemes = morf_verbs.filter(entry__in=common_entries)
  for lexeme in lexemes:
    homonyms = existing.filter(
      pk__in=sgjp, entry=lexeme.entry, part_of_speech__symbol='v')
    for hom in homonyms:
      if set(lexeme.patterns.all()) == set(hom.patterns.all()):
        m_der = get_derived(lexeme)
        s_der = get_derived(hom)
        if (m_der.issuperset(s_der) and m_der - s_der == set(['ppas'])
            or s_der.issuperset(m_der) and s_der - m_der == set(['ppas'])):
          print lexeme.entry.encode('utf-8'),
          print lexeme.pk, hom.pk, 'morf' if 'ppas' in m_der else 'sgjp'