# uncertain_ppas.py 1.55 KB
#-*- coding:utf-8 -*-

from django.core.management.base import BaseCommand
from dictionary.models import Lexeme, Vocabulary


class Command(BaseCommand):
    """Management command that runs list_verbs(); it reads no arguments."""
    args = 'none'
    help = ("Print Morfologik/SGJP verb pairs with the same entry and "
            "patterns whose derived lexeme references differ only in 'ppas'.")

    def handle(self, *args, **options):
        """Run the report.

        Accepts ``*args`` as well as ``**options`` so the signature matches
        what BaseCommand passes to handle(); both are ignored.
        """
        list_verbs()


def get_derived(lexeme):
    """Return which derived-form references the lexeme has.

    For each of 'ger', 'pact' and 'ppas', the suffix is included in the
    result when ``lexeme.refs_to`` holds at least one reference whose type
    symbol is 'ver' followed by that suffix (e.g. 'verppas').

    Returns a set that is a subset of {'ger', 'pact', 'ppas'}.
    """
    derived = set()
    for pos in ('ger', 'pact', 'ppas'):
        # exists() asks the database a yes/no question instead of fetching
        # every matching row just to test the queryset for truthiness.
        if lexeme.refs_to.filter(type__symbol='ver' + pos).exists():
            derived.add(pos)
    return derived


def list_verbs():
    morfologik = Vocabulary.objects.get(id='Morfologik')
    SGJP = Vocabulary.objects.get(id='SGJP')
    morf = morfologik.owned_lexemes_pk()
    sgjp = SGJP.owned_lexemes_pk()
    verbs = Lexeme.objects.filter(part_of_speech__symbol='v')
    sgjp_verbs = verbs.filter(pk__in=sgjp)
    morf_verbs = verbs.filter(pk__in=morf)
    sgjp_entries = set(sgjp_verbs.values_list('entry', flat=True))
    morf_entries = set(morf_verbs.values_list('entry', flat=True))
    common_entries = sgjp_entries & morf_entries
    lexemes = morf_verbs.filter(entry__in=common_entries)
    for lexeme in lexemes:
        homonyms = verbs.filter(pk__in=sgjp, entry=lexeme.entry)
        for hom in homonyms:
            if set(lexeme.patterns.all()) == set(hom.patterns.all()):
                m_der = get_derived(lexeme)
                s_der = get_derived(hom)
                if (m_der.issuperset(s_der) and m_der - s_der == {'ppas'}
                or s_der.issuperset(m_der) and s_der - m_der == {'ppas'}):
                    print lexeme.entry.encode('utf-8'),
                    print lexeme.pk, hom.pk, 'morf' if 'ppas' in m_der else 'sgjp'