uncertain_ppas.py
1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
from common.util import uniprint
from dictionary.models import Lexeme, Vocabulary
class Command(BaseCommand):
    """Management command whose only job is to run ``list_verbs``."""

    help = 'blah'
    args = 'none'

    def handle(self, **options):
        """Entry point invoked by Django; ``options`` are accepted but unused."""
        # All the work lives in the module-level helper.
        list_verbs()
def get_derived(lexeme):
    """Return which derived forms are linked to ``lexeme``.

    For each suffix in ``('ger', 'pact', 'ppas')`` the lexeme's ``refs_to``
    relation is checked for at least one reference whose type symbol is
    ``'ver' + suffix``.

    NOTE(review): the suffixes presumably stand for gerund, active
    participle and passive participle -- confirm against the data model.

    Args:
        lexeme: object exposing a ``refs_to`` manager/queryset with
            ``filter(type__symbol=...)``.

    Returns:
        set of the suffix strings for which a matching reference exists.
    """
    # .exists() asks the database a yes/no question instead of loading
    # every matching row just to test the queryset's truthiness.
    return {
        pos
        for pos in ('ger', 'pact', 'ppas')
        if lexeme.refs_to.filter(type__symbol='ver' + pos).exists()
    }
def list_verbs():
    """Print verb pairs whose derived forms differ only by ``'ppas'``.

    Steps:
      1. Load the ``Morfologik`` and ``SGJP`` vocabularies and the primary
         keys returned by their ``owned_lexemes_pk()``.
      2. Restrict verbs (``part_of_speech__symbol='v'``) to each vocabulary
         and keep only entries that occur in both.
      3. For every Morfologik verb with such an entry, compare it with each
         SGJP verb sharing the entry. When both have the same set of
         patterns and their ``get_derived`` sets differ by exactly
         ``{'ppas'}``, print one line through ``uniprint``:
         ``<entry> <morfologik pk> <sgjp pk> <morf|sgjp>``, where the last
         field names the side that has ``'ppas'``.

    Returns:
        None. Output is produced only via ``uniprint``.
    """
    morfologik = Vocabulary.objects.get(id='Morfologik')
    sgjp_vocab = Vocabulary.objects.get(id='SGJP')
    morf = morfologik.owned_lexemes_pk()
    sgjp = sgjp_vocab.owned_lexemes_pk()

    verbs = Lexeme.objects.filter(part_of_speech__symbol='v')
    sgjp_verbs = verbs.filter(pk__in=sgjp)
    morf_verbs = verbs.filter(pk__in=morf)

    sgjp_entries = set(sgjp_verbs.values_list('entry', flat=True))
    morf_entries = set(morf_verbs.values_list('entry', flat=True))
    common_entries = sgjp_entries & morf_entries

    for lexeme in morf_verbs.filter(entry__in=common_entries):
        # The lexeme's pattern set does not depend on the homonym being
        # compared, so fetch it once instead of once per homonym.
        lexeme_patterns = set(lexeme.patterns.all())
        for hom in sgjp_verbs.filter(entry=lexeme.entry):
            if lexeme_patterns != set(hom.patterns.all()):
                continue
            m_der = get_derived(lexeme)
            s_der = get_derived(hom)
            # "One side is a superset of the other and the surplus is
            # exactly {'ppas'}" is the same as saying the symmetric
            # difference of the two sets is exactly {'ppas'}.
            if m_der ^ s_der == {'ppas'}:
                uniprint(
                    '%s %s %s %s' %
                    (lexeme.entry, lexeme.pk, hom.pk,
                     'morf' if 'ppas' in m_der else 'sgjp'))