uncertain_ppas.py
1.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#-*- coding:utf-8 -*-
from django.core.management.base import BaseCommand
from dictionary.models import Lexeme, Vocabulary
class Command(BaseCommand):
    """Django management command that runs the list_verbs() report.

    The command reads nothing from its options; all output is produced by
    list_verbs() itself.
    """

    # Metadata strings consumed by Django's management framework.
    help = 'blah'
    args = 'none'

    def handle(self, **options):
        """Run the report; ``options`` are accepted but not used."""
        list_verbs()
def get_derived(lexeme):
    """Return the kinds of derived forms that ``lexeme`` refers to.

    For each kind in ('ger', 'pact', 'ppas') -- presumably gerund, active
    participle and passive participle; confirm against the data model --
    the lexeme's ``refs_to`` relation is searched for a reference whose
    type symbol is 'ver' + kind and whose target lexeme is not deleted.

    Args:
        lexeme: object exposing a ``refs_to`` manager that supports
            ``filter(type__symbol=..., to_lexeme__deleted=...)``.

    Returns:
        A set containing every kind for which such a reference exists
        (empty when there is none).
    """
    derived = set()
    for pos in ('ger', 'pact', 'ppas'):
        refs = lexeme.refs_to.filter(
            type__symbol='ver' + pos, to_lexeme__deleted=False)
        # exists() only asks the database whether a matching row is
        # present, instead of loading all matching rows to test truthiness.
        if refs.exists():
            derived.add(pos)
    return derived
def list_verbs():
morfologik = Vocabulary.objects.get(id='Morfologik')
SGJP = Vocabulary.objects.get(id='SGJP')
morf = morfologik.owned_lexemes_pk()
sgjp = SGJP.owned_lexemes_pk()
existing = Lexeme.objects.filter(deleted=False)
sgjp_verbs = existing.filter(pk__in=sgjp, part_of_speech__symbol='v')
morf_verbs = existing.filter(pk__in=morf, part_of_speech__symbol='v')
sgjp_entries = set(sgjp_verbs.values_list('entry', flat=True))
morf_entries = set(morf_verbs.values_list('entry', flat=True))
common_entries = sgjp_entries & morf_entries
lexemes = morf_verbs.filter(entry__in=common_entries)
for lexeme in lexemes:
homonyms = existing.filter(
pk__in=sgjp, entry=lexeme.entry, part_of_speech__symbol='v')
for hom in homonyms:
if set(lexeme.patterns.all()) == set(hom.patterns.all()):
m_der = get_derived(lexeme)
s_der = get_derived(hom)
if (m_der.issuperset(s_der) and m_der - s_der == set(['ppas'])
or s_der.issuperset(m_der) and s_der - m_der == set(['ppas'])):
print lexeme.entry.encode('utf-8'),
print lexeme.pk, hom.pk, 'morf' if 'ppas' in m_der else 'sgjp'