wrong_derivatives.py
2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
from django.db.transaction import atomic
from common.util import uniprint, no_history
from dictionary.auto_derivatives import ppas_data, pact_data, create_derivative
from dictionary.models import Lexeme, LexemeAttributeValue, LexemeForm
from patterns.models import Ending
class Command(BaseCommand):
    """Management command that runs ``wrong_derivatives()``.

    Positional arguments and options passed to the command are accepted
    but not used.
    """

    help = "Convert derivatives so they don't use verb patterns"

    def handle(self, *args, **options):
        """Entry point called by Django; delegates to ``wrong_derivatives``."""
        wrong_derivatives()
def _attribute_value(attribute_name, value):
    """Fetch the single LexemeAttributeValue for the given attribute/value."""
    return LexemeAttributeValue.objects.get(
        attribute__name=attribute_name, value=value)


# NOTE: these lookups are executed when the module is imported, so the
# database must already contain the four attribute values below
# (``.get`` raises if one is missing or duplicated).
INTRANS = _attribute_value(u'przechodniość', 'iT')
IMPROPER = _attribute_value(u'właściwy', 'Q')
DK = _attribute_value(u'aspekt', 'dk')
NDK = _attribute_value(u'aspekt', 'ndk')
def _suffixed_pattern_forms(lexeme, label_symbol, suffix):
    """Yield ``(form, entry)`` pairs built from a lexeme's inflection patterns.

    For every inflection pattern attached to ``lexeme`` and every ending of
    that pattern whose base form label has the symbol ``label_symbol``, the
    form is ``root + ending string + suffix`` and ``entry`` is
    ``lexeme.entry``.
    """
    for lip in lexeme.lexemeinflectionpattern_set.all():
        matching_endings = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol=label_symbol)
        for ending in matching_endings:
            yield lip.root + ending.string + suffix, lexeme.entry


@atomic
def wrong_derivatives():
    """Print candidate participle forms that are absent from LexemeForm.

    Works on SGJP verbs, grouped by the INTRANS / IMPROPER / DK / NDK
    attribute values.  Candidate ``(form, entry)`` pairs are collected from:

    * derivatives created from ``ppas_data`` for INTRANS or IMPROPER verbs,
    * derivatives created from ``pact_data`` for DK or IMPROPER verbs, plus
      forms ending in ``c`` built on endings labelled ``3``,
    * forms ending in ``szy`` built on endings labelled ``6'`` for NDK or
      IMPROPER verbs.

    Pairs whose form does not occur in any ``LexemeForm`` row are printed in
    sorted order as ``form, entry``.  The whole run is wrapped in a single
    transaction and starts by calling ``no_history()`` (presumably turns off
    history tracking -- confirm in ``common.util``).
    """
    no_history()

    sgjp_verbs = Lexeme.objects.filter(
        part_of_speech='v', owner_vocabulary='SGJP')
    intrans = sgjp_verbs.filter(lexemeattributevalue=INTRANS)
    improper = sgjp_verbs.filter(lexemeattributevalue=IMPROPER)
    dk = sgjp_verbs.filter(lexemeattributevalue=DK)
    ndk = sgjp_verbs.filter(lexemeattributevalue=NDK)

    candidates = set()

    for lexeme in intrans | improper:
        for data in ppas_data(lexeme):
            derivative = create_derivative(
                lexeme, data['pos'], data['entry'], data['index'],
                pl=data['pl'])
            candidates.update(
                (form, lexeme.entry) for form in derivative.all_forms())

    for lexeme in dk | improper:
        for data in pact_data(lexeme):
            derivative = create_derivative(
                lexeme, data['pos'], data['entry'], data['index'])
            candidates.update(
                (form, lexeme.entry) for form in derivative.all_forms())
        # NOTE(review): placed at the per-lexeme level (not inside the
        # pact_data loop), mirroring the NDK loop below -- confirm.
        candidates.update(_suffixed_pattern_forms(lexeme, '3', u'c'))

    for lexeme in ndk | improper:
        candidates.update(_suffixed_pattern_forms(lexeme, "6'", u'szy'))

    # Every form known to the dictionary, loaded once for fast membership tests.
    existing_forms = set(LexemeForm.objects.values_list('form', flat=True))
    missing = sorted(
        pair for pair in candidates if pair[0] not in existing_forms)
    for pair in missing:
        uniprint('%s, %s' % pair)