fix_morfologik.py
2.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#-*- coding:utf-8 -*-
from django.core.management.base import BaseCommand
from common.util import no_history, debug
from dictionary.models import Lexeme, Pattern, Vocabulary, \
Qualifier, InflectionCharacteristic
class Command(BaseCommand):
    """Management command that applies the Morfologik import fix-ups."""

    help = 'Fixes various issues with Morfologik import'
    args = 'none'

    def handle(self, **options):
        """Run all fix-up steps; the received options are not used."""
        fix_morfologik()
# Module-level state shared by the fix-up functions below.
# NOTE: these statements run when the module is loaded, so the vocabulary
# lookup happens at that point.

# The vocabulary record whose id is 'Morfologik'.
morfologik = Vocabulary.objects.get(id='Morfologik')
# Result of owned_lexemes_pk() for that vocabulary -- presumably the primary
# keys of the lexemes it owns; confirm against dictionary.models.
morf = morfologik.owned_lexemes_pk()
# Manager used as the base for every lexeme query in this module.
existing = Lexeme.objects # deleted ones are filtered out automatically
def sgtant_qualifiers():
    """Tag 'singulare tantum' lexemes with the 'zwykle lp' qualifier.

    The qualifier is looked up (or created) in the Morfologik vocabulary and
    added to every lexeme whose comment contains 'singulare tantum'.
    """
    qualifier, _created = Qualifier.objects.get_or_create(
        label='zwykle lp', vocabulary=morfologik)
    for lexeme in existing.filter(comment__contains='singulare tantum'):
        lexeme.qualifiers.add(qualifier)
def cand_ndm():
    """Demote selected 'desc' lexemes to status 'cand'.

    Affects lexemes with status 'desc' that have an inflection pattern named
    '0000', 'P00' or 'ndm'. For 'ndm', lexemes whose part of speech symbol is
    'adv' are left alone. Each lexeme is saved individually.
    """
    # Processed in this order; each queryset is lazy, so it is evaluated only
    # when its own loop starts (after earlier names have been saved).
    for pattern_name in ('0000', 'P00', 'ndm'):
        described = existing.filter(
            status='desc',
            lexemeinflectionpattern__pattern__name=pattern_name)
        if pattern_name == 'ndm':
            described = described.exclude(part_of_speech__symbol='adv')
        for lexeme in described:
            lexeme.status = 'cand'
            lexeme.save()
def fix_ow():
    """Set inflection characteristic 'm3' on capitalised nouns ending in 'ów'.

    Selects lexemes with source 'Morfologik' and part of speech 'subst' whose
    entry matches the regex below, then assigns the 'm3' characteristic to
    every inflection pattern row of each one. The number of selected lexemes
    is reported through debug().
    """
    masculine3 = InflectionCharacteristic.objects.get(
        symbol='m3', part_of_speech__symbol='subst')
    matching = existing.filter(
        source='Morfologik', part_of_speech__symbol='subst',
        entry__regex='^[A-ZĄĆĘŁŃÓŚŻŹ].*ów$')
    for lexeme in matching:
        pattern_rows = lexeme.lexemeinflectionpattern_set.all()
        for row in pattern_rows:
            row.inflection_characteristic = masculine3
            row.save()
    debug(u'ów', u'%s poprawionych' % matching.count())
# note: does not check whether, after the change, the lexeme is identical to
# one from SGJP
def fix_stwo():
    """Move '-stwo' lexemes from pattern '0173o' to pattern '0205' / 'p1'.

    Selects lexemes with source 'Morfologik' whose entry ends with 'stwo' and
    that have an inflection pattern named '0173o'. Every inflection pattern
    row of each selected lexeme gets characteristic 'p1' and pattern '0205'.
    The number of selected lexemes is reported through debug().
    """
    plural1 = InflectionCharacteristic.objects.get(
        symbol='p1', part_of_speech__symbol='subst')
    target_pattern = Pattern.objects.get(name='0205')
    matching = existing.filter(
        entry__endswith='stwo', source='Morfologik',
        lexemeinflectionpattern__pattern__name='0173o')
    for lexeme in matching:
        # NOTE(review): all rows of the lexeme are rewritten, not only the
        # ones that carried pattern '0173o' -- confirm this is intended.
        pattern_rows = lexeme.lexemeinflectionpattern_set.all()
        for row in pattern_rows:
            row.inflection_characteristic = plural1
            row.pattern = target_pattern
            row.save()
    # count() is called on the same, already iterated queryset object;
    # presumably it reports the cached result size, since the rows no longer
    # match the '0173o' filter after the loop -- verify for the Django
    # version in use.
    debug(u'stwo', u'%s poprawionych' % matching.count())
def fix_morfologik():
    """Run every Morfologik fix-up step in a fixed order.

    Calls no_history() first (presumably switches off change-history
    recording; see common.util), then the individual fix-up functions.
    """
    no_history()
    steps = (sgtant_qualifiers, cand_ndm, fix_ow, fix_stwo)
    for step in steps:
        step()