fix_surnames.py
2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#-*- coding:utf-8 -*-
import sys
from django.core.management.base import BaseCommand, CommandError
from common.util import no_history, debug
from dictionary.models import Lexeme, Vocabulary, LexemeAssociation
class Command(BaseCommand):
    """Management command wrapper that runs ``fix_surnames()``."""

    args = 'none'
    help = 'Fixes SGJP surnames which come from Morfologik as adjectives'

    def handle(self, *args, **options):
        """Run the surname fix; the command accepts no positional arguments.

        ``*args`` is accepted to match the ``BaseCommand.handle`` signature,
        so stray positional arguments are reported as a ``CommandError``
        instead of crashing with a ``TypeError``.

        Raises:
            CommandError: if any positional argument is supplied.
        """
        if args:
            raise CommandError('This command takes no arguments')
        fix_surnames()
def _link_to_vocabulary(lexeme, vocabulary):
    """Associate ``lexeme`` with ``vocabulary`` unless it already is.

    Returns True when a new LexemeAssociation was saved, False when the
    vocabulary was already among ``lexeme.vocabularies``.
    """
    if vocabulary in lexeme.vocabularies.all():
        return False
    LexemeAssociation(lexeme=lexeme, vocabulary=vocabulary).save()
    return True


def fix_surnames():
    """Replace Morfologik adjective surnames with their SGJP noun homonyms.

    Candidates are non-deleted, capitalised 'adj' lexemes owned by the
    Morfologik vocabulary whose entry also exists as a capitalised 'subst'
    lexeme. For every candidate ending in 'i' or 'y', the function looks in
    SGJP for exactly one masculine ('m1') noun with the same entry and
    exactly one feminine ('f') noun with the final letter replaced by 'a'.
    When both are found they are associated with Morfologik (if not
    already) and the adjective lexeme is deleted. Every outcome is reported
    through ``debug``.
    """
    # NOTE(review): presumably switches off change-history tracking for this
    # bulk operation -- confirm against common.util.no_history.
    no_history()
    morfologik = Vocabulary.objects.get(id='Morfologik')
    sgjp_vocabulary = Vocabulary.objects.get(id='SGJP')
    morf = morfologik.owned_lexemes_pk()
    sgjp = sgjp_vocabulary.owned_lexemes_pk()

    capitalised = u'^[A-ZĄĆĘŁŃÓŚŻŹ]'
    existing = Lexeme.objects.filter(deleted=False)
    # Deliberately not restricted to SGJP: per the original author's note,
    # filtering this queryset by SGJP leaves nothing.
    sgjp_subst = existing.filter(
        part_of_speech__symbol='subst',
        entry__regex=capitalised)
    morf_surnames = existing.filter(
        pk__in=morf, part_of_speech__symbol='adj', entry__regex=capitalised)

    # Only adjectives that share their entry with some capitalised noun
    # are worth examining.
    subst_entries = set(sgjp_subst.values_list('entry', flat=True))
    surnames_entries = set(morf_surnames.values_list('entry', flat=True))
    entries = subst_entries & surnames_entries
    lexemes = morf_surnames.filter(entry__in=entries)

    for lexeme in lexemes:
        entry = lexeme.entry
        if not entry.endswith(('i', 'y')):
            debug(entry, u'Nie jest nazwiskiem rodzaju męskiego')
            continue

        female = entry[:-1] + 'a'
        masculine_matches = existing.filter(
            pk__in=sgjp, entry=entry, part_of_speech__symbol='subst',
            lexemeinflectionpattern__inflection_characteristic__entry='m1')
        feminine_matches = existing.filter(
            pk__in=sgjp, entry=female, part_of_speech__symbol='subst',
            lexemeinflectionpattern__inflection_characteristic__entry='f')

        # Two rows are enough to tell "none" / "exactly one" / "ambiguous"
        # apart, so fetch each queryset once instead of issuing repeated
        # COUNT queries followed by another fetch. The feminine lookup is
        # skipped when there is no masculine match.
        masculine = list(masculine_matches[:2])
        feminine = list(feminine_matches[:2]) if masculine else []

        if not masculine or not feminine:
            debug(entry, u'Brak homonimu w SGJP')
            continue
        if len(masculine) > 1 or len(feminine) > 1:
            debug(entry, u'Niejednoznaczne homonimy w SGJP')
            continue

        for homonym, already_linked in (
                (masculine[0], u'Już jest dopisany do Morfologika [m]'),
                (feminine[0], u'Już jest dopisany do Morfologika [f]')):
            if not _link_to_vocabulary(homonym, morfologik):
                debug(entry, already_linked)

        # NOTE(review): the reviewed copy had lost its indentation; the
        # deletion is assumed to happen only after a successful match --
        # confirm against the original file.
        lexeme.delete()
        debug(entry, u'Wykonano')