fix_homonym.py
1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
from django.db.models import Count
from common.util import no_history
from dictionary.management.commands.import_data import bulk_create
from dictionary.models import Lexeme, HomonymNumber
class Command(BaseCommand):
    """Management command that recomputes homonym numbers.

    Runs the general renumbering pass first and then rebuilds the
    ``'Morfeusz'`` variant numbers.
    """
    help = 'fixes homonym numbers'
    args = 'none'

    def handle(self, **options):
        """Run both fix-up passes in order; ``options`` is not consulted."""
        for step in (fix_homonym, fix_homonym_morfeusz):
            step()
def fix_homonym():
    """Renumber lexemes that share an entry and a part of speech.

    Every (entry, part_of_speech) pair owned by more than one lexeme is
    looked up again, and its lexemes get ``homonym_number`` 1, 2, 3, ... in
    ascending primary-key order. Each lexeme is saved individually.
    Lexemes whose pair is unique are not touched.
    """
    # NOTE(review): presumably switches off change-history recording for
    # this bulk rewrite -- confirm against common.util.no_history.
    no_history()

    # GROUP BY (entry, part_of_speech), keeping only groups with duplicates.
    grouped = Lexeme.objects.values('entry', 'part_of_speech')
    counted = grouped.annotate(count=Count('pk'))
    duplicated = counted.filter(count__gt=1)

    for group in duplicated:
        members = Lexeme.objects.filter(
            entry=group['entry'],
            part_of_speech=group['part_of_speech'],
        )
        # Ordering by pk makes the numbering deterministic between runs.
        for number, member in enumerate(members.order_by('pk'), start=1):
            member.homonym_number = number
            member.save()
def fix_homonym_morfeusz():
    """Rebuild the ``'Morfeusz'`` variant homonym numbers from scratch.

    All existing ``HomonymNumber`` rows with ``variant_id='Morfeusz'`` are
    deleted. Then, for each value of ``HomonymNumber.MORFEUSZ_LETTERS``
    (used here as a collection of part-of-speech ids), every entry shared
    by more than one lexeme within that collection gets numbers 1, 2,
    3, ... assigned in ascending primary-key order. The new rows are
    collected in memory and handed to ``bulk_create`` in a single call.
    """
    # NOTE(review): presumably switches off change-history recording for
    # this bulk rewrite -- confirm against common.util.no_history.
    no_history()

    # Start clean so repeated runs do not pile up duplicate numbers.
    HomonymNumber.objects.filter(variant_id='Morfeusz').delete()

    hn_list = []
    # ``.values()`` instead of ``.itervalues()``: the latter exists only on
    # Python 2 dicts and raises AttributeError on Python 3, while
    # ``.values()`` yields the same items on both.
    for pos_list in HomonymNumber.MORFEUSZ_LETTERS.values():
        lexemes = Lexeme.objects.filter(part_of_speech_id__in=pos_list)
        # Entries that occur more than once within this part-of-speech group.
        homonym_entries = lexemes.values('entry').annotate(
            count=Count('pk')).filter(count__gt=1)
        for homonym_entry in homonym_entries:
            homonyms = Lexeme.objects.filter(
                part_of_speech_id__in=pos_list,
                entry=homonym_entry['entry']).order_by('pk')
            for i, lexeme in enumerate(homonyms, 1):
                hn_list.append(HomonymNumber(
                    variant_id='Morfeusz', lexeme=lexeme, number=i))

    # Single insert after all groups are processed; hn_list is cumulative,
    # so calling this per group would re-insert earlier rows.
    bulk_create(HomonymNumber, hn_list)