import_witek.py
2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Max
from common.util import uniopen, no_history
from dictionary.models import Lexeme, Vocabulary, LexemeInflectionPattern, \
Pattern, Qualifier, ClassificationValue, LexemeCV, Gender
class Command(BaseCommand):
    """Management command that imports lexemes from a text file.

    The file is opened with ``uniopen`` and its lines are handed, together
    with the comment, to ``import_lexemes``.
    """

    # Usage string for the two required positional arguments.
    args = '<filename> <comment>'
    help = ("Imports 'subst' lexemes from a semicolon-separated file "
            "into the WSJP vocabulary.")

    def handle(self, filename, comment, *args, **options):
        """Import the lexemes listed in ``filename``.

        Args:
            filename: passed unchanged to ``uniopen`` to obtain the lines.
            comment: passed unchanged to ``import_lexemes``; it ends up in
                the ``comment`` field of every lexeme that gets created.
            *args: extra positional arguments, ignored.
            **options: command options, ignored.
        """
        import_lexemes(uniopen(filename), comment)
# Vocabulary with id 'WSJP'; import_subst uses it as the owner of every
# lexeme it creates and adds each new lexeme to it.
# NOTE(review): this is a database query executed at import time, so merely
# importing this module presumably fails when no such row exists -- confirm
# that this is acceptable.
WSJP = Vocabulary.objects.get(id='WSJP')
def import_lexemes(lines, comment):
    """Import every 'subst' record from ``lines`` in a single transaction.

    Each line is split on ';'.  Records whose second field is 'subst' are
    passed to ``import_subst``; every other line (including blank lines and
    lines without a second field) is skipped.

    Args:
        lines: iterable of text lines, one record per line.
        comment: passed unchanged to ``import_subst`` for every record.

    Raises:
        Any exception raised while importing a record.  The transaction is
        rolled back and transaction management is left before it propagates.
    """
    transaction.commit_unless_managed()
    transaction.enter_transaction_management()
    try:
        transaction.managed()
        no_history()
        for line in lines:
            # Drop the line terminator so it does not stay attached to the
            # last field (harmless if the lines arrive already stripped).
            elements = line.rstrip('\r\n').split(';')
            # A blank or malformed line has no second field to inspect.
            if len(elements) > 1 and elements[1] == 'subst':
                import_subst(elements, comment)
    except BaseException:
        # Discard the partial import; the error itself is re-raised as is.
        transaction.rollback()
        raise
    else:
        transaction.commit()
    finally:
        # Always restore the previous transaction state, success or failure.
        transaction.leave_transaction_management()
def import_subst(elements, comment):
    """Create one 'subst' lexeme together with its inflection patterns.

    Args:
        elements: sequence of exactly five fields, in this order: entry,
            part of speech (must be 'subst'), gender symbol, pattern data
            and commonness label.  Pattern data is a '/'-separated list
            whose items are either ``pattern`` or ``qualifier pattern``
            (split on the last space).
        comment: stored in the ``comment`` field of the new lexeme.

    Raises:
        ValueError: if ``elements`` does not hold exactly five fields; the
            offending record is printed first.
        AssertionError: if the part of speech is not 'subst'.
    """
    try:
        entry, pos, gender, pattern_data, commonness = elements
    except ValueError:
        # Show the offending record before propagating the unpacking error.
        print(elements)
        raise
    assert pos == 'subst'
    gender = Gender.objects.get(symbol=gender)
    # Each item becomes [pattern] or [qualifier, pattern].
    lip_data = [p.rsplit(' ', 1) for p in pattern_data.split('/')]

    # The aggregate is None when there are no lexemes yet; start from 1 then.
    max_id = Lexeme.all_objects.aggregate(Max('id'))['id__max']
    next_id = (max_id or 0) + 1
    lexeme = Lexeme(
        id=next_id, entry=entry, part_of_speech_id='subst',
        status=Lexeme.STATUS_DESCRIBED,
        # The *_id attribute expects the key value, not the model instance.
        owner_vocabulary_id=WSJP.pk,
        comment=comment)
    lexeme.fix_homonym_number()
    lexeme.save()
    WSJP.add_lexeme(lexeme)

    # Attach the value of the u'pospolitość' classification whose label
    # matches the commonness field.
    comm_value = ClassificationValue.objects.get(
        classification__name=u'pospolitość', label=commonness)
    LexemeCV.objects.create(lexeme=lexeme, classification_value=comm_value)

    # Inflection patterns are numbered from 1 in the order they were listed.
    for index, parts in enumerate(lip_data, 1):
        pattern_name = parts[-1]
        qualifier_label = parts[0] if len(parts) == 2 else None
        lip = LexemeInflectionPattern(
            lexeme=lexeme, index=index, gender=gender)
        lip.pattern = Pattern.objects.get(name=pattern_name)
        lip.root = lip.get_root()
        # Save before touching ``qualifiers``, as the original code does.
        lip.save()
        if qualifier_label:
            lip.qualifiers.add(Qualifier.objects.get(label=qualifier_label))