Commit 1d1dfd7aa6128d771ecab4ec2a22f6ae652468cd
1 parent
1761122b
skrypt do importu danych od Witka
Showing
1 changed file
with
62 additions
and
0 deletions
dictionary/management/commands/import_witek.py
0 → 100644
1 | +# -*- coding: utf-8 -*- | |
2 | +from django.core.management.base import BaseCommand | |
3 | +from django.db import transaction | |
4 | +from django.db.models import Max | |
5 | +from common.util import uniopen, no_history | |
6 | +from dictionary.models import Lexeme, Vocabulary, LexemeInflectionPattern, \ | |
7 | + Pattern, Qualifier, ClassificationValue, LexemeCV, Gender | |
8 | + | |
9 | + | |
class Command(BaseCommand):
    """Management command that imports lexemes from a text file.

    Usage: ``manage.py import_witek <filename> <comment>``.
    """

    # Shown in the usage line of ``manage.py help import_witek``.
    args = '<filename> <comment>'
    help = ("Imports 'subst' lexemes from a semicolon-separated file; "
            "the given comment is stored on every imported lexeme.")

    def handle(self, filename, comment, *args, **options):
        """Open ``filename`` and import its records.

        :param filename: path of the input file, opened with ``uniopen``.
        :param comment: comment passed on to ``import_lexemes``.
        """
        import_lexemes(uniopen(filename), comment)
15 | + | |
16 | + | |
# Vocabulary that imported lexemes are attached to (see import_subst).
# NOTE(review): this runs a database query when the module is imported --
# confirm that is intended.
WSJP = Vocabulary.objects.get(id='WSJP')
18 | + | |
def import_lexemes(lines, comment):
    """Import every 'subst' record found in ``lines`` in one transaction.

    Each line is a semicolon-separated record whose second field is the
    part of speech. Records with any other part of speech are ignored,
    and so are blank lines.

    :param lines: iterable of text lines to import.
    :param comment: comment passed on to ``import_subst`` for each record.

    Everything is committed once at the end. If any record fails, the
    transaction is rolled back and the exception propagates.
    """
    transaction.commit_unless_managed()
    transaction.enter_transaction_management()
    transaction.managed()
    try:
        no_history()
        for line in lines:
            # Drop the line terminator so it cannot stay glued to the last
            # field, and skip blank lines (e.g. a trailing empty line).
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            elements = line.split(';')
            if elements[1] == 'subst':
                import_subst(elements, comment)
        transaction.commit()
    except BaseException:
        # Discard the partial import. Leaving transaction management with
        # pending changes would raise and mask the original error.
        transaction.rollback()
        raise
    finally:
        transaction.leave_transaction_management()
30 | + | |
31 | + | |
def import_subst(elements, comment):
    """Create one 'subst' lexeme from a parsed input record.

    :param elements: the five fields of a record, in order: entry, part of
        speech, gender symbol, pattern data and commonness label.
        Pattern data is a '/'-separated list whose items are either
        ``pattern`` or ``qualifier pattern`` (split on the last space).
    :param comment: comment stored on the new lexeme.
    :raises ValueError: if ``elements`` does not have exactly five fields;
        the offending record is printed before the error propagates.
    """
    try:
        entry, pos, gender_symbol, pattern_data, commonness = elements
    except ValueError:
        # Show the malformed record before failing.
        print(elements)
        raise
    # The caller dispatches on the part of speech, so this is an invariant.
    assert pos == 'subst'
    gender = Gender.objects.get(symbol=gender_symbol)
    lip_data = [p.rsplit(' ', 1) for p in pattern_data.split('/')]

    # The aggregate is None when the table is empty; start from 1 then.
    max_id = Lexeme.all_objects.aggregate(Max('id'))['id__max']
    next_id = (max_id or 0) + 1

    # owner_vocabulary_id expects the key value, not the Vocabulary object.
    lexeme = Lexeme(
        id=next_id, entry=entry, part_of_speech_id='subst',
        status=Lexeme.STATUS_DESCRIBED, owner_vocabulary_id=WSJP.pk,
        comment=comment)
    lexeme.fix_homonym_number()
    lexeme.save()
    WSJP.add_lexeme(lexeme)

    comm_value = ClassificationValue.objects.get(
        classification__name=u'pospolitość', label=commonness)
    LexemeCV.objects.create(lexeme=lexeme, classification_value=comm_value)

    for index, item in enumerate(lip_data):
        # rsplit() produced either [pattern] or [qualifier, pattern].
        pattern_name = item[-1]
        qualifier_label = item[0] if len(item) > 1 else None
        lip = LexemeInflectionPattern(
            lexeme=lexeme, index=index, gender=gender)
        lip.pattern = Pattern.objects.get(name=pattern_name)
        lip.save()
        if qualifier_label:
            lip.qualifiers.add(Qualifier.objects.get(label=qualifier_label))
0 | 63 | \ No newline at end of file |
... | ... |