import_skr.py
# -*- coding: utf-8 -*-
from django.db import connection
from django.core.management.base import BaseCommand
from django.db.models import Max
from django.db.transaction import atomic

from common.util import no_history, get_cursor
from dictionary.models import (
    Vocabulary, Qualifier, Lexeme, LexemeAttribute, LexemeInflectionPattern)
from patterns.models import Pattern


class Command(BaseCommand):
    args = '<input db filename>'
    help = 'Imports initial data'

    def handle(self, db_name, **options):
        import_skr(db_name)


# (attribute name, column in the 'skroty' table, whether the value set is closed)
SKR_ATTRS = (
    (u'dokładniej', 'dokładniej', True),
    (u'jeszcze dokładniej', 'jeszcze_dokładniej', False),
    (u'rozwinięcie', 'glossa', False),
    (u'tłumaczenie', 'tlumaczenie', False),
    (u'warianty', 'warianty', False),
)


@atomic
def import_skr(db_name):
    cursor = connection.cursor()
    sqlite_cursor = get_cursor(db_name)
    no_history()  # don't record change history during the bulk import
    sgjp = Vocabulary.objects.get(id='SGJP')
    zmiotki = Vocabulary.objects.get(id='zmiotki')
    # The single inflection pattern used for abbreviations.
    pattern = Pattern.objects.get(type__lexical_class_id='skr')

    # Map qualifier labels to ids, creating any labels found in the source
    # database that are not known yet.
    qual_dict = dict(Qualifier.objects.values_list('label', 'id'))
    result = sqlite_cursor.execute(
        'SELECT pochodzenie FROM skroty UNION SELECT kwalifikator FROM skroty')
    for row in result:
        for q_label in row[0].split(','):
            q_label = q_label.strip()
            if q_label != '' and q_label not in qual_dict:
                q = Qualifier.objects.create(
                    vocabulary=sgjp, label=q_label, type=Qualifier.TYPE_SCOPE)
                qual_dict[q_label] = q.id

    # Lexeme attributes available for the abbreviation parts of speech.
    attrs = {}
    for attr_name, column, closed in SKR_ATTRS:
        la, created = LexemeAttribute.objects.get_or_create(
            name=attr_name, closed=closed)
        la.parts_of_speech = ('skrl', 'skrw', 'skrf')
        attrs[attr_name] = la

    # Import each row of the 'skroty' table as a lexeme.
    next_id = Lexeme.objects.aggregate(Max('id'))['id__max'] + 1
    for row in sqlite_cursor.execute('SELECT * FROM skroty'):
        entry = row['leksem']
        # Entries whose status starts with '&' or '+' go to SGJP as confirmed;
        # the rest become candidates in the 'zmiotki' vocabulary.
        good = row['status'][0] in '&+'
        vocab = sgjp if good else zmiotki
        status = Lexeme.STATUS_CONFIRMED if good else Lexeme.STATUS_CANDIDATE
        pos = 'skr' + row['pos'][2]
        quals = row['pochodzenie'].split(', ')
        quals += row['kwalifikator'].split(',')
        quals = [qual.strip() for qual in quals if qual.strip()]
        valence = row['łączliwość']
        source = row['źródło']
        comment = row['komentarz1']
        if row['motywacja']:
            if comment:
                comment += '\n' + row['motywacja']
            else:
                comment = row['motywacja']
        l = Lexeme.objects.create(
            entry=entry, part_of_speech_id=pos, valence=valence, source=source,
            comment=comment, owner_vocabulary=vocab, status=status, id=next_id,
            source_id=row['Identyfikator'])
        next_id += 1
        vocab.add_lexeme(l)
        LexemeInflectionPattern.objects.create(
            lexeme=l, pattern=pattern, index=1, root=entry)
        for attr_name, column, closed in SKR_ATTRS:
            data = row[column]
            if attr_name == u'dokładniej':
                # Prefix the value with the kind of abbreviation.
                if pos == 'skrl':
                    data = u'skrót leksemu, ' + data
                elif pos == 'skrw':
                    data = u'skrót wyrażenia, ' + data
                else:  # pos == 'skrf'
                    data = u'skrót formy ' + data
            attrs[attr_name].add_lexeme(l, data)
        l.qualifiers = (qual_dict[qual] for qual in quals)
        l.refresh_data()
    cursor.close()
    sqlite_cursor.close()