# import_skr.py 3.51 KB

# Edit Raw Blame History

# -*- coding: utf-8 -*-

from django.db import connection
from django.core.management.base import BaseCommand
from django.db.models import Max
from django.db.transaction import atomic

from common.util import no_history, get_cursor
from dictionary.models import Vocabulary, Qualifier, Lexeme, LexemeAttribute,  LexemeInflectionPattern
from patterns.models import Pattern


class Command(BaseCommand):
    """Management command wrapping :func:`import_skr`.

    Takes a single positional argument: the filename of the input database.
    """

    help = 'Imports initial data'
    args = '<input db filename>'

    def handle(self, db_name, **options):
        """Run the import against the database file named ``db_name``.

        Extra command options are accepted but not used.
        """
        import_skr(db_name)

# Lexeme attributes populated by the import, one tuple per attribute:
#   (attribute name, name of the source-row column holding its value,
#    value of the attribute's ``closed`` flag).
# The attribute name is used to get or create the LexemeAttribute; the
# column name is used to index each row read from the input database.
SKR_ATTRS = (
    (u'dokładniej', 'dokładniej', True),
    (u'jeszcze dokładniej', 'jeszcze_dokładniej', False),
    (u'rozwinięcie', 'glossa', False),
    (u'tłumaczenie', 'tlumaczenie', False),
    (u'warianty', 'warianty', False),
)

def _split_labels(raw):
    """Split a comma-separated string into stripped, non-empty labels.

    A NULL (``None``) or empty value yields an empty list, so callers do not
    have to special-case missing columns.
    """
    if not raw:
        return []
    return [label.strip() for label in raw.split(',') if label.strip()]


def _import_skr_rows(sqlite_cursor):
    """Create qualifiers, attributes and lexemes from the ``skroty`` table.

    ``sqlite_cursor`` is the cursor over the input database; its rows are
    indexed both by position and by column name below.
    """
    no_history()

    sgjp = Vocabulary.objects.get(id='SGJP')
    zmiotki = Vocabulary.objects.get(id='zmiotki')
    pattern = Pattern.objects.get(type__lexical_class_id='skr')

    # Pass 1: make sure every qualifier label mentioned in either column
    # exists, so the per-lexeme lookup in pass 2 cannot miss.
    qual_dict = dict(Qualifier.objects.values_list('label', 'id'))
    result = sqlite_cursor.execute(
        'SELECT pochodzenie FROM skroty UNION SELECT kwalifikator FROM skroty')
    for row in result:
        for q_label in _split_labels(row[0]):
            if q_label not in qual_dict:
                qualifier = Qualifier.objects.create(
                    vocabulary=sgjp, label=q_label, type=Qualifier.TYPE_SCOPE)
                qual_dict[q_label] = qualifier.id

    attrs = {}
    for attr_name, column, closed in SKR_ATTRS:
        attribute, _ = LexemeAttribute.objects.get_or_create(
            name=attr_name, closed=closed)
        attribute.parts_of_speech = ('skrl', 'skrw', 'skrf')
        attrs[attr_name] = attribute

    # The aggregate is None when there are no lexemes yet; start from 1 then.
    max_id = Lexeme.objects.aggregate(Max('id'))['id__max']
    next_id = (max_id or 0) + 1

    # Pass 2: one lexeme per source row.
    for row in sqlite_cursor.execute('SELECT * FROM skroty'):
        entry = row['leksem']
        # A status starting with '&' or '+' goes to SGJP as confirmed;
        # anything else goes to 'zmiotki' as a candidate.
        good = row['status'][0] in '&+'
        vocab = sgjp if good else zmiotki
        status = Lexeme.STATUS_CONFIRMED if good else Lexeme.STATUS_CANDIDATE
        # The third character of the source 'pos' value selects
        # 'skrl' / 'skrw' / 'skrf'.
        pos = 'skr' + row['pos'][2]
        # Split both columns exactly as pass 1 did, so every label found
        # here is guaranteed to be a key of qual_dict.
        quals = (_split_labels(row['pochodzenie'])
                 + _split_labels(row['kwalifikator']))
        valence = row['łączliwość']
        source = row['źródło']
        comment = row['komentarz1']
        motivation = row['motywacja']
        if motivation:
            # Append the motivation to an existing comment on its own line.
            comment = comment + '\n' + motivation if comment else motivation
        lexeme = Lexeme.objects.create(
            entry=entry, part_of_speech_id=pos, valence=valence, source=source,
            comment=comment, owner_vocabulary=vocab, status=status, id=next_id,
            source_id=row['Identyfikator'])
        next_id += 1
        vocab.add_lexeme(lexeme)
        LexemeInflectionPattern.objects.create(
            lexeme=lexeme, pattern=pattern, index=1, root=entry)
        for attr_name, column, closed in SKR_ATTRS:
            data = row[column]
            if attr_name == u'dokładniej':
                # Prefix the value with the kind of abbreviation.
                if pos == 'skrl':
                    data = u'skrót leksemu, ' + data
                elif pos == 'skrw':
                    data = u'skrót wyrażenia, ' + data
                else:  # pos == 'skrf'
                    data = u'skrót formy ' + data
            attrs[attr_name].add_lexeme(lexeme, data)
        lexeme.qualifiers = [qual_dict[qual] for qual in quals]
        lexeme.refresh_data()


@atomic
def import_skr(db_name):
    """Import the rows of the ``skroty`` table from ``db_name`` as lexemes.

    Runs inside a single transaction. For every source row a ``Lexeme`` is
    created with a fresh id, added to the 'SGJP' or 'zmiotki' vocabulary
    depending on the row's status, given one inflection pattern, the
    attributes listed in ``SKR_ATTRS`` and its qualifiers. Missing
    qualifiers are created in the 'SGJP' vocabulary beforehand.

    Both cursors are closed even when the import fails part-way.
    """
    # NOTE(review): this cursor is not used directly by the import; it is
    # kept open for the duration as in the original code -- confirm whether
    # anything relies on it before removing.
    cursor = connection.cursor()
    try:
        sqlite_cursor = get_cursor(db_name)
        try:
            _import_skr_rows(sqlite_cursor)
        finally:
            sqlite_cursor.close()
    finally:
        cursor.close()