import_data.py 26.6 KB

Edit Raw Blame History

#-*- coding:utf-8 -*-

import sqlite3
import datetime
from django.db import connection, transaction
from django.core.management.base import BaseCommand

from common.util import no_history
from dictionary.models import *

# SQLite file read by the command when no filename is given.
DEFAULT_DATABASE = 'data/sgjp.db'

# When True, lexeme-related queries are limited to the first
# MINI_LEXEME_COUNT lexemes (ordered by entry).
MINI_MODE = True # for debugging
MINI_LEXEME_COUNT = 1000
# Template of the limiting query: %s is filled with the selected column(s),
# ? is bound to the row limit.
MINI_LEXEME_QUERY = "SELECT %s FROM leksemy ORDER BY haslo COLLATE NOCASE LIMIT ?"

# When True, import_tables inserts table cells with raw SQL instead of
# going through the ORM.
SQL_MODE = True

# batch_size handed to bulk_create.
BATCH_SIZE = 5000

# Symbol of the fallback lexical class.
OTHER = 'inne'
# Id of the vocabulary that imported qualifiers are attached to.
DEFAULT_VOCAB = 'SGJP'

# Lexeme attributes created during the import. Each entry maps the attribute
# name to a 3-tuple:
#   1. (part-of-speech symbols, inflection characteristic symbol or None)
#      -- where the attribute applies,
#   2. tuple of (value, display value) pairs -- the allowed values,
#   3. (column of `leksemy` holding the raw value, function normalizing it).
ATTRS = {
    u'zwrotność': (
        (('v', 'ger', 'pact'), None),
        (
            (u'—', u''),
            (u'się', u'się'),
            (u'(się)', u'(się)'),
            (u'sobie', u'sobie'),
            (u'(sobie)', u'(sobie)'),
            (u'się/sobie', u'się/sobie'),
        ),
        # Strip spaces and question marks; an empty result becomes the dash.
        ('haslosuf', lambda suf: suf.strip(' ?') or u'—'),
    ),
    u'przechodniość': (
        (('v', 'pred'), None),
        (
            ('iT', u'nieprzechodni'),
            ('qT', u'quasi-przechodni'),
            ('T', u'przechodni'),
        ),
        ('przechodniosc', lambda x: x),
    ),
    u'aspekt': (
        (('v', 'pred', 'ger', 'pact', 'ppas', 'appas'), None),
        (
            ('dk', u'dk'),
            ('ndk', u'ndk'),
            ('ndk/dk', u'ndk/dk'),
            ('dk/ndk', u'dk/ndk'),
            ('ndk/(dk)', u'ndk/(dk)'),
            ('dk/(ndk)', u'dk/(ndk)'),
        ),
        ('aspekt', lambda x: x),
    ),
    u'właściwy': (
        (('v', 'pred'), None),
        (
            ('Q', u'niewłaściwy'),
            ('(Q)', u'właściwy/niewłaściwy'),
            ('', u'właściwy'),
        ),
        ('właściwy', lambda x: x),
    ),
    u'depr': (
        (('subst', 'skrs'), 'm1'),
        (
            ('n', u''),
            ('d', u''),
            ('nd', u''),
        ),
        ('depr', lambda x: x),
    )
}

# temporary table
# Maps an inflection characteristic symbol (`charfl`) to the symbol of its
# basic form label; used by new_inflection_characteristics.
BASIC_FORM_LABELS = {
    '0-': '1',
    '3+': '1',
    'f': 'sg:nom',
    'm1': 'sg:nom',
    'm2': 'sg:nom',
    'm3': 'sg:nom',
    'n1': 'sg:nom',
    'n2': 'sg:nom',
    'p1': 'pl:nom:mo',
    'p2': 'pl:nom',
    'p3': 'pl:nom',
    'pri': 'sg:nom', # or pl; at this point it really cannot be decided.
    'sec': 'sg:nom',
}

# this is probably not the best solution...
# Basic form label symbol forced by the part of speech; takes precedence
# over BASIC_FORM_LABELS in new_inflection_characteristics.
BASIC_FORM_LABELS_POS = {
    'v': '5',
    'ger': '11',
    'pact': '3',
    'ppas': '10',
    'appas': '10',
    'pred': '5',
}

# Maps the `styl` code of a header row in the source database to the CSS
# class stored on TableHeader.
TRANSLATE_CLASS = {
    'h': 'header',
    'c': 'header-c',
    'b': 'blank',
    'd': 'data',
}


class Command(BaseCommand):
    """Management command that replaces the dictionary data with a fresh
    import from an SQLite database."""
    help = 'Imports initial data'
    args = '<input db filename>'

    def handle(self, db_name=DEFAULT_DATABASE, **options):
        """Delete the previously imported data and import ``db_name``."""
        importer = ImportData(db_name)
        importer.delete_and_import()


def get_cursor(db):
    """Open the SQLite database ``db`` and return a cursor on it.

    Rows fetched through the cursor are ``sqlite3.Row`` objects, so columns
    can be read by name as well as by position.
    """
    sqlite_conn = sqlite3.connect(db)
    sqlite_conn.row_factory = sqlite3.Row
    cursor = sqlite_conn.cursor()
    return cursor


def bulk_create(model, objects):
    """Insert ``objects`` through ``model``'s manager, BATCH_SIZE rows at a
    time."""
    manager = model.objects
    manager.bulk_create(objects, batch_size=BATCH_SIZE)


# Maps a model class to the name of the ImportData method that imports it;
# consumed by ImportData.single_import. Names starting with 'new' are fed to
# bulk_create, names starting with 'import' are simply called.
# NOTE(review): 'import_cross_references', 'import_endings',
# 'import_lexeme_inflection_patterns' and 'import_lexemes' have no matching
# method in the visible ImportData class (it defines new_* variants instead)
# -- confirm before relying on single_import for those models.
METHOD_NAMES = {
    CrossReference: 'import_cross_references',
    Ending: 'import_endings',
    LexemeInflectionPattern: 'import_lexeme_inflection_patterns',
    Lexeme: 'import_lexemes',
    PatternType: 'import_pattern_types',
    TableTemplate: 'import_tables',
    BaseFormLabel: 'new_base_form_labels',
    CrossReferenceType: 'new_cross_reference_types',
    InflectionCharacteristic: 'new_inflection_characteristics',
    LexemeAssociation: 'new_lexeme_associations',
    LexicalClass: 'new_lexical_classes',
    PartOfSpeech: 'new_parts_of_speech',
    Pattern: 'new_patterns',
    Qualifier: 'new_qualifiers',
    TableHeader: 'new_table_headers',
    Vocabulary: 'new_vocabularies',
}


class ImportData(object):
    def __init__(self, db):
        """Set up both database handles for an import run.

        db -- filename of the source SQLite database.
        """
        # Switch Django to explicitly managed transactions; close() performs
        # the matching commit() and leave_transaction_management().
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed()
        # Cursor on the Django database, used for the raw SQL inserts.
        self.cursor = connection.cursor()
        # Cursor on the source SQLite database (rows are sqlite3.Row).
        self.sqlite_cursor = get_cursor(db)
        # NOTE(review): presumably switches off change-history tracking for
        # the duration of the import -- confirm in common.util.
        no_history()

    def close(self):
        """Release both database handles and commit the import.

        Closes the Django cursor, then the SQLite cursor together with the
        connection it was created on, and finally commits the managed
        transaction opened in __init__ and leaves transaction management.
        """
        self.cursor.close()
        # get_cursor() hands back only the cursor, so the connection has to
        # be reached through it; closing just the cursor would leave the
        # SQLite file open.
        sqlite_connection = self.sqlite_cursor.connection
        self.sqlite_cursor.close()
        sqlite_connection.close()
        transaction.commit()
        transaction.leave_transaction_management()


    def new_lexical_classes(self):
        """Yield unsaved LexicalClass objects for symbols not yet stored.

        The fallback class OTHER comes first, followed by every distinct
        `pos` value of the `wzory` table.
        """
        def is_missing(symbol):
            return not LexicalClass.objects.filter(symbol=symbol).exists()

        if is_missing(OTHER):
            yield LexicalClass(symbol=OTHER)
        pos_rows = self.sqlite_cursor.execute('select distinct pos from wzory')
        for pos_row in pos_rows:
            symbol = pos_row['pos']
            if is_missing(symbol):
                yield LexicalClass(symbol=symbol)

    def cache_lc(self):
        if 'lc' not in self.__dict__:
            self.lc = dict((lc.symbol, lc) for lc in LexicalClass.objects.all())

    def new_parts_of_speech(self):
        lcs = {}
        for row in self.sqlite_cursor.execute(
                'select distinct wzory.pos, leksemy.pos from wzory '
                'natural join odmieniasie join leksemy on leksemy.nr = odmieniasie.nr'):
            lcs[row[1]] = row[0]

        for row in self.sqlite_cursor.execute('SELECT * FROM klasygramatyczne'):
            if not PartOfSpeech.objects.filter(symbol=row['pos']).exists():
                lc = lcs.get(row['pos'], OTHER)
                yield PartOfSpeech(
                    symbol=row['pos'],
                    full_name=row['nazwakl'],
                    index=row['klporz'],
                    color_scheme=row['posind'],
                    lexical_class=LexicalClass.objects.get(symbol=lc))

    def cache_pos(self):
        if 'pos' not in self.__dict__:
            self.pos = dict(
                (pos.symbol, pos) for pos in PartOfSpeech.objects.all())

    def cache_lc_pos(self):
        if 'lc_pos' not in self.__dict__:
            self.lc_pos = dict(
                (pos.symbol, pos.lexical_class) for pos in
                    PartOfSpeech.objects.all()
            )

    def new_base_form_labels(self):
        query_result = self.sqlite_cursor.execute("""
      SELECT efobaz FROM paradygmaty
      UNION
      SELECT efobaz FROM zakonczenia
      """)
        for row in query_result:
            yield BaseFormLabel(symbol=row[0])

    def cache_bfl(self):
        if 'bfls' not in self.__dict__:
            self.bfls = dict(
                (bfl.symbol, bfl) for bfl in BaseFormLabel.objects.all())

    def new_inflection_characteristics(self):
        for row in self.sqlite_cursor.execute(
                'SELECT DISTINCT charfl, pos FROM paradygmaty'):
            if row['charfl'] == '':
                bfl_symbol = '1' if row['pos'] in ('adj', 'adjcom') else ''
            else:
                bfl_symbol = BASIC_FORM_LABELS.get(row['charfl'], '')
            if row['pos'] in BASIC_FORM_LABELS_POS:
                bfl_symbol = BASIC_FORM_LABELS_POS[row['pos']]
            bfl = BaseFormLabel.objects.get(symbol=bfl_symbol)
            yield InflectionCharacteristic(
                symbol=row['charfl'], basic_form_label=bfl,
                part_of_speech=PartOfSpeech.objects.get(pk=row['pos']))

    def cache_ics(self):
        if 'ics' not in self.__dict__:
            self.ics = dict(
                ((ic.symbol, ic.part_of_speech.symbol), ic)
                    for ic in InflectionCharacteristic.objects.all()
            )

    def new_vocabularies(self):
        result = self.sqlite_cursor.execute("""
      SELECT slownik FROM leksemy
      UNION
      SELECT slownik_uz FROM slowniki_uzywajace
    """)
        for row in result:
            yield Vocabulary(id=row[0])

    def cache_vocabs(self):
        if 'vocabs' not in self.__dict__:
            self.vocabs = dict((v.id, v) for v in Vocabulary.objects.all())

    def new_qualifiers(self):
        """Yield one unsaved Qualifier per distinct qualifier label.

        Labels are collected from the '|'-separated qualifier columns of
        `odmieniasie`, `zakonczenia` and `leksemy`; every qualifier is
        assigned to the DEFAULT_VOCAB vocabulary.
        """
        owner = Vocabulary.objects.get(id=DEFAULT_VOCAB)
        packed_rows = self.sqlite_cursor.execute("""
      SELECT okwal FROM odmieniasie
      UNION
      SELECT zkwal FROM zakonczenia
      UNION
      SELECT lkwal FROM leksemy
      """)
        seen = set()
        for packed_row in packed_rows:
            packed = packed_row[0]
            if not packed:
                continue
            for label in packed.split('|'):
                if label in seen:
                    continue
                seen.add(label)
                yield Qualifier(label=label, vocabulary=owner)

    def cache_qualifiers(self):
        if 'qual' not in self.__dict__:
            self.qual = dict((q.label, q) for q in Qualifier.objects.all())

    def create_attributes(self):
        """Create the lexeme attributes described by ATTRS.

        For every attribute: get or create it, attach it to the listed parts
        of speech and to their inflection characteristics carrying the
        configured symbol, and get or create each allowed value.

        Returns a dict: attribute name -> {value: LexemeAttributeValue}.
        """
        values_by_attribute = {}
        for name, spec in ATTRS.iteritems():
            (pos_symbols, ic_symbol), value_pairs, _import_info = spec
            attribute, _created = LexemeAttribute.objects.get_or_create(
                name=name, closed=True, required=True,
                takes_ic=bool(ic_symbol))
            parts = PartOfSpeech.objects.filter(symbol__in=pos_symbols)
            for part in parts:
                attribute.parts_of_speech.add(part)
                characteristics = InflectionCharacteristic.objects.filter(
                    part_of_speech=part, symbol=ic_symbol)
                for characteristic in characteristics:
                    attribute.inflection_characteristics.add(characteristic)
            by_value = {}
            for value, display_value in value_pairs:
                stored, _created = LexemeAttributeValue.objects.get_or_create(
                    value=value, display_value=display_value,
                    attribute=attribute)
                by_value[value] = stored
            values_by_attribute[name] = by_value
        return values_by_attribute

    def new_lexemes(self):
        """Build, without saving, everything derived from the `leksemy` rows.

        Returns a 5-tuple of lists:
        (Lexeme objects, LexemeAssociation objects, LexemeCV objects,
        (lexeme id, Qualifier) pairs, LexemeAV objects).
        In MINI_MODE only the first MINI_LEXEME_COUNT lexemes are read.
        """
        self.cache_qualifiers()
        if MINI_MODE:
            rows = self.sqlite_cursor.execute(
                MINI_LEXEME_QUERY % '*', (MINI_LEXEME_COUNT,))
        else:
            rows = self.sqlite_cursor.execute('SELECT * FROM leksemy')
        attr_values = self.create_attributes()
        now = datetime.datetime.now()
        cv_by_label = {
            value.label: value for value in ClassificationValue.objects.all()}
        lexemes, associations, classifications = [], [], []
        qualifier_pairs, attribute_values = [], []
        for row in rows:
            lexeme_id = row['nr']
            vocabulary_id = row['slownik']
            # lexemes of the 'zmiotki' vocabulary are only candidates
            status = 'cand' if vocabulary_id == 'zmiotki' else 'conf'
            commonness = cv_by_label[row['pospolitosc']]
            lexemes.append(Lexeme(
                id=lexeme_id,
                entry=row['haslo'],
                entry_suffix=row['haslosuf'] or '', # historical leftover
                gloss=row['glosa'] or '',
                note=row['nota'] or '',
                pronunciation=row['wymowa'] or '',
                valence=row['łączliwość'] or '',
                part_of_speech_id=row['pos'],
                source='SGJP',
                status=status,
                comment=row['komentarz'] or '',
                last_modified=now,
                owner_vocabulary_id=vocabulary_id,
            ))
            associations.append(LexemeAssociation(
                lexeme_id=lexeme_id, vocabulary_id=vocabulary_id))
            classifications.append(
                LexemeCV(lexeme_id=lexeme_id, classification_value=commonness))
            if row['lkwal']:
                qualifier_pairs.extend(
                    (lexeme_id, self.qual[label])
                    for label in row['lkwal'].split('|'))
            for attr_name, spec in ATTRS.iteritems():
                (poses, ic), values, (column, normalize) = spec
                if row['pos'] not in poses:
                    continue
                raw_value = row[column]
                attr_value = attr_values[attr_name].get(normalize(raw_value))
                if attr_value:
                    attribute_values.append(LexemeAV(
                        lexeme_id=lexeme_id, attribute_value=attr_value))
                elif raw_value:
                    # a non-empty raw value that maps to no known value
                    print('unknown value of %s: %s' % (attr_name, raw_value))
        return (lexemes, associations, classifications, qualifier_pairs,
                attribute_values)

    def new_lexeme_associations(self):
        """Yield an unsaved LexemeAssociation for every row of
        `slowniki_uzywajace` (restricted to the mini lexeme set in
        MINI_MODE)."""
        self.cache_vocabs()
        if MINI_MODE:
            sql = ('SELECT * FROM slowniki_uzywajace WHERE nr in (%s)'
                   % (MINI_LEXEME_QUERY % 'nr'))
            params = [MINI_LEXEME_COUNT]
        else:
            sql = 'SELECT * FROM slowniki_uzywajace'
            params = []
        for usage in self.sqlite_cursor.execute(sql, params):
            vocabulary = self.vocabs[usage['slownik_uz']]
            yield LexemeAssociation(
                vocabulary=vocabulary, lexeme_id=usage['nr'])

    # Descriptions overriding the default header of a cross-reference type,
    # keyed by (cross-reference type symbol, part of speech of the target
    # lexeme); looked up in new_cross_reference_types.
    CR_DESC = {
        ('verppas', 'appas'): u'quasi-imiesłów bierny',
        ('adjnie', 'appas'): u'„zanegowany” quasi-imiesłów bierny',
        ('adjnie', 'adv'): u'przysłówek „zanegowany”',
        ('adjnie', 'osc'): u'„zanegowana” nazwa cechy',
        ('nieadj', 'appas'): u'quasi-imiesłów bierny bez „nie-”',
        ('nieadj', 'adv'): u'przysłówek bez „nie-”',
        ('nieadj', 'osc'): u'nazwa cechy bez „nie-”',
    }

    def new_cross_reference_types(self):
        result = self.sqlite_cursor.execute(
            'select distinct l1.pos pos1, l2.pos pos2, t.* '
            'from odsylacze o join leksemy l1 on nrod=l1.nr '
            'join leksemy l2 on nrdo=l2.nr '
            'join typyodsylaczy t on t.typods=o.typods')
        for row in result:
            desc = self.CR_DESC.get((row['typods'], row['pos2']),
                row['naglowek'])
            yield CrossReferenceType(
                symbol=row['typods'],
                desc=desc,
                index=row['kolejnosc'],
                from_pos=PartOfSpeech.objects.get(symbol=row['pos1']),
                to_pos=PartOfSpeech.objects.get(symbol=row['pos2']),
            )

    def new_cross_references(self):
        """Yield an unsaved CrossReference for every complete row of
        `odsylacze` (both ends inside the mini lexeme set in MINI_MODE)."""
        if MINI_MODE:
            sql = ('SELECT o.*, l1.pos pos1, l2.pos pos2 FROM odsylacze o '
                   'JOIN leksemy l1 on nrod=l1.nr '
                   'JOIN leksemy l2 on nrdo=l2.nr '
                   'WHERE nrod in (%(subq)s) and nrdo in (%(subq)s)'
                   % {'subq': MINI_LEXEME_QUERY % 'nr'})
            # the subquery occurs twice, so its limit is bound twice
            params = [MINI_LEXEME_COUNT, MINI_LEXEME_COUNT]
        else:
            sql = ('SELECT o.*, l1.pos pos1, l2.pos pos2 FROM odsylacze o '
                   'JOIN leksemy l1 on nrod=l1.nr '
                   'JOIN leksemy l2 on nrdo=l2.nr')
            params = []
        references = self.sqlite_cursor.execute(sql, params)
        types_by_key = {
            (cr_type.symbol, cr_type.from_pos.symbol, cr_type.to_pos.symbol):
                cr_type
            for cr_type in CrossReferenceType.objects.all()
        }
        for reference in references:
            # incomplete cross-references do occur for 'asp'
            if not (reference['nrod'] and reference['nrdo']):
                continue
            key = (reference['typods'], reference['pos1'], reference['pos2'])
            yield CrossReference(
                from_lexeme_id=reference['nrod'],
                to_lexeme_id=reference['nrdo'],
                type=types_by_key[key])

    def copy_aspects(self):
        """Yield unsaved LexemeAV objects that give the target lexeme of
        every 'verger', 'verppas' and 'verpact' cross-reference the aspect
        value of the source lexeme."""
        aspect_ids = dict(
            LexemeAttributeValue.objects
            .filter(attribute__name='aspekt')
            .values_list('value', 'pk'))
        if MINI_MODE:
            sql = ("select distinct nrdo, aspekt "
                   "from odsylacze o "
                   "join leksemy l on o.nrod = l.nr "
                   "where typods in ('verger', 'verppas', 'verpact') "
                   "and nrod in (%(subq)s) and nrdo in (%(subq)s)"
                   % {'subq': MINI_LEXEME_QUERY % 'nr'})
            params = [MINI_LEXEME_COUNT, MINI_LEXEME_COUNT]
        else:
            sql = ("select distinct nrdo, aspekt "
                   "from odsylacze o "
                   "join leksemy l on o.nrod = l.nr "
                   "where typods in ('verger', 'verppas', 'verpact')")
            params = []
        for derived in self.sqlite_cursor.execute(sql, params):
            yield LexemeAV(
                lexeme_id=derived['nrdo'],
                attribute_value_id=aspect_ids[derived['aspekt']])

    def import_pattern_types(self):
        self.cache_lc_pos()
        result = self.sqlite_cursor.execute(
            'SELECT DISTINCT typr, pos FROM paradygmaty')
        for row in result:
            lc = self.lc_pos[row['pos']]
            PatternType.objects.get_or_create(lexical_class=lc,
                symbol=row['typr'])
            # prowizorka z powodu pustej klasy 'skr'
        self.cache_lc()
        result = self.sqlite_cursor.execute(
            'SELECT DISTINCT typr, pos FROM wzory')
        for row in result:
            lc = self.lc[row['pos']]
            PatternType.objects.get_or_create(lexical_class=lc,
                symbol=row['typr'])

    def cache_ptypes(self):
        if 'ptypes' not in self.__dict__:
            self.ptypes = dict(
                ((pt.lexical_class.symbol, pt.symbol), pt)
                    for pt in PatternType.objects.all()
            )

    def new_patterns(self):
        self.cache_ptypes()
        for row in self.sqlite_cursor.execute('SELECT * FROM wzory'):
            yield Pattern(
                name=row['wzor'],
                type=self.ptypes[(row['pos'], row['typr'])],
                basic_form_ending=row['zakp'],
                example=row['przyklad'] or '',
                comment=row['wkomentarz'] or '',
                status='temp',
            )

    def cache_patterns(self):
        if 'paterns' not in self.__dict__:
            self.patterns = dict((p.name, p) for p in Pattern.objects.all())

    def new_endings(self):
        """Build, without saving, the endings listed in `zakonczenia`.

        Rows whose `zak` is NULL are skipped. Returns a pair of lists:
        (Ending objects,
         (pattern, base form label, index, Qualifier) tuples -- one per
         qualifier attached to an ending).
        """
        self.cache_qualifiers()
        self.cache_patterns()
        self.cache_bfl()
        endings, ending_quals = [], []
        for row in self.sqlite_cursor.execute('SELECT * FROM zakonczenia'):
            ending_string = row['zak']
            if ending_string is None:
                continue
            pattern = self.patterns[row['wzor']]
            base_form_label = self.bfls[row['efobaz']]
            index = row['nrskl']
            endings.append(Ending(
                pattern=pattern,
                base_form_label=base_form_label,
                string=ending_string,
                index=index,
            ))
            if row['zkwal']:
                ending_quals.extend(
                    (pattern, base_form_label, index, self.qual[label])
                    for label in row['zkwal'].split('|'))
        return endings, ending_quals

    def new_lexeme_inflection_patterns(self):
        """Build, without saving, the rows of `odmieniasie`.

        Returns a pair of lists:
        (LexemeInflectionPattern objects,
         (lexeme id, index, Qualifier) tuples -- one per attached qualifier).
        In MINI_MODE only the mini lexeme set is read.
        """
        self.cache_ics()
        self.cache_qualifiers()
        self.cache_patterns()
        if MINI_MODE:
            sql = ('SELECT o.*, l.pos FROM odmieniasie o '
                   'JOIN leksemy l ON o.nr = l.nr '
                   'WHERE l.nr IN (%s)' % (MINI_LEXEME_QUERY % 'nr'))
            params = (MINI_LEXEME_COUNT,)
        else:
            sql = 'SELECT * FROM odmieniasie o JOIN leksemy l ON o.nr = l.nr'
            params = ()
        lips, lip_quals = [], []
        for row in self.sqlite_cursor.execute(sql, params):
            lexeme_id = row['nr']
            index = row['oskl']
            characteristic = self.ics[(row['charfl'], row['pos'])]
            lips.append(LexemeInflectionPattern(
                lexeme_id=lexeme_id,
                index=index,
                pattern=self.patterns[row['wzor']],
                inflection_characteristic=characteristic,
                root=row['rdzen'],
            ))
            if row['okwal']:
                lip_quals.extend(
                    (lexeme_id, index, self.qual[label])
                    for label in row['okwal'].split('|'))
        return lips, lip_quals

    def new_variants(self):
        result = self.sqlite_cursor.execute(
            'SELECT DISTINCT wariant FROM paradygmaty')
        for row in result:
            yield Variant(id=row['wariant'])

    def new_table_templates(self):
        self.cache_ics()
        self.cache_ptypes()
        self.cache_lc_pos()
        result = self.sqlite_cursor.execute(
            'SELECT DISTINCT wariant, pos, typr, charfl FROM paradygmaty')
        for row in result:
            yield TableTemplate(
                variant_id=row['wariant'],
                pattern_type=self.ptypes[
                    (self.lc_pos[row['pos']].symbol, row['typr'])],
                inflection_characteristic=self.ics[(row['charfl'], row['pos'])])

    # this stays as it is, because the tables are going to change anyway
    def import_tables(self):
        """Import the cells of `paradygmaty` and, for rows that carry a
        table position, the matching table cells.

        With SQL_MODE on, rows are inserted with raw SQL through the Django
        cursor; otherwise Cell and TableCell objects are saved one by one.
        """
        self.cache_bfl()
        templates = {}
        for template in TableTemplate.objects.all():
            key = (
                template.variant.id,
                template.pattern_type.symbol,
                template.inflection_characteristic.symbol,
                template.inflection_characteristic.part_of_speech.symbol,
            )
            templates[key] = template
        for row in self.sqlite_cursor.execute('SELECT * FROM paradygmaty'):
            template = templates[
                (unicode(row['wariant']), row['typr'], row['charfl'],
                 row['pos'])]
            if SQL_MODE:
                label_id = self.bfls[row['efobaz']].id
                self.cursor.execute(
                    "INSERT INTO klatki (st_id, efobaz, tag, prefiks, sufiks, kind) "
                    "VALUES (%s, %s, %s, %s, %s, %s)",
                    [template.pk, label_id, row['morf'],
                     row['pref'], row['suf'], row['kskl']])
                if row['row']:
                    # id of the cell inserted just above
                    self.cursor.execute("select currval('klatki_id_seq')")
                    cell_id = self.cursor.fetchone()[0]
                    self.cursor.execute(
                        "INSERT INTO komorki_tabel (k_id, row, col, rowspan, colspan) "
                        "VALUES (%s, %s, %s, %s, %s)",
                        [cell_id, row['row'], row['col'],
                         row['rowspan'], row['colspan']])
            else:
                cell = Cell(
                    table_template=template,
                    base_form_label=BaseFormLabel.objects.get(
                        symbol=row['efobaz']),
                    tag=row['morf'],
                    prefix=row['pref'],
                    suffix=row['suf'],
                    index=row['kskl'],
                )
                cell.save()
                if row['row']:
                    table_cell = TableCell(
                        cell=cell,
                        row=row['row'],
                        col=row['col'],
                        rowspan=row['rowspan'],
                        colspan=row['colspan'],
                    )
                    table_cell.save()

    def new_table_headers(self):
        for row in self.sqlite_cursor.execute('SELECT * FROM naglowkiwierszy'):
            tts = TableTemplate.objects.filter(
                variant__id=row['wariant'], pattern_type__symbol=row['typr'],
                inflection_characteristic__symbol=row['charfl'],
                inflection_characteristic__part_of_speech__symbol=row['pos'])
            if tts:
                tt = tts.get()
                yield TableHeader(
                    table_template=tt,
                    row=row['row'],
                    col=row['col'],
                    rowspan=row['rowspan'],
                    colspan=row['colspan'],
                    label=row['nagl'],
                    css_class=TRANSLATE_CLASS[row['styl']],
                )
            else:
                raise Exception('Brak szablonu dla nagłówka: %s', dict(row))

    def delete_and_import(self):
        """Delete the previously imported data and import everything anew.

        The steps run in dependency order (classes and parts of speech
        first, tables and headers last); close() at the end commits the
        whole import.
        """
        # Models whose rows are wiped before the import, in deletion order.
        models = (
            TableCell,
            Cell,
            TableTemplate,
            Variant,
            CrossReference,
            CrossReferenceType,
            LexemeAssociation,
            LexemeInflectionPattern,
            Lexeme,
            Ending,
            Pattern,
            PatternType,
            Qualifier,
            #Vocabulary,
            InflectionCharacteristic,
            BaseFormLabel,
            #PartOfSpeech,
            #LexicalClass,
            LexemeAttribute,
        )
        print 'deleting old data...'
        for model in models:
            model.objects.all().delete()

        print 'importing lexical classes...'
        bulk_create(LexicalClass, self.new_lexical_classes())
        print 'importing parts of speech...'
        bulk_create(PartOfSpeech, self.new_parts_of_speech())
        print 'importing base form labels...'
        bulk_create(BaseFormLabel, self.new_base_form_labels())
        print 'importing inflection characteristics...'
        bulk_create(InflectionCharacteristic,
            self.new_inflection_characteristics())
        print 'importing vocabularies...'
        # Vocabularies are not deleted above, so they are saved one by one
        # instead of being bulk-created.
        for v in self.new_vocabularies():
            v.save()
        print 'importing qualifiers...'
        bulk_create(Qualifier, self.new_qualifiers())
        print 'importing pattern types...'
        self.import_pattern_types()
        print 'importing patterns...'
        bulk_create(Pattern, self.new_patterns())
        print 'importing endings...'
        endings, ending_quals = self.new_endings()
        bulk_create(Ending, endings)
        # Qualifiers can only be attached once the endings are saved, so each
        # ending is fetched back by (pattern, base form label, index).
        for pattern, bfl, index, q in ending_quals:
            Ending.objects.get(
                pattern=pattern, base_form_label=bfl,
                index=index).qualifiers.add(q)

        def import_lexemes():
            # Save the lexemes and everything hanging directly off them.
            print 'importing lexemes...'
            (lexemes, lexeme_assoc, lexeme_cvs, lexeme_quals,
            lexeme_attrs) = self.new_lexemes()
            print 'creating...'
            bulk_create(Lexeme, lexemes)
            print 'associations...'
            bulk_create(LexemeAssociation, lexeme_assoc)
            print 'classifications...'
            bulk_create(LexemeCV, lexeme_cvs)
            print 'qualifiers...'
            for lexeme_id, q in lexeme_quals:
                q.lexeme_set.add(lexeme_id) #add
            print 'attributes...'
            bulk_create(LexemeAV, lexeme_attrs)

        import_lexemes()

        def import_lips():
            # Save the lexeme inflection patterns, then attach their
            # qualifiers by fetching each one back by (lexeme id, index).
            print 'importing lexeme inflection patterns...'
            lips, lip_quals = self.new_lexeme_inflection_patterns()
            print 'creating...'
            bulk_create(LexemeInflectionPattern, lips)
            print 'qualifiers...'
            for lexeme_id, index, q in lip_quals:
                LexemeInflectionPattern.objects.get(
                    lexeme_id=lexeme_id, index=index).qualifiers.add(q)

        import_lips()
        print 'importing lexeme associations...'
        bulk_create(LexemeAssociation, self.new_lexeme_associations())
        print 'importing cross-reference types...'
        bulk_create(CrossReferenceType,
            self.new_cross_reference_types())
        print 'importing cross-references...'
        bulk_create(CrossReference, self.new_cross_references())
        print 'copying aspect values to derived lexemes...'
        bulk_create(LexemeAV, self.copy_aspects())
        print 'importing variants...'
        bulk_create(Variant, self.new_variants())
        print 'importing table templates...'
        bulk_create(TableTemplate, self.new_table_templates())
        print 'importing tables...'
        self.import_tables()
        print 'importing table headers...'
        bulk_create(TableHeader, self.new_table_headers())
        print 'committing to database...'
        self.close()

    def single_import(self, model):
        """Import the data of a single ``model`` and close the importer.

        The method to run is looked up in METHOD_NAMES: a 'new...' method
        produces objects that are bulk-created, an 'import...' method does
        the saving itself.
        """
        name = METHOD_NAMES[model]
        if name.startswith('new'):
            produce = getattr(self, name)
            bulk_create(model, produce())
        elif name.startswith('import'):
            run = getattr(self, name)
            run()
        self.close()