# import_surnames.py (2.68 KB)
# -*- coding: utf-8 -*-
import sys
from django.core.management.base import BaseCommand
from django.db import transaction

from accounts.util import bot_history
from dictionary.models import Lexeme, Vocabulary, ClassificationValue, \
    Inflection, Gender, CrossReferenceType, CrossReference
from patterns.models import Pattern

# NOTE: every lookup below is a module-level statement, so each query runs as
# soon as this module is imported and the rows must already exist in the
# database at that point.

# Vocabulary that newly created surname lexemes are assigned to and owned by.
SGJP = Vocabulary.objects.get(id='SGJP')

# Classification value attached to every imported lexeme
# ("nazwisko" is Polish for "surname").
SURNAME = ClassificationValue.objects.get(label=u'nazwisko')

# Cross-reference types linking the two forms of a surname:
# FEMMAS is used for the female -> male link, MASFEM for male -> female.
FEMMAS = CrossReferenceType.objects.get(symbol='femmas')
MASFEM = CrossReferenceType.objects.get(symbol='masfem')

# Genders given to the inflections of female (F) and male (M) surnames.
F = Gender.objects.get(symbol='f')
M = Gender.objects.get(symbol='m1')


class Command(BaseCommand):
    """Import paired female/male surnames from a CSV file.

    Every non-blank input line must hold exactly four comma-separated
    fields::

        female_entry,female_patterns,male_entry,male_patterns

    Each ``*_patterns`` field is a ``/``-separated list of pattern names.
    For every surname that does not exist yet a lexeme is created together
    with one inflection per pattern; when both lexemes of a line were newly
    created they are linked with cross-references in both directions.

    The whole import runs inside ``transaction.atomic``: if any pattern
    yields no root, the problems are reported on stderr and an exception is
    raised at the end so that nothing is committed.
    """
    help = "Import male and female surnames from CSV."
    args = "file_path"

    @staticmethod
    def new_lexeme(entry):
        """Create and return a surname lexeme for ``entry``.

        Returns ``None`` (after a notice on stderr) when a lexeme with the
        same entry already exists.
        """
        # exists() asks the database for a yes/no answer instead of loading
        # every matching row just to test truthiness.
        if Lexeme.objects.filter(entry=entry).exists():
            print >>sys.stderr, '%s already exists, skipping' % entry
            return None
        # owner_vocabulary_id is the raw foreign-key column, so it must be
        # given the vocabulary's key, not the Vocabulary instance itself.
        lexeme = Lexeme(
            entry=entry, part_of_speech_id='subst',
            status=Lexeme.STATUS_DESCRIBED,
            owner_vocabulary_id=SGJP.pk, source='Bronk')
        lexeme.save()
        SGJP.add_lexeme(lexeme)
        SURNAME.add_lexeme(lexeme)
        return lexeme

    @staticmethod
    def _add_inflections(lexeme, surname, patterns, gender):
        """Save one inflection of ``lexeme`` per name in ``patterns``.

        ``patterns`` is a ``/``-separated string of pattern names; the
        position of a name in it becomes the inflection's index.  A pattern
        for which ``get_root()`` gives an empty result is reported on stderr
        and not saved.

        Returns ``True`` when every pattern produced a root.
        Raises ``Pattern.DoesNotExist`` for an unknown pattern name.
        """
        all_ok = True
        for index, pattern_name in enumerate(patterns.split('/')):
            pattern = Pattern.objects.get(name=pattern_name)
            inflection = Inflection(
                lexeme=lexeme, pattern=pattern, gender=gender, index=index)
            inflection.root = inflection.get_root()
            if not inflection.root:
                print >>sys.stderr, 'Bad pattern for %s: %s' % (
                    surname, pattern_name)
                all_ok = False
            else:
                inflection.save()
        return all_ok

    @transaction.atomic
    def handle(self, file_path, **options):
        """Run the import for the CSV file at ``file_path``.

        Raises ``Exception`` after processing all lines if at least one
        pattern did not fit its surname; because of ``transaction.atomic``
        this discards everything created during the run.
        """
        bot_history()
        # Close the file deterministically and ignore blank lines, which
        # would otherwise break the four-way unpacking below.
        with open(file_path) as csv_file:
            surnames = [
                line.decode('utf-8').strip().split(',')
                for line in csv_file if line.strip()]
        ok = True

        for female, female_patterns, male, male_patterns in surnames:
            pair = {}
            surnames_data = (
                (female, female_patterns, F),
                (male, male_patterns, M),
            )
            for surname, patterns, gender in surnames_data:
                lexeme = self.new_lexeme(surname)
                if lexeme is not None:
                    if not self._add_inflections(
                            lexeme, surname, patterns, gender):
                        # Keep going so all bad patterns get reported.
                        ok = False
                pair[gender] = lexeme
            # Link the pair only when both lexemes were created in this run.
            if pair[F] and pair[M]:
                CrossReference.objects.create(
                    from_lexeme=pair[F], to_lexeme=pair[M], type=FEMMAS)
                CrossReference.objects.create(
                    from_lexeme=pair[M], to_lexeme=pair[F], type=MASFEM)
        if not ok:
            # Raising inside transaction.atomic rolls the whole import back.
            raise Exception(
                'Import aborted: bad patterns were found (see stderr)')