# -*- coding: utf-8 -*-
import io
import sys

from django.core.management.base import BaseCommand, CommandError
from django.db import transaction

from accounts.util import bot_history
from dictionary.models import Lexeme, Vocabulary, ClassificationValue, \
    Inflection, Gender, CrossReferenceType, CrossReference
from patterns.models import Pattern

# NOTE: the lookups below query the database as soon as this module is
# imported; each raises DoesNotExist if the expected row is missing.
SGJP = Vocabulary.objects.get(id='SGJP')

SURNAME = ClassificationValue.objects.get(label=u'nazwisko')

# Cross-reference types linking the female form to the male one and back.
FEMMAS = CrossReferenceType.objects.get(symbol='femmas')
MASFEM = CrossReferenceType.objects.get(symbol='masfem')

F = Gender.objects.get(symbol='f')
M = Gender.objects.get(symbol='m1')


class Command(BaseCommand):
    """Import pairs of female/male surnames and cross-reference them.

    Each non-blank input line must hold exactly four comma-separated
    fields: female surname, female pattern names, male surname, male
    pattern names.  Multiple pattern names within one field are separated
    by ``/``.
    """

    help = "Import male and female surnames from CSV."
    args = "filename"

    @staticmethod
    def new_lexeme(entry):
        """Create and save a surname lexeme for ``entry``.

        Returns the new ``Lexeme``, or ``None`` (after printing a notice to
        stderr) when a lexeme with the same entry already exists.
        """
        # .exists() asks the database for a yes/no answer instead of
        # loading every matching row just to test truthiness.
        if Lexeme.objects.filter(entry=entry).exists():
            print >>sys.stderr, '%s already exists, skipping' % entry
            return None
        lexeme = Lexeme(
            entry=entry, part_of_speech_id='subst',
            status=Lexeme.STATUS_DESCRIBED,
            # The *_id attribute expects the raw key value, not the
            # Vocabulary instance itself.
            owner_vocabulary_id=SGJP.pk, source='Bronk')
        lexeme.save()
        SGJP.add_lexeme(lexeme)
        SURNAME.add_lexeme(lexeme)
        return lexeme

    @staticmethod
    def _read_rows(file_path):
        """Return the list of 4-field rows read from the UTF-8 file.

        Blank lines are skipped.  Raises ``ValueError`` naming the file and
        line number when a line does not have exactly four fields.
        """
        rows = []
        # The context manager guarantees the file handle is closed.
        with io.open(file_path, encoding='utf-8') as csv_file:
            for line_no, line in enumerate(csv_file, 1):
                line = line.strip()
                if not line:
                    continue
                fields = line.split(',')
                if len(fields) != 4:
                    raise ValueError(
                        '%s:%d: expected 4 comma-separated fields, got %d'
                        % (file_path, line_no, len(fields)))
                rows.append(fields)
        return rows

    @staticmethod
    def _add_inflections(lexeme, surname, patterns, gender):
        """Attach one inflection per ``/``-separated pattern name.

        An inflection whose ``get_root()`` result is falsy is reported on
        stderr and not saved.  Returns ``True`` when every inflection was
        saved, ``False`` otherwise.
        """
        ok = True
        for index, pattern_name in enumerate(patterns.split('/')):
            pattern = Pattern.objects.get(name=pattern_name)
            inflection = Inflection(
                lexeme=lexeme, pattern=pattern, gender=gender, index=index)
            inflection.root = inflection.get_root()
            if inflection.root:
                inflection.save()
            else:
                print >>sys.stderr, 'Bad pattern for %s: %s' % (
                    surname, pattern_name)
                ok = False
        return ok

    @transaction.atomic
    def handle(self, file_path, **options):
        """Import every surname pair from ``file_path`` in one transaction.

        Raises ``CommandError`` at the end if any pattern was rejected; as
        the method runs under ``transaction.atomic``, the raised exception
        rolls back everything written during the run.
        """
        # NOTE(review): presumably marks subsequent changes as made by a
        # bot in the edit history -- confirm in accounts.util.
        bot_history()
        ok = True

        for female, female_patterns, male, male_patterns in \
                self._read_rows(file_path):
            pair = {}
            surnames_data = (
                (female, female_patterns, F),
                (male, male_patterns, M),
            )
            for surname, patterns, gender in surnames_data:
                lexeme = self.new_lexeme(surname)
                if lexeme and not self._add_inflections(
                        lexeme, surname, patterns, gender):
                    ok = False
                # Recorded even when None (lexeme skipped) so that both
                # keys are always present for the check below.
                pair[gender] = lexeme
            # Only link the two forms when both lexemes were newly created.
            if pair[F] and pair[M]:
                CrossReference.objects.create(
                    from_lexeme=pair[F], to_lexeme=pair[M], type=FEMMAS)
                CrossReference.objects.create(
                    from_lexeme=pair[M], to_lexeme=pair[F], type=MASFEM)
        if not ok:
            # Raising aborts the atomic block and undoes the whole import.
            raise CommandError(
                'Import aborted: some patterns were invalid (see stderr); '
                'no changes were saved.')