Commit 81daac4e64b717f579d196fcf5cd4339870ab07a

Authored by janek@kublik
1 parent dd653a8f

import nazwisk

--HG--
branch : beta
dictionary/management/commands/import_surnames.py 0 → 100644
  1 +# -*- coding: utf-8 -*-
  2 +import sys
  3 +from django.core.management.base import BaseCommand
  4 +from django.db import transaction
  5 +
  6 +from accounts.util import bot_history
  7 +from dictionary.models import Lexeme, Vocabulary, ClassificationValue, \
  8 + Inflection, Gender, CrossReferenceType, CrossReference
  9 +from patterns.models import Pattern
  10 +
# Vocabulary fetched by its id; new surname lexemes are attached to it.
SGJP = Vocabulary.objects.get(id='SGJP')

# Classification value labelled 'nazwisko' (Polish for "surname").
SURNAME = ClassificationValue.objects.get(label=u'nazwisko')

# Cross-reference types, fetched by symbol: 'femmas' is used for the
# female -> male link and 'masfem' for the male -> female link.
FEMMAS, MASFEM = (
    CrossReferenceType.objects.get(symbol=symbol)
    for symbol in ('femmas', 'masfem'))

# Genders assigned to the inflections of female ('f') and male ('m1')
# surnames respectively.
F, M = (
    Gender.objects.get(symbol=symbol)
    for symbol in ('f', 'm1'))
  20 +
  21 +
class Command(BaseCommand):
    """Import pairs of female and male surnames from a CSV file.

    Every non-blank line of the file must consist of exactly four
    comma-separated fields: female surname, female pattern names, male
    surname, male pattern names. Several pattern names for one surname are
    separated with ``/``.
    """
    help = "Import male and female surnames from CSV."
    args = "filename"

    @staticmethod
    def new_lexeme(entry):
        """Create and save a surname lexeme for ``entry``.

        The new lexeme is added to the SGJP vocabulary and to the SURNAME
        classification value.

        Returns the saved lexeme, or ``None`` (after printing a notice to
        stderr) when a lexeme with the same entry already exists.
        """
        # exists() asks the database for a single row instead of loading
        # every matching lexeme just to test for truthiness.
        if Lexeme.objects.filter(entry=entry).exists():
            print >>sys.stderr, '%s already exists, skipping' % entry
            return None
        lexeme = Lexeme(
            entry=entry, part_of_speech_id='subst',
            status=Lexeme.STATUS_DESCRIBED,
            # Pass the instance through the relation itself; the raw
            # ``*_id`` attribute expects a key value, not a model object.
            owner_vocabulary=SGJP, source='Bronk')
        lexeme.save()
        SGJP.add_lexeme(lexeme)
        SURNAME.add_lexeme(lexeme)
        return lexeme

    @staticmethod
    def _read_rows(file_path):
        """Return the CSV rows of ``file_path`` as lists of four fields.

        Blank lines are skipped. Raises ``ValueError`` naming the offending
        line number when a line does not have exactly four fields.
        """
        rows = []
        # The context manager closes the file even if decoding fails.
        with open(file_path) as csv_file:
            for line_no, raw_line in enumerate(csv_file, 1):
                line = raw_line.decode('utf-8').strip()
                if not line:
                    # e.g. a trailing empty line at the end of the file
                    continue
                fields = line.split(',')
                if len(fields) != 4:
                    raise ValueError(
                        'line %d: expected 4 comma-separated fields, got %d'
                        % (line_no, len(fields)))
                rows.append(fields)
        return rows

    @staticmethod
    def _add_inflections(lexeme, surname, patterns, gender):
        """Save one inflection of ``lexeme`` per pattern name in ``patterns``.

        An inflection whose computed root is empty is not saved and is
        reported on stderr. Returns ``True`` when every pattern produced a
        root, ``False`` otherwise.
        """
        all_ok = True
        for index, pattern_name in enumerate(patterns.split('/')):
            pattern = Pattern.objects.get(name=pattern_name)
            inflection = Inflection(
                lexeme=lexeme, pattern=pattern, gender=gender, index=index)
            inflection.root = inflection.get_root()
            if inflection.root:
                inflection.save()
            else:
                print >>sys.stderr, 'Bad pattern for %s: %s' % (
                    surname, pattern_name)
                all_ok = False
        return all_ok

    @transaction.atomic
    def handle(self, file_path, **options):
        """Run the import for the CSV file at ``file_path``.

        Surnames that already exist as lexemes are skipped. When both
        surnames of a row were newly created, they are cross-referenced in
        both directions.

        Raises ``Exception`` after processing all rows if any pattern did
        not fit its surname.
        """
        bot_history()
        rows = self._read_rows(file_path)
        ok = True

        for female, female_patterns, male, male_patterns in rows:
            pair = {}
            surnames_data = (
                (female, female_patterns, F),
                (male, male_patterns, M),
            )
            for surname, patterns, gender in surnames_data:
                lexeme = self.new_lexeme(surname)
                if lexeme and not self._add_inflections(
                        lexeme, surname, patterns, gender):
                    ok = False
                # Recorded even when None so the check below can tell that
                # one side of the pair was skipped.
                pair[gender] = lexeme
            if pair[F] and pair[M]:
                CrossReference.objects.create(
                    from_lexeme=pair[F], to_lexeme=pair[M], type=FEMMAS)
                CrossReference.objects.create(
                    from_lexeme=pair[M], to_lexeme=pair[F], type=MASFEM)

        if not ok:
            # Raised only after all rows were processed so every bad pattern
            # gets reported; the exception leaves the atomic block, so the
            # whole import is rolled back.
            raise Exception(
                'Import aborted: some patterns do not fit their surnames '
                '(see messages above)')
... ...