Commit 81daac4e64b717f579d196fcf5cd4339870ab07a
1 parent
dd653a8f
import nazwisk
--HG-- branch : beta
Showing
1 changed file
with
75 additions
and
0 deletions
dictionary/management/commands/import_surnames.py
0 → 100644
1 | +# -*- coding: utf-8 -*- | ||
2 | +import sys | ||
3 | +from django.core.management.base import BaseCommand | ||
4 | +from django.db import transaction | ||
5 | + | ||
6 | +from accounts.util import bot_history | ||
7 | +from dictionary.models import Lexeme, Vocabulary, ClassificationValue, \ | ||
8 | + Inflection, Gender, CrossReferenceType, CrossReference | ||
9 | +from patterns.models import Pattern | ||
10 | + | ||
# Fixture rows shared by the whole command. They are fetched with
# ``.objects.get()`` at module level, i.e. when this module is imported, so
# the rows must already exist in the database at that point.

# Vocabulary used as the owner of every lexeme created by this command; each
# new lexeme is also added to it.
11 | +SGJP = Vocabulary.objects.get(id='SGJP') | ||
12 | + | ||
# Classification value every created lexeme is added to
# (the label 'nazwisko' is Polish for "surname").
13 | +SURNAME = ClassificationValue.objects.get(label=u'nazwisko') | ||
14 | + | ||
# Cross-reference types used to link a surname pair:
# FEMMAS for the female -> male link, MASFEM for the male -> female link.
15 | +FEMMAS = CrossReferenceType.objects.get(symbol='femmas') | ||
16 | +MASFEM = CrossReferenceType.objects.get(symbol='masfem') | ||
17 | + | ||
# Genders assigned to the inflections of female (F) and male (M) surnames.
18 | +F = Gender.objects.get(symbol='f') | ||
19 | +M = Gender.objects.get(symbol='m1') | ||
20 | + | ||
21 | + | ||
class Command(BaseCommand):
    """Import paired female/male surnames from a CSV file.

    Every non-blank line of the input file must hold exactly four
    comma-separated fields::

        female_surname,female_patterns,male_surname,male_patterns

    where each ``*_patterns`` field is a ``/``-separated list of pattern
    names. For every surname that does not exist yet a lexeme is created
    together with one inflection per pattern, and when both lexemes of a
    line were created they are linked with cross-references in both
    directions.
    """
    help = "Import male and female surnames from CSV."
    args = "filename"

    @staticmethod
    def new_lexeme(entry):
        """Create and save a surname lexeme for ``entry``.

        Returns the saved lexeme, or ``None`` (after printing a notice to
        stderr) when a lexeme with the same entry already exists.
        """
        # exists() asks the database a yes/no question instead of fetching
        # every matching row just to test the result for truthiness.
        if Lexeme.objects.filter(entry=entry).exists():
            print >>sys.stderr, '%s already exists, skipping' % entry
            return None
        lexeme = Lexeme(
            entry=entry, part_of_speech_id='subst',
            status=Lexeme.STATUS_DESCRIBED,
            # The *_id attribute expects the key of the vocabulary, not the
            # Vocabulary instance itself.
            owner_vocabulary_id=SGJP.pk, source='Bronk')
        lexeme.save()
        SGJP.add_lexeme(lexeme)
        SURNAME.add_lexeme(lexeme)
        return lexeme

    @transaction.atomic
    def handle(self, file_path, **options):
        """Run the import for the CSV file at ``file_path``.

        Raises ``CommandError`` (an ``Exception`` subclass) after the whole
        file has been processed if any pattern did not yield a root for its
        surname. Because the method runs inside ``transaction.atomic``, the
        exception rolls the transaction back. A line that does not have
        exactly four fields raises ``ValueError`` from the unpacking below.
        """
        # Local import: only this method needs the name.
        from django.core.management.base import CommandError

        bot_history()
        # The context manager closes the file as soon as it has been read;
        # blank lines (e.g. a trailing newline at the end of the file) are
        # skipped so they do not break the four-way unpacking.
        with open(file_path) as csv_file:
            surnames = [
                line.decode('utf-8').strip().split(',')
                for line in csv_file if line.strip()]
        ok = True

        for female, female_patterns, male, male_patterns in surnames:
            pair = {}
            surnames_data = (
                (female, female_patterns, F),
                (male, male_patterns, M),
            )
            for surname, patterns, gender in surnames_data:
                lexeme = self.new_lexeme(surname)
                # Stored even when it is None (surname already existed) so
                # that both keys are always present for the check below.
                pair[gender] = lexeme
                if not lexeme:
                    continue
                for index, pattern_name in enumerate(patterns.split('/')):
                    pattern = Pattern.objects.get(name=pattern_name)
                    inflection = Inflection(
                        lexeme=lexeme, pattern=pattern, gender=gender,
                        index=index)
                    inflection.root = inflection.get_root()
                    if not inflection.root:
                        # Keep going so that every bad pattern in the file
                        # is reported in a single run.
                        print >>sys.stderr, 'Bad pattern for %s: %s' % (
                            surname, pattern_name)
                        ok = False
                    else:
                        inflection.save()
            # Link the two surnames only when both lexemes are new.
            if pair[F] and pair[M]:
                CrossReference.objects.create(
                    from_lexeme=pair[F], to_lexeme=pair[M], type=FEMMAS)
                CrossReference.objects.create(
                    from_lexeme=pair[M], to_lexeme=pair[F], type=MASFEM)
        if not ok:
            raise CommandError(
                'Import aborted: bad patterns were reported on stderr; '
                'the transaction has been rolled back.')