Commit 81daac4e64b717f579d196fcf5cd4339870ab07a
1 parent
dd653a8f
import nazwisk
--HG-- branch : beta
Showing 1 changed file with 75 additions and 0 deletions
dictionary/management/commands/import_surnames.py
0 → 100644
# -*- coding: utf-8 -*-
"""Management command that imports paired female/male surnames from a CSV file.

Each CSV row has four comma-separated fields::

    female_surname,female_patterns,male_surname,male_patterns

where a ``*_patterns`` field holds one or more pattern names separated by
``/``.
"""
import io
import sys

from django.core.management.base import BaseCommand, CommandError
from django.db import transaction

from accounts.util import bot_history
from dictionary.models import Lexeme, Vocabulary, ClassificationValue, \
    Inflection, Gender, CrossReferenceType, CrossReference
from patterns.models import Pattern

# NOTE: the lookups below run at import time, so this module can only be
# imported against a database that already contains these rows.

# Vocabulary that owns (and contains) every imported lexeme.
SGJP = Vocabulary.objects.get(id='SGJP')

# Classification value attached to every imported lexeme.
SURNAME = ClassificationValue.objects.get(label=u'nazwisko')

# Cross-reference types linking the two forms of a surname:
# FEMMAS is used for female -> male, MASFEM for male -> female.
FEMMAS = CrossReferenceType.objects.get(symbol='femmas')
MASFEM = CrossReferenceType.objects.get(symbol='masfem')

# Genders assigned to the inflections of female / male surnames.
F = Gender.objects.get(symbol='f')
M = Gender.objects.get(symbol='m1')


class Command(BaseCommand):
    """Import surname lexemes, their inflections and mutual cross-references."""

    help = "Import male and female surnames from CSV."
    args = "filename"

    @staticmethod
    def new_lexeme(entry):
        """Create and save a surname lexeme for ``entry``.

        The lexeme is added to the SGJP vocabulary and to the SURNAME
        classification value.

        Returns the new ``Lexeme``, or ``None`` (after printing a notice to
        stderr) when a lexeme with the same entry already exists.
        """
        # exists() asks the database for a yes/no answer instead of loading
        # every matching row just to test truthiness.
        if Lexeme.objects.filter(entry=entry).exists():
            print >>sys.stderr, '%s already exists, skipping' % entry
            return None
        lexeme = Lexeme(
            entry=entry, part_of_speech_id='subst',
            status=Lexeme.STATUS_DESCRIBED,
            # The *_id attribute takes the raw key, not the model instance.
            owner_vocabulary_id=SGJP.pk, source='Bronk')
        lexeme.save()
        SGJP.add_lexeme(lexeme)
        SURNAME.add_lexeme(lexeme)
        return lexeme

    @staticmethod
    def _add_inflections(lexeme, surname, patterns, gender):
        """Attach one inflection per ``/``-separated pattern name to ``lexeme``.

        Problems are printed to stderr rather than raised, so that a single
        run reports every bad row.  Returns ``True`` when every pattern was
        found and produced a root, ``False`` otherwise.
        """
        all_good = True
        for index, pattern_name in enumerate(patterns.split('/')):
            try:
                pattern = Pattern.objects.get(name=pattern_name)
            except Pattern.DoesNotExist:
                print >>sys.stderr, 'Unknown pattern for %s: %s' % (
                    surname, pattern_name)
                all_good = False
                continue
            inflection = Inflection(
                lexeme=lexeme, pattern=pattern, gender=gender, index=index)
            inflection.root = inflection.get_root()
            if not inflection.root:
                print >>sys.stderr, 'Bad pattern for %s: %s' % (
                    surname, pattern_name)
                all_good = False
            else:
                inflection.save()
        return all_good

    @transaction.atomic
    def handle(self, file_path, **options):
        """Import every surname pair listed in the CSV file at ``file_path``.

        For each row, a lexeme with inflections is created for the female
        and the male surname (surnames that already exist are skipped), and
        when both were created they are cross-referenced in both directions.

        Raises ``CommandError`` after the whole file has been processed if
        any row was malformed or any pattern was unusable; because the
        method runs inside ``transaction.atomic``, that rolls back
        everything the run created.
        """
        bot_history()
        # Decode at the I/O boundary and close the file deterministically.
        with io.open(file_path, encoding='utf-8') as csv_file:
            lines = [line.strip() for line in csv_file]
        ok = True

        for line_no, line in enumerate(lines, 1):
            if not line:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            fields = line.split(',')
            if len(fields) != 4:
                print >>sys.stderr, 'Line %d: expected 4 fields, got %d' % (
                    line_no, len(fields))
                ok = False
                continue
            female, female_patterns, male, male_patterns = fields

            created = {}
            surnames_data = (
                (female, female_patterns, F),
                (male, male_patterns, M),
            )
            for surname, patterns, gender in surnames_data:
                lexeme = self.new_lexeme(surname)
                if lexeme is None:
                    continue
                if not self._add_inflections(
                        lexeme, surname, patterns, gender):
                    ok = False
                created[gender] = lexeme

            # Cross-reference only when both halves of the pair are new.
            female_lexeme = created.get(F)
            male_lexeme = created.get(M)
            if female_lexeme and male_lexeme:
                CrossReference.objects.create(
                    from_lexeme=female_lexeme, to_lexeme=male_lexeme,
                    type=FEMMAS)
                CrossReference.objects.create(
                    from_lexeme=male_lexeme, to_lexeme=female_lexeme,
                    type=MASFEM)

        if not ok:
            raise CommandError(
                'Import aborted: invalid rows or patterns were found '
                '(see messages above); no changes were saved.')
... | ... |