import_surnames.py
2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
import sys

from django.core.management.base import BaseCommand, CommandError
from django.db import transaction

from accounts.util import bot_history
from dictionary.models import Lexeme, Vocabulary, ClassificationValue, \
    Inflection, Gender, CrossReferenceType, CrossReference
from patterns.models import Pattern
# Fixed database rows used by the surname import.
# NOTE: these lookups run when the module is imported, so importing it needs a
# populated database; with the standard Django manager a missing row makes
# ``get()`` raise ``DoesNotExist`` at import time.

# Vocabulary that every newly created lexeme is added to.
SGJP = Vocabulary.objects.get(id='SGJP')
# Classification value (label 'nazwisko') that every new lexeme is added to.
SURNAME = ClassificationValue.objects.get(label=u'nazwisko')
# Cross-reference type used for the female -> male link.
FEMMAS = CrossReferenceType.objects.get(symbol='femmas')
# Cross-reference type used for the male -> female link.
MASFEM = CrossReferenceType.objects.get(symbol='masfem')
# Gender assigned to inflections of the female surname.
F = Gender.objects.get(symbol='f')
# Gender assigned to inflections of the male surname.
M = Gender.objects.get(symbol='m1')
class Command(BaseCommand):
    """Import paired female/male surnames from a CSV file.

    Every non-blank line of the file must contain exactly four
    comma-separated fields::

        female,female_patterns,male,male_patterns

    Each ``*_patterns`` field is a ``/``-separated list of pattern names.
    For every surname that does not exist yet a lexeme is created, one
    inflection per pattern is attached, and (when both lexemes of a row were
    created) the two are cross-referenced in both directions.

    The whole import runs in a single transaction; any reported pattern
    problem aborts the command at the end, which rolls everything back.
    """
    help = "Import male and female surnames from CSV."
    args = "file_path"

    @staticmethod
    def new_lexeme(entry):
        """Create and return a new surname lexeme for ``entry``.

        Returns ``None`` (after printing a notice to stderr) when a lexeme
        with the same entry already exists.
        """
        # exists() issues a cheap existence query instead of loading all rows.
        if Lexeme.objects.filter(entry=entry).exists():
            print >>sys.stderr, '%s already exists, skipping' % entry
            return None
        lexeme = Lexeme(
            entry=entry, part_of_speech_id='subst',
            status=Lexeme.STATUS_DESCRIBED,
            # The ``*_id`` attribute expects the key value, not the instance.
            owner_vocabulary_id=SGJP.pk, source='Bronk')
        lexeme.save()
        SGJP.add_lexeme(lexeme)
        SURNAME.add_lexeme(lexeme)
        return lexeme

    @staticmethod
    def _read_rows(file_path):
        """Return the CSV rows of ``file_path`` as lists of four strings.

        Blank lines are skipped.  Raises ``CommandError`` naming the line
        number when a row does not have exactly four fields.
        """
        rows = []
        # ``with`` guarantees the file is closed even if decoding fails.
        with open(file_path) as csv_file:
            for line_no, raw_line in enumerate(csv_file, 1):
                line = raw_line.decode('utf-8').strip()
                if not line:
                    continue
                fields = line.split(',')
                if len(fields) != 4:
                    raise CommandError(
                        'Line %d: expected 4 comma-separated fields, got %d'
                        % (line_no, len(fields)))
                rows.append(fields)
        return rows

    @staticmethod
    def _add_inflections(lexeme, patterns, gender):
        """Attach one inflection per pattern name to ``lexeme``.

        Problems are printed to stderr and do not stop processing, so that a
        single run reports every bad pattern.  Returns ``True`` when all
        inflections were saved, ``False`` otherwise.
        """
        ok = True
        for index, pattern_name in enumerate(patterns.split('/')):
            try:
                pattern = Pattern.objects.get(name=pattern_name)
            except Pattern.DoesNotExist:
                print >>sys.stderr, 'Unknown pattern for %s: %s' % (
                    lexeme.entry, pattern_name)
                ok = False
                continue
            inflection = Inflection(
                lexeme=lexeme, pattern=pattern, gender=gender, index=index)
            inflection.root = inflection.get_root()
            if not inflection.root:
                print >>sys.stderr, 'Bad pattern for %s: %s' % (
                    lexeme.entry, pattern_name)
                ok = False
            else:
                inflection.save()
        return ok

    @transaction.atomic
    def handle(self, file_path, **options):
        """Run the import for the CSV file at ``file_path``.

        Raises ``CommandError`` (rolling back the transaction) when the file
        is malformed or when any pattern could not be applied.
        """
        bot_history()
        rows = self._read_rows(file_path)
        ok = True
        for female, female_patterns, male, male_patterns in rows:
            pair = {}
            surnames_data = (
                (female, female_patterns, F),
                (male, male_patterns, M),
            )
            for surname, patterns, gender in surnames_data:
                lexeme = self.new_lexeme(surname)
                # Always record the result (possibly None) so the lookup
                # below never fails for a skipped surname.
                pair[gender] = lexeme
                if lexeme is None:
                    continue
                if not self._add_inflections(lexeme, patterns, gender):
                    ok = False
            # Link the two forms only when both were created in this run.
            if pair[F] and pair[M]:
                CrossReference.objects.create(
                    from_lexeme=pair[F], to_lexeme=pair[M], type=FEMMAS)
                CrossReference.objects.create(
                    from_lexeme=pair[M], to_lexeme=pair[F], type=MASFEM)
        if not ok:
            # Raising inside transaction.atomic rolls back every change.
            raise CommandError(
                'Import aborted because of bad patterns; nothing was saved')