# -*- coding: utf-8 -*-
from django.db.models import Max
from dictionary.models import Ending, Lexeme, LexemeInflectionPattern, \
Pattern, Gender, PartOfSpeech, LexemeAttributeValue, CrossReferenceType, \
CrossReference
# Inflection patterns used when creating derivative lexemes (fetched once at
# import time; raises Pattern.DoesNotExist if the dictionary is unpopulated).
P07 = Pattern.objects.get(name='P07')  # pact ('-cy' participles)
P28 = Pattern.objects.get(name='P28')  # ppas/appas entries in '-ty'
P12 = Pattern.objects.get(name='P12')  # ppas/appas fallback ('-ny/-ni')
P19 = Pattern.objects.get(name='P19')  # ppas/appas '-ony/-eni'
P20 = Pattern.objects.get(name='P20')  # ppas/appas '-iony' (plural not '-ieni')
P0196 = Pattern.objects.get(name='0196')  # gerunds in '-nie'
P0195 = Pattern.objects.get(name='0195')  # gerunds in '-cie'
# Gender assigned to gerund derivatives.
n2 = Gender.objects.get(symbol='n2')
# Attribute values marking a form as absent ("nieobecna"), attached to
# participle derivatives in create_derivative().
NO_POPRZ = LexemeAttributeValue.objects.get(
    value=u'nieobecna', attribute__name=u'forma poprz.')
NO_ZLOZ = LexemeAttributeValue.objects.get(
    value=u'nieobecna', attribute__name=u'forma złoż.')
# Maps derivative part-of-speech symbol to the pair of cross-reference type
# symbols: (verb -> derivative, derivative -> verb).
CR_TYPES = {
    'pact': ('verpact', 'pactver'),
    'ppas': ('verppas', 'ppasver'),
    'appas': ('verppas', 'ppasver'),
    'ger': ('verger', 'gerver'),
}
def ppas_data(lexeme, pos='ppas'):
    """Yield candidate 'ppas'/'appas' derivative entries for *lexeme*.

    For every inflection pattern of the lexeme, pairs each ending labelled
    '10' (singular, suffixed with 'y') with each ending labelled '12'
    (plural) and yields a dict describing the candidate derivative.
    """
    for lip in lexeme.lexemeinflectionpattern_set.all():
        sg_endings = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol='10')
        pl_endings = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol='12')
        for sg in sg_endings:
            for pl in pl_endings:
                yield {
                    'pos': pos,
                    'entry': lip.root + sg.string + 'y',
                    'pl': lip.root + pl.string,
                    'index': lip.index,
                }
def pact_data(lexeme):
    """Yield candidate 'pact' derivative entries for *lexeme*.

    One candidate per ending labelled '3' in each of the lexeme's
    inflection patterns; the entry is root + ending + 'cy'.
    """
    for lip in lexeme.lexemeinflectionpattern_set.all():
        matching = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol='3')
        for end in matching:
            yield {
                'pos': 'pact',
                'entry': lip.root + end.string + 'cy',
                'index': lip.index,
            }
def ger_data(lexeme):
    """Yield candidate 'ger' (gerund) derivative entries for *lexeme*.

    One candidate per ending labelled '11' in each of the lexeme's
    inflection patterns; the entry is root + ending + 'ie'.
    """
    for lip in lexeme.lexemeinflectionpattern_set.all():
        matching = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol='11')
        for end in matching:
            yield {
                'pos': 'ger',
                'entry': lip.root + end.string + 'ie',
                'index': lip.index,
            }
def lexeme_derivatives(lexeme):
    """Yield derivative descriptions for a verb lexeme.

    Produces the dicts from ppas_data/pact_data/ger_data, gated on the
    lexeme's attribute values: only verbs whose u'właściwy' attribute is
    '' or '(Q)' get derivatives; passive participles require
    u'przechodniość' 'T' (ppas) or 'qT' (appas); active participles
    require a non-perfective u'aspekt'; gerunds are always generated.
    """
    if lexeme.part_of_speech.symbol != 'v':
        return
    attrs = lexeme.lexemeattributevalue_set
    proper = attrs.filter(
        attribute__name=u'właściwy', value__in=('', '(Q)'))
    if not proper:
        return
    trans = attrs.filter(attribute__name=u'przechodniość', value='T')
    q_trans = attrs.filter(attribute__name=u'przechodniość', value='qT')
    imperf = attrs.filter(attribute__name=u'aspekt').exclude(value='dk')
    if trans or q_trans:
        for data in ppas_data(lexeme, 'ppas' if trans else 'appas'):
            yield data
    if imperf:
        for data in pact_data(lexeme):
            yield data
    for data in ger_data(lexeme):
        yield data
def create_derivative(lexeme, part_of_speech, entry, index, pl=None):
    """Create, save and cross-reference a derivative lexeme of *lexeme*.

    Args:
        lexeme: source (verb) Lexeme the derivative is built from.
        part_of_speech: symbol of the derivative's part of speech:
            'ppas', 'appas', 'pact' or 'ger'.
        entry: the derivative's dictionary entry (singular base form).
        index: index of the source LexemeInflectionPattern the entry was
            generated from (its qualifiers are copied to the new LIP).
        pl: plural base form; required for 'ppas'/'appas', where it is
            used to choose the inflection pattern. Ignored otherwise.

    Returns:
        The newly created and saved Lexeme.
    """
    # NOTE(review): max(id)+1 is racy under concurrent writers; `or 0`
    # guards against an empty table (aggregate returns None there).
    next_id = (Lexeme.all_objects.aggregate(Max('id'))['id__max'] or 0) + 1
    der = Lexeme(
        id=next_id, entry=entry, part_of_speech_id=part_of_speech,
        status=lexeme.status, owner_vocabulary_id=lexeme.owner_vocabulary_id,
        specialist=lexeme.specialist,
        borrowing_source_id=lexeme.borrowing_source_id)
    der.fix_homonym_number()
    der.save()
    der.refresh_data()
    lexeme.owner_vocabulary.add_lexeme(der)
    lip = LexemeInflectionPattern(lexeme=der, index=1)
    if part_of_speech in ('ppas', 'appas'):
        # -ty/-ci
        if entry.endswith('ty'):
            lip.pattern = P28
        # -iony with a plural other than -ieni
        elif entry.endswith('iony') and not pl.endswith('ieni'):
            lip.pattern = P20
        # -ony/-eni (incl. -iony/-ieni).
        # BUG FIX: this was `entry.endswith('eni')`, which is never true —
        # ppas/appas entries are built as root + ending + 'y' — so the
        # branch was dead and such lexemes fell through to P12. The '-eni'
        # marker lives on the plural form, so test `pl` instead.
        elif pl.endswith('eni'):
            lip.pattern = P19
        # -ny/-ni
        else:
            lip.pattern = P12
    elif part_of_speech == 'pact':
        lip.pattern = P07
    elif part_of_speech == 'ger':
        lip.gender = n2
        if entry.endswith('cie'):
            lip.pattern = P0195
        else:  # -nie
            lip.pattern = P0196
    lip.root = lip.get_root()
    lip.save()
    # Copy the source lexeme's attribute values that apply to the
    # derivative's part of speech.
    for attr, attr_val in lexeme.attributes_values():
        if attr_val and attr.parts_of_speech.filter(symbol=part_of_speech):
            attr_val.add_lexeme(der)
    if part_of_speech in ('ppas', 'appas', 'pact'):
        NO_POPRZ.add_lexeme(der)
        NO_ZLOZ.add_lexeme(der)
    for q in lexeme.qualifiers.all():
        der.qualifiers.add(q)
    # TODO(review, translated): maybe copy the inflection-pattern
    # qualifiers to the lexeme (der) instead of to lip?
    orig_lip = LexemeInflectionPattern.objects.get(lexeme=lexeme, index=index)
    for q in orig_lip.qualifiers.all():
        lip.qualifiers.add(q)
    # Link verb <-> derivative with a pair of cross-references.
    lc = lexeme.part_of_speech.lexical_class_id
    if lc == 'v':
        cr_to, cr_from = CR_TYPES[part_of_speech]
        cr_type = CrossReferenceType.objects.get(
            symbol=cr_to, from_pos_id='v', to_pos_id=part_of_speech)
        CrossReference.objects.create(
            from_lexeme=lexeme, to_lexeme=der, type=cr_type)
        cr_type = CrossReferenceType.objects.get(
            symbol=cr_from, from_pos_id=part_of_speech, to_pos_id='v')
        CrossReference.objects.create(
            from_lexeme=der, to_lexeme=lexeme, type=cr_type)
    return der