wsjp.py
4.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: utf-8 -*-
from django.db import connection
from dictionary.models import LexemeAttributeValue
def make_data(entries):
    """Fetch inflection-table rows for the given headwords in one SQL query.

    Six sub-selects are combined with ``union all`` and executed through
    Django's raw database ``connection``:

    1. plain lexemes (``wariant='1'``),
    2. verbs listed with a trailing reflexive marker (``wariant='s'``),
    3. negated verbs, matched as ``'nie '||haslo`` (``wariant='n'``),
    4. negated verbs with the reflexive marker (``wariant='ns'``),
    5. nested forms reached through cross-references
       (``adjcom``, ``advcom``, ``ppas``),
    6. passive participles (``ppas``) of verbs with the reflexive marker.

    :param entries: iterable of headword strings; they are bound as query
        parameters, never interpolated into the SQL text.
    :returns: list of result rows with the columns
        ``slowo, haslo, hom, pos, rodzaj, podparad, row, col, rowspan,
        colspan, kskl``, ordered by
        ``haslo, hom, rodzaj, podparad, row, col, kskl, slowo``.
        An empty ``entries`` yields ``[]`` without touching the database.
    :raises KeyError: if one of the attribute values looked up below is not
        present among the ``LexemeAttributeValue`` rows of the attribute.
    """
    # Materialise once: the entries are used both to count placeholders and
    # (several times) as query parameters, so tuples and one-shot iterators
    # must be turned into a list before anything else.
    entries = list(entries)
    if not entries:
        # Without entries the SQL would contain ``in ()``, which is not valid
        # on e.g. PostgreSQL; there is nothing to look up anyway.
        return []

    entry_placeholders = ', '.join(['%s'] * len(entries))

    # All values of the attribute, fetched once as (pk, value) pairs and
    # reused for both the id list and the value -> pk lookup.
    refl_rows = list(LexemeAttributeValue.objects.filter(
        attribute__name=u'zwrotność').values_list('pk', 'value'))
    refl_ids = ', '.join(str(pk) for pk, value in refl_rows)
    refls_rev = dict((value, pk) for pk, value in refl_rows)
    # Primary keys of the values that select the plain (non-'się') variants.
    nonrefl = [refls_rev[v] for v in (u'—', u'(się)', u'(sobie)')]
    nonrefl_ids = ', '.join(str(pk) for pk in nonrefl)
    empty_refl = refls_rev[u'—']

    # NOTE: the SQL text below is kept verbatim. ``%(name)s`` slots are
    # filled in Python right here (with trusted ids / SQL fragments), while
    # ``%%s`` and the ``%s`` inside the substituted clauses survive as
    # DB-API placeholders bound at execute() time.
    base_query = '''
select distinct pref||rdzen||zak||suf slowo, %(haslo_tab)s, hom,
l.pos,
case when l.pos in ('subst','osc','v') then ch.charfl else '' end
as rodzaj, podparad, row, col, rowspan, colspan, kskl
from
leksemy l
left outer join dictionary_lexemeav refl
on (l.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
join odmieniasie o on (l.id = o.l_id)
join charfle ch on (o.charfl = ch.id)
join wzory on (o.w_id = wzory.id)
join typywzorow tw on (wzory.typ = tw.id)
join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
where slownik in ('SGJP', 'WSJP') and
%(haslo)s in (%(entry_placeholders)s) and %(leks_clause)s and wariant=%%s
'''
    nested_base = '''
select rdzen||zak||suf slowo, %(haslo)s, g.hom, l.pos,
case when l.pos = 'ppas' then ch.charfl else '' end as rodzaj,
podparad, row, col, rowspan, colspan, kskl
from
leksemy l
join odsylacze ods on l.id = l_id_od
join typyodsylaczy tods on ods.typods_id = tods.id
join leksemy g on l_id_do = g.id
left outer join dictionary_lexemeav refl
on (g.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
join odmieniasie o on l.id = o.l_id
join charfle ch on (o.charfl = ch.id)
join wzory on (o.w_id = wzory.id)
join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
where l.slownik in ('SGJP', 'WSJP') and
g.haslo in (%(entry_placeholders)s) and %(main_clause)s
'''

    # Each element is (sql_text, params); the params must follow the order in
    # which the placeholders appear in that sql_text: the entries first, then
    # any placeholder from the extra clause, then ``wariant``.
    query_parts = [
        # plain lexemes; verbs only with a non-reflexive attribute value
        (
            base_query % {
                'haslo_tab': 'haslo',
                'haslo': 'haslo',
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''l.pos not in ('skrl','skrw') and
(l.pos != 'v' or refl.attribute_value_id in (%s))'''
                % nonrefl_ids,
                'refl': refl_ids,
            },
            entries + ['1']
        ),
        # reflexive ("się") verbs
        (
            base_query % {
                'haslo_tab': u"haslo||' się'",
                'haslo': 'haslo',
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''(l.pos='v' and refl.attribute_value_id <> %s)''',
                'refl': refl_ids,
            },
            entries + [empty_refl, 's']
        ),
        # negated verbs
        (
            base_query % {
                'haslo_tab': "'nie '||haslo",
                'haslo': "'nie '||haslo",
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''l.pos='v' and refl.attribute_value_id in (%s)'''
                % nonrefl_ids,
                'refl': refl_ids,
            },
            entries + ['n']
        ),
        # negated reflexive ("się") verbs
        (
            base_query % {
                'haslo_tab': u"'nie '||haslo||' się'",
                'haslo': "'nie '||haslo",
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''(l.pos='v' and refl.attribute_value_id <> %s)''',
                'refl': refl_ids,
            },
            entries + [empty_refl, 'ns']
        ),
        # forms that require nesting: adjcom, advcom and ppas
        (
            nested_base % {
                'haslo': 'g.haslo',
                'entry_placeholders': entry_placeholders,
                'main_clause': '''typods in ('comadj','comadv','ppasver') and
l.pos in ('adjcom','advcom','ppas') and
(l.pos != 'ppas' or refl.attribute_value_id in (%s))'''
                % nonrefl_ids,
                'refl': refl_ids,
            },
            entries
        ),
        # passive participles of reflexive ("się") verbs
        (
            nested_base % {
                'haslo': u"g.haslo||' się'",
                'entry_placeholders': entry_placeholders,
                'main_clause': '''(typods ='ppasver' and l.pos ='ppas' and
refl.attribute_value_id <> %s)''',
                'refl': refl_ids,
            },
            entries + [empty_refl]
        )
    ]

    query = ' union all '.join(sql for sql, part_params in query_parts) + '''
order by haslo, hom, rodzaj, podparad, row, col, kskl, slowo
'''
    # Flatten the per-part parameter lists in the same order as the SQL parts.
    params = []
    for sql, part_params in query_parts:
        params.extend(part_params)

    cursor = connection.cursor()
    try:
        cursor.execute(query, params)
        # Read everything before the cursor is released.
        return list(cursor)
    finally:
        cursor.close()