wsjp.py
3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
from django.db import connection
def make_data(entries):
    """Run the combined WSJP form query for the given headwords.

    Six sub-selects are glued together with ``union all`` and executed in
    a single round trip through the Django ``connection``.  Four of them
    are instances of ``base_query`` (one per ``wariant`` value: ``'1'``,
    ``'s'``, ``'n'``, ``'ns'``) and two are instances of ``nested_base``,
    which reaches the headword through the ``odsylacze`` table.

    Args:
        entries: iterable of headword values bound to the ``in (...)``
            list of every sub-select.  Any iterable is accepted; it is
            copied into a list before use.

    Returns:
        A list with every row produced by the cursor.  Each row carries
        the eleven selected columns: slowo, haslo, hom, pos, rodzaj,
        podparad, row, col, rowspan, colspan, kskl; rows are ordered by
        haslo, hom, rodzaj, podparad, row, col, kskl, slowo.  An empty
        ``entries`` yields an empty list without touching the database.
    """
    # Copy so tuples/generators work with the list concatenations below
    # and the caller's sequence is never aliased.
    entries = list(entries)
    if not entries:
        # "in ()" is not valid SQL on every backend and could match
        # nothing anyway, so skip the query entirely.
        return []

    # One "%s" placeholder per headword; the driver binds the values.
    entry_placeholders = ', '.join(['%s'] * len(entries))

    # NOTE: "%%s" survives the Python-level "%" formatting below as a
    # literal "%s" and is bound by the database driver (wariant value).
    base_query = '''
        select distinct pref||rdzen||zak||suf slowo, %(haslo_tab)s, hom,
            l.pos,
            case when l.pos in ('subst','osc','v') then ch.charfl else '' end
                as rodzaj, podparad, row, col, rowspan, colspan, kskl
        from
            leksemy l
            join odmieniasie o on (l.id = o.l_id)
            join charfle ch on (o.charfl = ch.id)
            join wzory on (o.w_id = wzory.id)
            join typywzorow tw on (wzory.typ = tw.id)
            join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
            join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
        where slownik in ('SGJP', 'WSJP') and
            %(haslo)s in (%(entry_placeholders)s) and %(leks_clause)s and wariant=%%s
    '''
    nested_base = '''
        select rdzen||zak||suf slowo, %(haslo)s, g.hom, l.pos,
            case when l.pos = 'ppas' then ch.charfl else '' end as rodzaj,
            podparad, row, col, rowspan, colspan, kskl
        from
            leksemy l
            join odsylacze ods on l.id = l_id_od
            join typyodsylaczy tods on ods.typods_id = tods.id
            join leksemy g on l_id_do = g.id
            join odmieniasie o on l.id = o.l_id
            join charfle ch on (o.charfl = ch.id)
            join wzory on (o.w_id = wzory.id)
            join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
            join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
        where l.slownik in ('SGJP', 'WSJP') and
            g.haslo in (%(entry_placeholders)s) and %(main_clause)s
    '''

    # Each item is (sql_text, params); params must follow the order in
    # which the "%s" placeholders appear inside sql_text.
    query_parts = [
        # everything except abbreviations; verbs only when haslosuf is
        # empty or matches the LIKE pattern '%(%'
        (
            base_query % {
                'haslo_tab': 'haslo',
                'haslo': 'haslo',
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''l.pos not in ('skrl','skrw') and
                    (l.pos != 'v' or haslosuf = '' or haslosuf like %s)''',
            },
            entries + ['%(%', '1']
        ),
        # reflexive ("się") verbs:
        (
            base_query % {
                'haslo_tab': u"haslo||' się'",
                'haslo': 'haslo',
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''(l.pos='v' and haslosuf <> '')''',
            },
            entries + ['s']
        ),
        # negated verbs:
        (
            base_query % {
                'haslo_tab': "'nie '||haslo",
                'haslo': "'nie '||haslo",
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''l.pos='v' and (haslosuf = '' or haslosuf like %s)''',
            },
            entries + ['%(%', 'n']
        ),
        # negated reflexive ("się") verbs:
        (
            base_query % {
                'haslo_tab': u"'nie '||haslo||' się'",
                'haslo': "'nie '||haslo",
                'entry_placeholders': entry_placeholders,
                'leks_clause': '''(l.pos='v' and haslosuf <> '')''',
            },
            entries + ['ns']
        ),
        # lexemes that need nesting under another entry: adjcom, advcom
        # and ppas
        (
            nested_base % {
                'haslo': 'g.haslo',
                'entry_placeholders': entry_placeholders,
                'main_clause': '''typods in ('comadj','comadv','ppasver') and
                    l.pos in ('adjcom','advcom','ppas') and
                    (l.pos != 'ppas' or g.haslosuf = '' or g.haslosuf like %s)'''
            },
            entries + ['%(%']
        ),
        # passive participles of reflexive ("się") verbs:
        (
            nested_base % {
                'haslo': u"g.haslo||' się'",
                'entry_placeholders': entry_placeholders,
                'main_clause': '''(typods ='ppasver' and l.pos ='ppas' and
                    g.haslosuf <> '')'''
            },
            entries
        ),
    ]

    # Column names of a union come from its first select, so the
    # "order by" may refer to haslo/rodzaj/slowo directly.
    query = ' union all '.join(sql for sql, _ in query_parts) + '''
        order by haslo, hom, rodzaj, podparad, row, col, kskl, slowo
    '''

    # Flatten the per-part parameter lists in the same order as the SQL.
    params = []
    for _, part_params in query_parts:
        params.extend(part_params)

    cursor = connection.cursor()
    try:
        cursor.execute(query, params)
        # Materialise the rows before the cursor is released.
        return list(cursor)
    finally:
        cursor.close()