# wsjp.py 5.16 KB
# -*- coding: utf-8 -*-
from django.db import connection
from dictionary.models import LexemeAttributeValue


def make_data(entries):
    """Return the inflection-table rows for the given headwords.

    Builds one ``union all`` query out of six selects (plain lexemes,
    "się" verbs, negated verbs, negated "się" verbs, nested
    adjcom/advcom/ppas lexemes, and passive participles of "się" verbs),
    runs it through the default Django connection and returns every row.

    Args:
        entries: iterable of headword strings. It is materialised once, so
            tuples and generators are accepted as well as lists.

    Returns:
        A list of the rows produced by the cursor, ordered by
        ``haslo, hom, rodzaj, podparad, row, col, kskl, slowo``. Each row
        holds, in order: slowo, haslo, hom, pos, rodzaj, podparad, row,
        col, rowspan, colspan, kskl. An empty ``entries`` yields ``[]``
        without touching the database.

    Raises:
        KeyError: if any of the reflexivity values u'—', u'(się)' or
            u'(sobie)' is missing from ``LexemeAttributeValue``.
    """
    # Materialise once: the entries are needed both for counting the
    # placeholders and (several times) as query parameters.
    entries = list(entries)
    if not entries:
        # "haslo in ()" is not valid SQL, and no entry can match anyway.
        return []

    entry_placeholders = ', '.join('%s' for entry in entries)

    # Fetch the values of the reflexivity attribute once and derive both
    # the id list and the value -> pk mapping from the same rows.
    refl_pairs = list(LexemeAttributeValue.objects.filter(
        attribute__name=u'zwrotność').values_list('pk', 'value'))
    refl_ids = ', '.join(str(pk) for pk, value in refl_pairs)
    refls_rev = dict((value, pk) for pk, value in refl_pairs)
    nonrefl_ids = ', '.join(
        str(refls_rev[v]) for v in (u'—', u'(się)', u'(sobie)'))
    empty_refl = refls_rev[u'—']

    # NOTE: the ids interpolated directly into the SQL text below are
    # primary keys read from the database, not caller input; the caller's
    # entries always travel as bound parameters.
    base_query = '''
    select distinct pref||rdzen||zak||suf slowo, %(haslo_tab)s, hom,
      l.pos,
      case when l.pos in ('subst','osc','v') then ch.charfl else '' end
      as rodzaj, podparad, row, col, rowspan, colspan, kskl
    from
      leksemy l
      left outer join dictionary_lexemeav refl
        on (l.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
      join odmieniasie o on (l.id = o.l_id)
      join charfle ch on (o.charfl = ch.id)
      join wzory on (o.w_id = wzory.id)
      join typywzorow tw on (wzory.typ = tw.id)
      join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
      join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
    where slownik in ('SGJP', 'WSJP') and
      %(haslo)s in (%(entry_placeholders)s) and %(leks_clause)s and wariant=%%s
  '''
    nested_base = '''
    select rdzen||zak||suf slowo, %(haslo)s, g.hom, l.pos,
      case when l.pos = 'ppas' then ch.charfl else '' end as rodzaj,
      podparad, row, col, rowspan, colspan, kskl
    from
      leksemy l
      join odsylacze ods on l.id = l_id_od
      join typyodsylaczy tods on ods.typods_id = tods.id
      join leksemy g on l_id_do = g.id
      left outer join dictionary_lexemeav refl
        on (g.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
      join odmieniasie o on l.id = o.l_id
      join charfle ch on (o.charfl = ch.id)
      join wzory on (o.w_id = wzory.id)
      join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
      join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
    where l.slownik in ('SGJP', 'WSJP') and
      g.haslo in (%(entry_placeholders)s) and %(main_clause)s
  '''

    def base_part(haslo_tab, haslo, leks_clause, extra_params):
        # One (sql, params) pair built from base_query. A "%s" inside
        # leks_clause survives the interpolation untouched and is bound
        # by the cursor, so extra_params must follow the entries in the
        # order the placeholders appear in the SQL.
        sql = base_query % {
            'haslo_tab': haslo_tab,
            'haslo': haslo,
            'entry_placeholders': entry_placeholders,
            'leks_clause': leks_clause,
            'refl': refl_ids,
        }
        return sql, entries + extra_params

    def nested_part(haslo, main_clause, extra_params):
        # Same as base_part, for the query that goes through odsylacze.
        sql = nested_base % {
            'haslo': haslo,
            'entry_placeholders': entry_placeholders,
            'main_clause': main_clause,
            'refl': refl_ids,
        }
        return sql, entries + extra_params

    # Verbs whose reflexivity value is anything but u'—'; the "%s" is
    # bound to empty_refl at execution time.
    refl_verb_clause = '''(l.pos='v' and refl.attribute_value_id <> %s)'''

    query_parts = [
        # plain entries (variant '1'); verbs only when non-reflexive or
        # optionally reflexive
        base_part(
            'haslo', 'haslo',
            '''l.pos not in ('skrl','skrw') and
          (l.pos != 'v' or refl.attribute_value_id in (%s))'''
            % nonrefl_ids,
            ['1']),
        # reflexive ("się") verbs:
        base_part(
            u"haslo||' się'", 'haslo',
            refl_verb_clause,
            [empty_refl, 's']),
        # negated verbs:
        base_part(
            "'nie '||haslo", "'nie '||haslo",
            '''l.pos='v' and refl.attribute_value_id in (%s)'''
            % nonrefl_ids,
            ['n']),
        # negated reflexive ("się") verbs:
        base_part(
            u"'nie '||haslo||' się'", "'nie '||haslo",
            refl_verb_clause,
            [empty_refl, 'ns']),
        # lexemes that need nesting: adjcom, advcom and ppas
        nested_part(
            'g.haslo',
            '''typods in ('comadj','comadv','ppasver') and
          l.pos in ('adjcom','advcom','ppas') and
          (l.pos != 'ppas' or refl.attribute_value_id in (%s))'''
            % nonrefl_ids,
            []),
        # passive participles of reflexive ("się") verbs:
        nested_part(
            u"g.haslo||' się'",
            '''(typods ='ppasver' and l.pos ='ppas' and
          refl.attribute_value_id <> %s)''',
            [empty_refl]),
    ]

    query = ' union all '.join(sql for sql, part_params in query_parts) + '''
    order by haslo, hom, rodzaj, podparad, row, col, kskl, slowo
  '''
    # Parameters are positional, so concatenate them in the same order as
    # the selects appear in the union.
    params = []
    for sql, part_params in query_parts:
        params.extend(part_params)

    cursor = connection.cursor()
    try:
        cursor.execute(query, params)
        return list(cursor)
    finally:
        # Release the cursor even when execute() or the fetch fails.
        cursor.close()