# wsjp.py (4.66 KB)
# -*- coding: utf-8 -*-
from django.db import connection
from dictionary.models import LexemeAttributeValue


def make_data(entries):
  """Return the inflection-table rows for the given headwords.

  Builds a single ``union all`` statement out of six sub-selects (plain
  lexemes, reflexive verbs, negated verbs, negated reflexive verbs, nested
  adjcom/advcom/ppas forms and passive participles of reflexive verbs) and
  runs it through the Django database connection.

  Args:
    entries: iterable of headwords.  They are passed to the database as
      bound parameters of the ``... in (...)`` filters, never interpolated
      into the SQL text.

  Returns:
    A list of rows with the columns ``slowo, haslo, hom, pos, rodzaj,
    podparad, row, col, rowspan, colspan, kskl``, ordered by ``haslo, hom,
    rodzaj, podparad, row, col, kskl, slowo``.  An empty list is returned
    when ``entries`` is empty.

  Raises:
    KeyError: if the u'zwrotność' attribute lacks one of the values
      u'—', u'(się)' or u'(sobie)'.
  """
  # Materialise once so any iterable (tuple, generator, ...) can be both
  # counted for the placeholders and concatenated into the parameter lists.
  entries = list(entries)
  if not entries:
    # With no headwords the filter would render as ``in ()``, which is not
    # valid SQL on every backend; nothing can match anyway.
    return []
  entry_placeholders = ', '.join('%s' for _ in entries)

  # Values of the u'zwrotność' (reflexivity) attribute.  One query feeds both
  # the list of all value ids and the value -> id lookup.
  refl_pairs = list(LexemeAttributeValue.objects.filter(
    attribute__name=u'zwrotność').values_list('pk', 'value'))
  refl_ids = ', '.join(str(pk) for pk, _ in refl_pairs)
  refls_rev = dict((value, pk) for pk, value in refl_pairs)
  nonrefl = [refls_rev[v] for v in (u'—', u'(się)', u'(sobie)')]
  nonrefl_ids = ', '.join(str(pk) for pk in nonrefl)
  empty_refl = refls_rev[u'—']

  # Template for forms taken directly from the lexeme.  ``%%s`` survives the
  # first (Python) formatting pass as ``%s`` and is bound by the DB driver.
  base_query = '''
    select distinct pref||rdzen||zak||suf slowo, %(haslo_tab)s, hom,
      l.pos,
      case when l.pos in ('subst','osc','v') then ch.charfl else '' end
      as rodzaj, podparad, row, col, rowspan, colspan, kskl
    from
      leksemy l
      left outer join dictionary_lexemeav refl
        on (l.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
      join odmieniasie o on (l.id = o.l_id)
      join charfle ch on (o.charfl = ch.id)
      join wzory on (o.w_id = wzory.id)
      join typywzorow tw on (wzory.typ = tw.id)
      join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
      join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
    where slownik in ('SGJP', 'WSJP') and
      %(haslo)s in (%(entry_placeholders)s) and %(leks_clause)s and wariant=%%s
  '''
  # Template for forms reached through a cross-reference (odsylacze) from
  # lexeme ``l`` to the headword lexeme ``g``.
  nested_base = '''
    select rdzen||zak||suf slowo, %(haslo)s, g.hom, l.pos,
      case when l.pos = 'ppas' then ch.charfl else '' end as rodzaj,
      podparad, row, col, rowspan, colspan, kskl
    from
      leksemy l
      join odsylacze ods on l.id = l_id_od
      join typyodsylaczy tods on ods.typods_id = tods.id
      join leksemy g on l_id_do = g.id
      left outer join dictionary_lexemeav refl
        on (g.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
      join odmieniasie o on l.id = o.l_id
      join charfle ch on (o.charfl = ch.id)
      join wzory on (o.w_id = wzory.id)
      join paradygmatywsjp p on (o.charfl = p.charfl and wzory.typ = p.typr)
      join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
    where l.slownik in ('SGJP', 'WSJP') and
      g.haslo in (%(entry_placeholders)s) and %(main_clause)s
  '''

  # Each part is (sql, params); the params must follow the order in which the
  # ``%s`` placeholders appear in that part's SQL text.
  query_parts = [
    # everything except abbreviations; verbs only when not strictly reflexive
    (
      base_query % {
        'haslo_tab': 'haslo',
        'haslo': 'haslo',
        'entry_placeholders': entry_placeholders,
        'leks_clause': '''l.pos not in ('skrl','skrw') and
          (l.pos != 'v' or refl.attribute_value_id in (%s))'''
          % nonrefl_ids,
        'refl': refl_ids,
      },
      entries + ['1']
    ),
    # reflexive verbs (with "się"):
    (
      base_query % {
        'haslo_tab': u"haslo||' się'",
        'haslo': 'haslo',
        'entry_placeholders': entry_placeholders,
        'leks_clause': '''(l.pos='v' and refl.attribute_value_id <> %s)''',
        'refl': refl_ids,
      },
      entries + [empty_refl, 's']
    ),
    # negated verbs:
    (
      base_query % {
        'haslo_tab': "'nie '||haslo",
        'haslo': "'nie '||haslo",
        'entry_placeholders': entry_placeholders,
        'leks_clause': '''l.pos='v' and refl.attribute_value_id in (%s)'''
          % nonrefl_ids,
        'refl': refl_ids,
      },
      entries + ['n']
    ),
    # negated reflexive verbs:
    (
      base_query % {
        'haslo_tab': u"'nie '||haslo||' się'",
        'haslo': "'nie '||haslo",
        'entry_placeholders': entry_placeholders,
        'leks_clause': '''(l.pos='v' and refl.attribute_value_id <> %s)''',
        'refl': refl_ids,
      },
      entries + [empty_refl, 'ns']
    ),
    # forms that need nesting under another headword: adjcom, advcom and ppas
    (
      nested_base % {
        'haslo': 'g.haslo',
        'entry_placeholders': entry_placeholders,
        'main_clause': '''typods in ('comadj','comadv','ppasver') and
          l.pos in ('adjcom','advcom','ppas') and
          (l.pos != 'ppas' or refl.attribute_value_id in (%s))'''
          % nonrefl_ids,
        'refl': refl_ids,
      },
      entries
    ),
    # passive participles of reflexive verbs:
    (
      nested_base % {
        'haslo': u"g.haslo||' się'",
        'entry_placeholders': entry_placeholders,
        'main_clause': '''(typods ='ppasver' and l.pos ='ppas' and
          refl.attribute_value_id <> %s)''',
        'refl': refl_ids,
      },
      entries + [empty_refl]
    )
  ]

  # The trailing ``order by`` applies to the whole union.
  query = ' union all '.join(sql for sql, _ in query_parts) + '''
    order by haslo, hom, rodzaj, podparad, row, col, kskl, slowo
  '''
  params = [
    param for _, part_params in query_parts for param in part_params]

  cursor = connection.cursor()
  try:
    cursor.execute(query, params)
    return list(cursor)
  finally:
    # Release the cursor even when execution or fetching fails.
    cursor.close()