# wsjp.py 8.34 KB
# -*- coding: utf-8 -*-
from django.db import connection
from dictionary.models import LexemeAttributeValue, Inflection, LexemeAttribute


# SQL template for one subquery over a single `odmieniasie` row (o.id).
#
# It is filled in two stages by make_data():
#   1. Python %-formatting substitutes the named placeholders
#      %(haslo_tab)s   - expression selected as the headword column,
#      %(row)s         - expression selected as the row column,
#      %(refl)s        - comma-separated attribute value ids used in the
#                        left outer join on dictionary_lexemeav,
#      %(leks_clause)s - extra WHERE condition on the lexeme.
#   2. Each `%%s` collapses to `%s` and is bound by cursor.execute(); the
#      three parameters are, in order: charfl (matched against p.charfl),
#      the odmieniasie id (o.id) and the wariant value.
#
# Only lexemes listed in the 'SGJP' or 'WSJP' dictionaries are returned.
BASE_QUERY = '''
    select distinct pref||rdzen||zak||suf slowo, %(haslo_tab)s, hom,
      l.pos,
      case when l.pos in ('subst','osc','v') then p.charfl else '' end
      as rodzaj, podparad, %(row)s, col, rowspan, colspan, kskl
    from
      odmieniasie o
      join leksemy l on (o.l_id = l.id)
      left outer join dictionary_lexemeav refl
        on (l.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
      join wzory on (o.w_id = wzory.id)
      join typywzorow tw on (wzory.typ = tw.id)
      join paradygmatywsjp p
        on (%%s = p.charfl and wzory.typ = p.typr and l.pos = p.pos)
      join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
      join leksemy_w_slownikach lws on (l.id = lws.l_id)
    where lws.slownik in ('SGJP', 'WSJP') and
      o.id=%%s and %(leks_clause)s and wariant=%%s
'''

# SQL template for one "nested" subquery: forms of lexemes `l` that are linked
# through `odsylacze` (l_id_od -> l_id_do) to the lexeme `g`.
#
# It is filled in two stages by make_data():
#   1. Python %-formatting substitutes the named placeholders
#      %(haslo)s       - expression selected as the headword column,
#      %(refl)s        - comma-separated attribute value ids used in the
#                        left outer join on dictionary_lexemeav (joined on g),
#      %(main_clause)s - extra WHERE condition.
#   2. Each `%%s` collapses to `%s` and is bound by cursor.execute(); the
#      two parameters are, in order: charfl (matched against p.charfl) and
#      the id of the lexeme `g`.
#
# For l.pos in ('ppas', 'appas') the inflection rows (odmieniasie) are taken
# from `g`; for every other part of speech they are taken from `l` itself.
NESTED_BASE = '''
    select distinct rdzen||zak||suf slowo, %(haslo)s, g.hom, l.pos,
      case when l.pos in ('ppas', 'appas') then p.charfl else '' end as rodzaj,
      podparad, row, col, rowspan, colspan, kskl
    from
      leksemy l
      join odsylacze ods on l.id = l_id_od
      join typyodsylaczy tods on ods.typods_id = tods.id
      join leksemy g on l_id_do = g.id
      left outer join dictionary_lexemeav refl
        on (g.id = refl.lexeme_id and refl.attribute_value_id in (%(refl)s))
      join odmieniasie o
        on case when l.pos in ('ppas', 'appas') then g.id else l.id end = o.l_id
      join wzory on (o.w_id = wzory.id)
      join paradygmatywsjp p
        on (%%s = p.charfl and wzory.typ = p.typr and l.pos = p.pos)
      join zakonczenia z on (o.w_id = z.w_id and p.efobaz = z.efobaz)
      join leksemy_w_slownikach lws on (l.id = lws.l_id)
    where lws.slownik in ('SGJP', 'WSJP') and
      g.id=%%s and %(main_clause)s
'''

# Lexeme attributes consulted by get_charfl(), looked up by name.
# NOTE(review): these lookups run when the module is imported, so importing
# this module presumably requires a reachable database that already contains
# all four attributes - confirm against deployment/test setup.
ZLOZ = LexemeAttribute.objects.get(name=u'forma złoż.')
POPRZ = LexemeAttribute.objects.get(name=u'forma poprz.')
ASPEKT = LexemeAttribute.objects.get(name=u'aspekt')
WLASC = LexemeAttribute.objects.get(name=u'właściwy')


def get_charfl(inflection):
    """Return the ``charfl`` string for ``inflection``.

    make_data() binds the result as the value compared with
    ``paradygmatywsjp.charfl``.  The value depends on the part of speech of
    the inflection's lexeme:

    * ``subst`` - the symbol of the inflection's gender,
    * ``osc``   - always ``'f'``,
    * ``pred``  - always ``'qndk'``,
    * ``v``     - the ASPEKT attribute value, prefixed with ``'q'`` when the
      WLASC attribute value is ``'Q'``,
    * ``adj``   - ``'3+'`` when both the ZLOZ and POPRZ attributes are
      ``u'obecna'``, ``'0-'`` when neither is, ``''`` otherwise,
    * anything else - ``''``.
    """
    lexeme = inflection.lexeme
    part_of_speech = lexeme.part_of_speech_id

    if part_of_speech == 'subst':
        return inflection.gender.symbol

    if part_of_speech == 'osc':
        return 'f'

    if part_of_speech == 'pred':
        return 'qndk'

    if part_of_speech == 'v':
        proper = lexeme.attribute_value(WLASC).value
        aspect = lexeme.attribute_value(ASPEKT).value
        prefix = 'q' if proper == 'Q' else ''
        return prefix + aspect

    if part_of_speech == 'adj':
        has_zloz = lexeme.attribute_value(ZLOZ).value == u'obecna'
        has_poprz = lexeme.attribute_value(POPRZ).value == u'obecna'
        if has_zloz and has_poprz:
            return '3+'
        if has_zloz or has_poprz:
            # Exactly one of the two is present (the original author's note
            # names the case "zloz. present, poprz. absent").
            return ''
        return '0-'

    return ''


def make_data(entry):
    """Collect the inflection-table rows for all lexemes of ``entry``.

    For every ``Inflection`` whose lexeme belongs to ``entry`` nine subqueries
    are generated - six from ``BASE_QUERY`` (plain, "się", "sobie" and the
    three negated counterparts) and three from ``NESTED_BASE`` - and all of
    them are combined with ``union all`` into a single statement.

    :param entry: value used in the ``lexeme__entry`` filter on ``Inflection``.
    :return: list of rows as yielded by the database cursor, with the columns
        (slowo, haslo, hom, pos, rodzaj, podparad, row, col, rowspan, colspan,
        kskl), ordered by haslo, hom, rodzaj, podparad, row, col, kskl, slowo.
        An empty list when the entry has no inflections.
    :raises KeyError: when one of the expected reflexivity values is missing
        from the database.
    """
    # One round trip for the reflexivity attribute values; both the complete
    # id list and the value -> pk mapping are derived from it.
    refl_pairs = list(LexemeAttributeValue.objects.filter(
        attribute__name=u'zwrotność').values_list('pk', 'value'))
    refl_ids = ', '.join(str(pk) for pk, _value in refl_pairs)
    pk_by_value = dict((value, pk) for pk, value in refl_pairs)

    def id_list(values):
        # Comma-separated primary keys of the given reflexivity values.
        return ', '.join(str(pk_by_value[value]) for value in values)

    nonrefl_ids = id_list((u'—', u'(się)', u'(sobie)'))
    sie_ids = id_list((u'się', u'(się)', u'się/sobie'))
    sobie_ids = id_list((u'sobie', u'(sobie)', u'się/sobie'))

    inflections = Inflection.objects.filter(lexeme__entry=entry)
    if not inflections:
        return []

    # The variant definitions do not depend on the inflection, so they are
    # built once.  Each BASE_QUERY variant is (template fields, wariant).
    # The 'haslo' key is not referenced by BASE_QUERY; it is kept so the
    # substitution mapping stays the same as before.
    base_variants = [
        (
            {
                'haslo_tab': 'haslo',
                'haslo': 'haslo',
                'leks_clause': '''l.pos not in ('skrl','skrw') and
                        (l.pos not in ('v', 'pact') or
                        refl.attribute_value_id in (%s))''' % nonrefl_ids,
            },
            '1',
        ),
        # verbs with "się":
        (
            {
                'haslo_tab': u"haslo||' się'",
                'haslo': 'haslo',
                'leks_clause': '''(l.pos in ('v', 'pact') and
                        refl.attribute_value_id in (%s))''' % sie_ids,
            },
            's',
        ),
        # verbs with "sobie":
        (
            {
                'haslo_tab': u"haslo||' sobie'",
                'haslo': 'haslo',
                'leks_clause': '''(l.pos in ('v', 'pact') and
                        refl.attribute_value_id in (%s))''' % sobie_ids,
            },
            's1',
        ),
        # negated verbs:
        (
            {
                'haslo_tab': "'nie '||haslo",
                'haslo': "'nie '||haslo",
                'leks_clause': '''l.pos='v' and
                        refl.attribute_value_id in (%s)''' % nonrefl_ids,
            },
            'n',
        ),
        # negated verbs with "się":
        (
            {
                'haslo_tab': u"'nie '||haslo||' się'",
                'haslo': "'nie '||haslo",
                'leks_clause': '''(l.pos='v' and
                        refl.attribute_value_id in (%s))''' % sie_ids,
            },
            'ns',
        ),
        # negated verbs with "sobie":
        (
            {
                'haslo_tab': u"'nie '||haslo||' sobie'",
                'haslo': "'nie '||haslo",
                'leks_clause': '''(l.pos='v' and
                        refl.attribute_value_id in (%s))''' % sobie_ids,
            },
            'ns1',
        ),
    ]
    nested_variants = [
        # lexemes that require nesting: adjcom, advcom, ppas and appas
        {
            'haslo': 'g.haslo',
            'main_clause': '''typods in ('comadj','comadv','ppasver') and
                        l.pos in ('adjcom','advcom','ppas','appas') and
                        (l.pos not in ('ppas', 'appas') or refl.attribute_value_id in (%s))'''
                           % nonrefl_ids,
        },
        # passive participles of verbs with "się":
        {
            'haslo': u"g.haslo||' się'",
            'main_clause': '''(typods ='ppasver' and l.pos in ('ppas', 'appas') and
                        refl.attribute_value_id in (%s))''' % sie_ids,
        },
        # passive participles of verbs with "sobie":
        {
            'haslo': u"g.haslo||' sobie'",
            'main_clause': '''(typods ='ppasver' and l.pos in ('ppas', 'appas') and
                        refl.attribute_value_id in (%s))''' % sobie_ids,
        },
    ]

    subqueries = []
    params = []
    for inflection in inflections:
        charfl = get_charfl(inflection)
        endings = inflection.pattern.endings
        # Patterns that have a 'pl:gen:fchar' ending but no 'pl:gen:fneut'
        # ending get the cell at row 5, column 3 moved up to row 4.  The
        # second lookup only runs when the first one succeeds.
        if (endings.filter(base_form_label__symbol='pl:gen:fchar').exists()
                and not endings.filter(
                    base_form_label__symbol='pl:gen:fneut').exists()):
            row = '''case when row = 5 and col = 3 then 4 else row end'''
        else:
            row = 'row'
        common_fields = {
            'refl': refl_ids,
            'row': row,
        }
        # dict(a, **b) merges on both Python 2 and 3, unlike
        # a.items() + b.items(), which fails on Python 3.
        for fields, wariant in base_variants:
            subqueries.append(BASE_QUERY % dict(common_fields, **fields))
            params.extend((charfl, inflection.id, wariant))
        for fields in nested_variants:
            subqueries.append(NESTED_BASE % dict(common_fields, **fields))
            params.extend((charfl, inflection.lexeme_id))

    # NOTE(review): the ORDER BY names `haslo` and `row`; in a UNION these are
    # typically resolved against the first subquery, whose row column is an
    # unaliased CASE expression when the remap above applies - confirm the
    # database backend accepts that.
    sql = ' union all '.join(subqueries) + '''
        order by haslo, hom, rodzaj, podparad, row, col, kskl, slowo
    '''
    cursor = connection.cursor()
    try:
        cursor.execute(sql, params)
        return list(cursor)
    finally:
        # Release the cursor even when the query fails.
        cursor.close()