# auto_derivatives.py 7.62 KB
# -*- coding: utf-8 -*-
from django.db.models import Max
from dictionary.models import Ending, Lexeme, LexemeInflectionPattern, \
    Pattern, Gender, LexemeAttributeValue, CrossReferenceType, \
    CrossReference

# Characters that guess_osc() treats as vowels when inspecting the letter
# before a final 'i'.
VOWELS = u'aeiouyąęó'

# Inflection patterns, genders and attribute values assigned to derivatives
# by create_derivative().
# NOTE: these are looked up when the module is imported, so importing this
# module requires the named rows to be available.
P07 = Pattern.objects.get(name='P07')
P28 = Pattern.objects.get(name='P28')
P12 = Pattern.objects.get(name='P12')
P19 = Pattern.objects.get(name='P19')
P20 = Pattern.objects.get(name='P20')
P0196 = Pattern.objects.get(name='0196')
P0195 = Pattern.objects.get(name='0195')
Posc = Pattern.objects.get(name='0156i')
Pcom = Pattern.objects.get(name='Pcom')
Pndm = Pattern.objects.get(name='ndm')
n2 = Gender.objects.get(symbol='n2')
f = Gender.objects.get(symbol='f')
# The u'nieobecna' values of the u'forma poprz.' and u'forma złoż.'
# attributes; create_derivative() attaches both to 'ppas', 'appas', 'pact',
# 'adjcom' and 'nieadj' derivatives.
NO_POPRZ = LexemeAttributeValue.objects.get(
    value=u'nieobecna', attribute__name=u'forma poprz.')
NO_ZLOZ = LexemeAttributeValue.objects.get(
    value=u'nieobecna', attribute__name=u'forma złoż.')

# Maps a derivative kind to a pair of cross-reference type names.
# create_derivative() uses the first one for the reference from the base
# lexeme to the derivative and the second one for the reverse reference.
CR_TYPES = {
    'pact': ('verpact', 'pactver'),
    'ppas': ('verppas', 'ppasver'),
    'appas': ('verppas', 'ppasver'),
    'ger': ('verger', 'gerver'),
    'osc': ('adjosc', 'oscadj'),
    'adv': ('adjadv', 'advadj'),
    'advcom': ('adjadvc', 'advcadj'),
    'adjcom': ('adjcom', 'comadj'),
    'nieadj': ('adjnie', 'nieadj'),
    'nieadv': ('adjnie', 'nieadj'),
    'nieosc': ('adjnie', 'nieadj'),
}

def ppas_data(lips, pos='ppas'):
    """Generate passive-participle candidates for inflection patterns.

    For every item of `lips`, each ending of its pattern labelled '10' is
    combined with each ending labelled '12'.  Every combination yields a
    dict with the keys:

    * 'pos'   -- the `pos` argument,
    * 'entry' -- root + ending '10' + 'y',
    * 'pl'    -- root + ending '12',
    * 'index' -- the index of the item.
    """
    for lip in lips:
        pattern = lip.pattern
        entry_endings = Ending.objects.filter(
            pattern=pattern, base_form_label__symbol='10')
        pl_endings = Ending.objects.filter(
            pattern=pattern, base_form_label__symbol='12')
        for entry_ending in entry_endings:
            for pl_ending in pl_endings:
                yield dict(
                    pos=pos,
                    entry=lip.root + entry_ending.string + 'y',
                    pl=lip.root + pl_ending.string,
                    index=lip.index,
                )

def pact_data(lips):
    """Generate active-participle candidates for inflection patterns.

    For every item of `lips` and every ending of its pattern labelled '3',
    yields a dict with 'pos' set to 'pact', 'entry' set to
    root + ending + 'cy' and 'index' set to the index of the item.
    """
    for lip in lips:
        matching = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol='3')
        for found in matching:
            yield dict(
                pos='pact',
                entry=lip.root + found.string + 'cy',
                index=lip.index,
            )

def ger_data(lips):
    """Generate gerund candidates for inflection patterns.

    For every item of `lips` and every ending of its pattern labelled '11',
    yields a dict with 'pos' set to 'ger', 'entry' set to
    root + ending + 'ie' and 'index' set to the index of the item.
    """
    for lip in lips:
        matching = Ending.objects.filter(
            pattern=lip.pattern, base_form_label__symbol='11')
        for found in matching:
            yield dict(
                pos='ger',
                entry=lip.root + found.string + 'ie',
                index=lip.index,
            )

def guess_osc(s):
    """Guess the u'-ość' noun for the entry `s`.

    The stem is derived from the ending of `s`:

    * final 'i' after a vowel becomes 'j'; after 'g', 'k' or 'l' it is
      dropped; otherwise it is kept,
    * final 'y' is dropped,
    * in final 'ek'/'en' the 'e' is dropped,
    * in final u'ój'/u'ów' the u'ó' becomes 'o',
    * anything else is left unchanged.

    Raises IndexError for an empty string, and for a lone 'i'.
    """
    last = s[-1]
    tail = s[-2:]
    if last == 'i':
        before_last = s[-2]
        if before_last in VOWELS:
            stem = s[:-1] + u'j'
        elif before_last in u'gkl':
            stem = s[:-1]
        else:
            stem = s
    elif last == u'y':
        stem = s[:-1]
    elif tail in (u'ek', u'en'):
        stem = s[:-2] + last
    elif tail in (u'ój', u'ów'):
        stem = s[:-2] + u'o' + last
    else:
        stem = s
    return stem + u'ość'

def make_negation(s):
    """Return `s` with a negating prefix.

    The prefix is u'nie' when the first character of `s` is lowercase and
    u'nie-' otherwise.  Raises IndexError for an empty string.
    """
    prefix = u'nie' if s[0].islower() else u'nie-'
    return prefix + s

def lexeme_derivatives(lexeme):
    """Generate descriptions of the derivatives expected for `lexeme`.

    Yields dicts with the keys 'pos', 'entry' and 'index' (participle
    candidates from ppas_data() also carry 'pl').  Nothing is yielded when
    the lexeme has no inflection patterns or is neither a verb ('v') nor an
    adjective ('adj').

    Verbs produce candidates only when they have a u'właściwy' attribute
    value of '' or '(Q)':

    * 'ppas' when u'przechodniość' is 'T', otherwise 'appas' when it is 'qT',
    * 'pact' when there is an u'aspekt' value other than 'dk',
    * 'ger' always.

    Adjectives produce 'adjcom', 'adv' and 'advcom' with no entry, then
    'osc' and 'nieadj' with guessed entries, all with index 1.
    """
    lips = list(lexeme.lexemeinflectionpattern_set.all())
    if not lips:
        return
    symbol = lexeme.part_of_speech.symbol
    if symbol == 'v':
        values = lexeme.lexemeattributevalue_set
        is_proper = values.filter(
            attribute__name=u'właściwy', value__in=('', '(Q)'))
        if not is_proper:
            return
        transitive = values.filter(
            attribute__name=u'przechodniość', value='T')
        quasi_transitive = values.filter(
            attribute__name=u'przechodniość', value='qT')
        imperfective = values.filter(
            attribute__name=u'aspekt').exclude(value='dk')
        # The querysets are lazy: the quasi-transitive one is only
        # evaluated when the verb is not transitive.
        if transitive:
            participle_pos = 'ppas'
        elif quasi_transitive:
            participle_pos = 'appas'
        else:
            participle_pos = None
        if participle_pos is not None:
            for item in ppas_data(lips, participle_pos):
                yield item
        if imperfective:
            for item in pact_data(lips):
                yield item
        for item in ger_data(lips):
            yield item
        return
    if symbol == 'adj':
        # adjcom, adv, advcom, osc, nieadj
        entry_builders = (
            ('adjcom', None),
            ('adv', None),
            ('advcom', None),
            ('osc', guess_osc),
            ('nieadj', make_negation),
        )
        for pos, build_entry in entry_builders:
            if build_entry is None:
                entry = None
            else:
                entry = build_entry(lexeme.entry)
            yield {'pos': pos, 'entry': entry, 'index': 1}


def _participle_pattern(entry, pl):
    """Pick the inflection pattern for a passive participle.

    The choice is based on the endings of the singular `entry` and of the
    plural form `pl`; a missing `pl` is treated as an empty string.
    """
    plural = pl or u''
    # -ty/-ci
    if entry.endswith('ty'):
        return P28
    # -iony/-eni
    if entry.endswith('iony') and not plural.endswith('ieni'):
        return P20
    # -ony/-eni: participle entries end in -y, so the -eni ending has to be
    # looked for in the plural form (the entry is still checked so that
    # callers passing an -eni entry keep getting P19).
    if plural.endswith('eni') or entry.endswith('eni'):
        return P19
    # -ny/-ni
    return P12


def create_derivative(lexeme, part_of_speech, entry, index, pl=None):
    """Create and save a lexeme derived from `lexeme`.

    Args:
        lexeme: the base lexeme.
        part_of_speech: kind of derivative, a key of CR_TYPES.  A 'nie'
            prefix marks a negation; the remainder is stored as the part
            of speech of the new lexeme.
        entry: entry of the new lexeme.
        index: index of the base lexeme's inflection pattern whose
            qualifiers are copied to the derivative.  Not used for
            negations, which copy every inflection pattern of the base
            lexeme instead.
        pl: plural form; used only to choose the pattern of 'ppas' and
            'appas' derivatives.

    Returns:
        A list of the created lexemes: the derivative itself, followed by
        its negation when `part_of_speech` is 'osc' or 'adv'.

    Raises:
        KeyError: if `part_of_speech` is not a key of CR_TYPES (raised
            after the derivative has already been saved).
    """
    # Max('id') is None when there are no lexemes yet.
    max_id = Lexeme.all_objects.aggregate(Max('id'))['id__max']
    next_id = (max_id or 0) + 1
    negation = part_of_speech.startswith('nie')
    if negation:
        pos = part_of_speech[3:]
    else:
        pos = part_of_speech
    der = Lexeme(
        id=next_id, entry=entry, part_of_speech_id=pos,
        status=lexeme.status, owner_vocabulary_id=lexeme.owner_vocabulary_id,
        specialist=lexeme.specialist,
        borrowing_source_id=lexeme.borrowing_source_id)
    der.fix_homonym_number()
    der.save()
    der.refresh_data()
    lexeme.owner_vocabulary.add_lexeme(der)
    if not negation:
        # A regular derivative gets a single inflection pattern chosen
        # from its kind and the shape of its entry.
        lip = LexemeInflectionPattern(lexeme=der, index=1)
        if part_of_speech in ('ppas', 'appas'):
            lip.pattern = _participle_pattern(entry, pl)
        elif part_of_speech == 'pact':
            lip.pattern = P07
        elif part_of_speech == 'ger':
            lip.gender = n2
            if entry.endswith('cie'):
                lip.pattern = P0195
            else:  # -nie
                lip.pattern = P0196
        elif part_of_speech == 'osc':
            lip.pattern = Posc
            lip.gender = f
        elif part_of_speech == 'adjcom':
            lip.pattern = Pcom
        elif part_of_speech in ('adv', 'advcom'):
            lip.pattern = Pndm
        lip.root = lip.get_root()
        lip.save()
        orig_lip = LexemeInflectionPattern.objects.get(
            lexeme=lexeme, index=index)
        # TODO: maybe the inflection-pattern qualifiers should be copied
        # to the lexeme (der) rather than to its inflection pattern (lip)?
        for q in orig_lip.qualifiers.all():
            lip.qualifiers.add(q)
    else:
        # A negation inflects exactly like its base lexeme.
        for orig_lip in lexeme.lexemeinflectionpattern_set.all():
            lip = LexemeInflectionPattern(
                lexeme=der, index=orig_lip.index, pattern=orig_lip.pattern,
                gender=orig_lip.gender)
            lip.root = lip.get_root()
            lip.save()
            for q in orig_lip.qualifiers.all():
                lip.qualifiers.add(q)
    # Copy the attribute values that apply to the derivative.
    # NOTE(review): for negations this filters on the 'nie'-prefixed name
    # rather than on `pos` -- confirm that this is intended.
    for attr, attr_val in lexeme.attributes_values():
        if attr_val and attr.parts_of_speech.filter(symbol=part_of_speech):
            attr_val.add_lexeme(der)
    if part_of_speech in ('ppas', 'appas', 'pact', 'adjcom', 'nieadj'):
        NO_POPRZ.add_lexeme(der)
        NO_ZLOZ.add_lexeme(der)
    for q in lexeme.qualifiers.all():
        der.qualifiers.add(q)
    cr_to, cr_from = CR_TYPES[part_of_speech]
    lexeme.add_cross_reference(der, cr_to)
    der.add_cross_reference(lexeme, cr_from)
    new_lexemes = [der]
    if part_of_speech in ('osc', 'adv'):
        # These derivatives also get their own negated counterpart.
        new_lexemes.extend(create_derivative(
            der, 'nie' + part_of_speech, make_negation(entry), index))
    return new_lexemes