# phraseology_generator.py
# -*- coding: utf-8 -*-

from dictionary.models import sort_arguments, sort_positions, sortatributes
from settings import MORFEUSZ2

def lexicalisation(argument, categories, base):
    """Generate surface forms for a lexicalised ``argument``.

    Returns a 2-tuple ``(phrases, verb_forms)``:

    * a ``fixed`` argument yields the words of its last sorted attribute
      and an empty list;
    * an ``np`` lexicalisation yields the result of ``get_nps`` and the
      result of ``get_verb`` for ``base``;
    * a ``prepnp`` lexicalisation yields the result of ``get_prepnps`` and
      an empty list;
    * any other lexicalisation type yields ``([], [])``.

    An ``xp`` lexicalisation is first unwrapped to the phrase type nested in
    its first parameter, e.g. xp(...)[np/prepnp].
    """
    subj = is_subj(categories)

    if argument.type == 'fixed':
        last_attribute = sortatributes(argument)[-1]
        return (get_words(last_attribute), [])

    attributes = sortatributes(argument)
    inner = attributes[0].values.all()[0].argument
    kind = inner.type
    params = sortatributes(inner)

    if kind == 'xp':
        # xp(...)[np/prepnp], ... -> descend to the wrapped phrase type.
        inner = params[0].values.all()[0].argument
        kind = inner.type
        params = sortatributes(inner)

    if kind == 'np':
        # Layout: np(case), number, nouns, atr
        case = get_case(params[0], subj)
        number = get_number(attributes[1], subj)
        nouns = get_words(attributes[2])
        noun_phrases = get_nps(case, number, nouns, attributes[3])
        return (noun_phrases, get_verb(base, number, subj))

    if kind == 'prepnp':
        # Layout: prepnp(prep, case), number, nouns, atr
        prep = get_preposition(params[0])
        case = get_case(params[1], subj)
        number = get_number(attributes[1], subj)
        nouns = get_words(attributes[2])
        return (get_prepnps(prep, case, number, nouns, attributes[3]), [])

    return ([], [])

def is_subj(categories):
    """Return True if any item in ``categories`` has ``category == u'subj'``."""
    return any(entry.category == u'subj' for entry in categories)

def get_preposition(attribute):
    """Return the parameter type name of the first value of ``attribute``.

    The caller uses this name as the preposition of a ``prepnp`` phrase.
    """
    first_value = attribute.values.all()[0]
    return first_value.parameter.type.name

def get_words(attribute):
    """Return the ``text`` of every value of ``attribute``, trimmed by one
    character at each end.

    NOTE(review): the trimmed characters are presumably surrounding quotes;
    confirm against the stored data.
    """
    return [value.text[1:-1] for value in attribute.values.all()]

def get_case(attribute, is_subj):
    """Return the case name stored in the first value of ``attribute``.

    The placeholder ``u'str'`` is resolved to ``u'nom'`` when ``is_subj`` is
    true and to ``u'acc'`` otherwise; every other name is returned unchanged.
    """
    name = attribute.values.all()[0].parameter.type.name
    if name != u'str':
        return name
    return u'nom' if is_subj else u'acc'

def get_number(attribute, is_subj):
    """Return the number name stored in the first value of ``attribute``.

    The wildcard ``u'_'`` becomes ``u'sg'`` when ``is_subj`` is true; in all
    other cases the stored name is returned unchanged.
    """
    name = attribute.values.all()[0].parameter.type.name
    if is_subj and name == u'_':
        return u'sg'
    return name

def get_nps(case, number, nouns, _atr):
    """Return the inflected forms of ``nouns`` matching ``case`` and ``number``.

    Each noun is passed to ``MORFEUSZ2.generate`` and every generated
    interpretation is kept when its tag contains ``case`` and ``number``.
    A value of ``u'_'`` for either constraint means "do not filter on it".

    Forms are collected for every noun, in input order. Previously only the
    forms of the last noun were returned, and an empty ``nouns`` raised
    ``UnboundLocalError``; an empty input now yields ``[]``.

    ``_atr`` is accepted for interface compatibility and is not used.
    """
    result = []
    for noun in nouns:
        for interp in MORFEUSZ2.generate(noun.encode('utf8')):
            tag = interp.getTag(MORFEUSZ2)
            # Containment test on the tag, same matching rule as before.
            if case != u'_' and case not in tag:
                continue
            if number != u'_' and number not in tag:
                continue
            result.append(interp.orth)
    return result

def get_prepnps(prep, case, number, nouns, _atr):
    """Return prepositional phrases built from ``prep`` and noun forms.

    The noun forms come from ``get_nps(case, number, nouns, _atr)``; each is
    prefixed with ``prep`` and a single space.
    """
    phrases = []
    for noun_form in get_nps(case, number, nouns, _atr):
        phrases.append(prep + ' ' + noun_form)
    return phrases

def get_verb(inf, number, is_subj):
    """Return finite third-person singular forms generated from ``inf``.

    Returns ``None`` when ``is_subj`` is false. Otherwise ``inf`` is passed
    to ``MORFEUSZ2.generate`` and the ``orth`` of every interpretation whose
    tag contains ``u'fin'``, ``u'sg'`` and ``u'ter'`` is returned as a list.

    NOTE(review): ``number`` is not consulted; ``u'sg'`` is hard-coded in
    the filter. Confirm whether the verb should agree with ``number``.
    """
    if not is_subj:
        return None

    required_markers = (u'fin', u'sg', u'ter')
    forms = []
    for interp in MORFEUSZ2.generate(inf.encode('utf8')):
        orth, tag = interp.orth, interp.getTag(MORFEUSZ2)
        if all(marker in tag for marker in required_markers):
            forms.append(orth)
    return forms