parser.py 14.1 KB
# -*- coding: utf-8 -*-

from dictionary.ajax_argument_form import get_or_create_text_attr_value, \
                                          is_empty_lemma, correct_empty_lemma_value, \
                                          prepare_text_attr_value, get_or_create_argument, \
                                          get_or_create_argument_attr_value, get_or_create_position_attr_value
                                          
from dictionary.models import Argument_Model, AttrValueSelectionMode, PositionCategory, \
                              get_or_create_position, get_or_create_attr_parameter, \
                              get_attr_models_to_exclude, get_or_create_attribute, \
                              get_or_create_parameter_attr_value

def parse_argument_text_rep(text_rep):
    """Turn the textual representation of an argument into an argument object.

    ``get_arg_parts`` splits ``text_rep`` into the argument type name and the
    list of attribute strings. The type name selects an ``Argument_Model``
    (matched on ``arg_model_name``), the attribute strings are parsed against
    that model, and the result of ``get_or_create_argument`` is returned.
    """
    arg_type, attributes_ls = get_arg_parts(text_rep)
    model = Argument_Model.objects.get(arg_model_name=arg_type)
    return get_or_create_argument(model, parse_attributes(model, attributes_ls))

def parse_argument_parts(arg_type, attributes_ls):
    """Build an argument object from an already split representation.

    ``arg_type`` is the ``arg_model_name`` of the ``Argument_Model`` to use and
    ``attributes_ls`` is the list of attribute strings to parse against it.
    Returns whatever ``get_or_create_argument`` returns for the model and the
    parsed attributes.
    """
    model = Argument_Model.objects.get(arg_model_name=arg_type)
    parsed_attributes = parse_attributes(model, attributes_ls)
    return get_or_create_argument(model, parsed_attributes)
    
def get_arg_parts(text_rep):
    """Split ``type(attr, attr, ...)`` into the type name and attribute strings.

    Everything before the first ``(`` is the type name. When a ``(`` is
    present, the text after it, minus its final character, is passed to
    ``get_attributes``. Without a ``(`` the attribute list is empty.

    Returns a ``(arg_type, attributes_ls)`` tuple.
    """
    arg_type, paren, remainder = text_rep.partition('(')
    if not paren:
        return arg_type, []
    # The last character is dropped unconditionally; it is presumably the
    # closing ')' but this is not checked.
    return arg_type, get_attributes(remainder[:-1])

def get_attributes(attributes_str):
    """Split a string on the commas that are not nested inside brackets.

    Nesting is tracked for ``(``/``)`` and ``[``/``]`` only. Every piece is
    stripped of surrounding whitespace, and the text after the last top-level
    comma is always included, so the empty string yields ``['']``.
    """
    pieces = []
    depth = 0
    piece_start = 0
    for index, char in enumerate(attributes_str):
        if char in ('(', '['):
            depth += 1
        elif char in (')', ']'):
            depth -= 1
        elif char == ',' and depth == 0:
            pieces.append(attributes_str[piece_start:index].strip())
            piece_start = index + 1
    pieces.append(attributes_str[piece_start:].strip())
    return pieces

def parse_attributes(arg_model_obj, attributes_ls):
    """Parse the attribute strings of an argument into attribute objects.

    The attribute models applicable to ``arg_model_obj`` are obtained from
    ``get_attribute_models``; the i-th model is paired with the i-th entry of
    ``attributes_ls``. The model's type name (``type.sym_name``) and whether
    it has any value selection modes decide which builder is used. Models of
    any other type contribute nothing to the result.

    Returns the list of built attribute objects, in model order.
    """
    parsed = []
    models = get_attribute_models(arg_model_obj, attributes_ls)
    for index, model in enumerate(models):
        kind = model.type.sym_name
        modes = model.values_selection_modes
        text = attributes_ls[index].strip()
        if kind == 'position':
            build = get_or_create_positions_attr
        elif kind == 'text':
            build = (get_or_create_complex_text_attr if modes.exists()
                     else get_or_create_simple_text_attr)
        elif kind == 'parameter':
            build = (get_or_create_complex_parameter_attr if modes.exists()
                     else get_or_create_simple_parameter_attr)
        elif kind == 'argument':
            build = (get_or_create_complex_argument_attr if modes.exists()
                     else get_or_create_simple_argument_attr)
        else:
            # Unknown attribute type: skipped without an error.
            continue
        parsed.append(build(model, text))
    return parsed

def get_attribute_models(arg_model, attributes_ls):
    """Return the attribute models of ``arg_model`` ordered by priority.

    When one of the models is named 'TYP FRAZY' (Polish for "phrase type")
    and ``attributes_ls`` has an entry at that model's index, that entry is
    parsed as an argument and the models reported by
    ``get_attr_models_to_exclude`` for it are removed from the result. Only
    the first such model with a matching entry is considered. A ``ValueError``
    raised during that step is ignored and the unfiltered models are returned.
    """
    models = arg_model.atribute_models.order_by('priority')
    if not models.filter(atr_model_name=u'TYP FRAZY').exists():
        return models
    for index, model in enumerate(models):
        if model.atr_model_name == u'TYP FRAZY' and len(attributes_ls) > index:
            try:
                phrase_type_arg = parse_argument_text_rep(attributes_ls[index])
                excluded = get_attr_models_to_exclude(phrase_type_arg)
                # Rebinding is safe: the loop ends right after this.
                models = models.exclude(pk__in=excluded).order_by('priority')
            except ValueError:
                pass
            break
    return models

def get_or_create_simple_text_attr(attribute_model, attr_str):
    """Build a text attribute that holds exactly one value.

    ``attr_str`` is normalised with ``prepare_text_attr_value`` and turned into
    a text value object; the attribute is created without a selection mode
    and without a separator.
    """
    text_value = get_or_create_text_attr_value(prepare_text_attr_value(attr_str))
    return get_or_create_attribute(attribute_model=attribute_model,
                                   values=[text_value],
                                   selection_mode=None,
                                   separator=None)

def get_or_create_complex_text_attr(attribute_model, attr_str):
    """Build a text attribute that may hold a list of lemmas.

    The separator, the selection mode and the raw lemma string are derived
    from ``attr_str``. An empty lemma (as judged by ``is_empty_lemma``) is
    corrected and stored as the only value; otherwise the lemma string is
    split on the separator symbol. With fewer than two resulting values the
    attribute is stored without selection mode and separator.
    """
    separator = get_attr_separator(attribute_model, attr_str)
    selection_mode = get_attr_selection_mode(attribute_model, attr_str)
    lemmas_str = get_attr_values_str(attribute_model, attr_str, selection_mode)

    if is_empty_lemma(lemmas_str):
        raw_texts = [correct_empty_lemma_value(lemmas_str)]
    elif separator:
        raw_texts = lemmas_str.split(separator.symbol)
    else:
        # NOTE(review): without a separator the whole attr_str is used, not
        # lemmas_str -- confirm this is intended.
        raw_texts = [attr_str]

    values = [get_or_create_text_attr_value(prepare_text_attr_value(raw_text))
              for raw_text in raw_texts]

    # There is no list selection when there is only a single lemma.
    if len(values) < 2:
        selection_mode = None
        separator = None
    return get_or_create_attribute(attribute_model=attribute_model,
                                   values=values,
                                   selection_mode=selection_mode,
                                   separator=separator)
    
def get_attr_separator(attribute_model, attr_str):
    """Pick the value separator that applies to ``attr_str``.

    Returns the first of the model's ``value_separators`` whose ``symbol``
    occurs in ``attr_str``. When none occurs and the model has exactly one
    possible separator, that one is returned. Otherwise returns ``None``.
    """
    candidates = attribute_model.value_separators.all()
    chosen = next((candidate for candidate in candidates
                   if candidate.symbol in attr_str), None)
    if not chosen and candidates.count() == 1:
        chosen = candidates.all()[0]
    return chosen

def get_attr_selection_mode(attribute_model, attr_str):
    """Determine the value selection mode used by ``attr_str``.

    When the model allows exactly one selection mode, that mode is returned
    regardless of the text. Otherwise the text before the first occurrence of
    the model's ``val_list_start`` is looked up as the mode name; ``None`` is
    returned when no mode has that name.
    """
    modes = attribute_model.values_selection_modes
    if modes.count() == 1:
        return modes.all()[0]
    # NOTE(review): str.split raises ValueError for an empty separator, so an
    # empty val_list_start fails here -- confirm whether callers rely on that.
    mode_name = attr_str.split(attribute_model.val_list_start)[0]
    try:
        return modes.get(name=mode_name)
    except AttrValueSelectionMode.DoesNotExist:
        return None

def get_attr_values_str(attribute_model, attr_str, selection_mode):
    """Extract the part of ``attr_str`` that holds the attribute values.

    Without a selection mode the string is returned unchanged. Otherwise:

    * if the model's ``val_list_start`` is non-empty and occurs in the string,
      the text after its first occurrence is returned with its final
      character removed;
    * if there is no list start to split on, the result is ``''`` when the
      selection mode has a non-empty ``name`` and the unchanged string when
      it does not.
    """
    if not selection_mode:
        return attr_str
    list_start = attribute_model.val_list_start
    if list_start != '':
        parts = attr_str.split(list_start, 1)
        if len(parts) != 1:
            # Drops the last character, presumably the list end marker.
            return parts[1][:-1]
    return '' if selection_mode.name else attr_str

def get_or_create_simple_parameter_attr(attribute_model, attr_str):
    """Build a parameter attribute that holds exactly one value.

    The value is parsed from ``attr_str`` by ``parse_parameter_attr_value``;
    the attribute is created without a selection mode and without a separator.
    """
    parameter_value = parse_parameter_attr_value(attribute_model, attr_str)
    return get_or_create_attribute(attribute_model=attribute_model,
                                   values=[parameter_value],
                                   selection_mode=None,
                                   separator=None)

def parse_parameter_attr_value(attribute_model, attr_val_str):
    """Parse ``name[sub;sub...]`` into a parameter attribute value object.

    The parameter name is looked up among the model's ``possible_parameters``
    and every subparameter name among that parameter's
    ``possible_subparams``. The first element of the pair returned by
    ``get_or_create_parameter_attr_value`` is returned.
    """
    model_name, subparam_names = get_param_parts(attr_val_str)
    param_model = attribute_model.possible_parameters.get(name=model_name)
    subparams = []
    for subparam_name in subparam_names:
        subparams.append(param_model.possible_subparams.get(name=subparam_name))
    # Both helpers return a pair; only the first element is used.
    parameter, _ = get_or_create_attr_parameter(param_model, subparams)
    value, _ = get_or_create_parameter_attr_value(parameter)
    return value

def get_param_parts(param_str):
    """Split ``name[sub;sub...]`` into the name and the subparameter names.

    Everything before the first ``[`` is the name. When a ``[`` is present,
    the text after it, minus its final character, is split on ``;``. Without
    a ``[`` the subparameter list is empty.

    Returns a ``(param_model_name, subparameters_str_ls)`` tuple.
    """
    name, bracket, remainder = param_str.partition('[')
    if not bracket:
        return name, []
    # The last character is dropped unconditionally; it is presumably the
    # closing ']' but this is not checked.
    return name, remainder[:-1].split(';')

def get_or_create_complex_parameter_attr(attribute_model, attr_str):
    """Build a parameter attribute that may hold a list of parameters.

    The selection mode, the separator and the raw values string are derived
    from ``attr_str``. The values string is split on the separator symbol and
    each piece is parsed by ``parse_parameter_attr_value``.

    ``get_attr_separator`` returns ``None`` when no separator symbol occurs in
    ``attr_str`` and the model does not have exactly one possible separator.
    In that case the values string is treated as a single value, the same
    fallback ``get_or_create_complex_text_attr`` uses, instead of failing with
    ``AttributeError`` on ``None.symbol``.
    """
    selection_mode = get_attr_selection_mode(attribute_model, attr_str)
    separator = get_attr_separator(attribute_model, attr_str)
    values_str = get_attr_values_str(attribute_model, attr_str, selection_mode)
    if separator:
        values_text_reps = values_str.split(separator.symbol)
    else:
        # No applicable separator: nothing to split on, so one value.
        values_text_reps = [values_str]
    values = [parse_parameter_attr_value(attribute_model, value_text_rep)
              for value_text_rep in values_text_reps]
    return get_or_create_attribute(attribute_model=attribute_model,
                                   values=values,
                                   selection_mode=selection_mode,
                                   separator=separator)

def get_or_create_simple_argument_attr(attribute_model, attr_str):
    """Build an argument attribute that holds exactly one argument.

    ``attr_str`` is parsed as the textual representation of an argument; the
    attribute is created without a selection mode and without a separator.
    """
    argument_value = get_or_create_argument_attr_value(
        parse_argument_text_rep(attr_str))
    return get_or_create_attribute(attribute_model=attribute_model,
                                   values=[argument_value],
                                   selection_mode=None,
                                   separator=None)

def get_or_create_complex_argument_attr(attribute_model, attr_str):
    """Build an argument attribute that holds a list of arguments.

    The leading selection mode name is cut off ``attr_str``, then one
    ``val_list_start`` is removed from the front and one ``val_list_end`` from
    the back. What remains is split with ``get_arguments`` on the separator
    symbol, and every piece is parsed as an argument.

    NOTE(review): ``get_attr_selection_mode`` and ``get_attr_separator`` can
    both return ``None``; this function still assumes they do not.
    """
    selection_mode = get_attr_selection_mode(attribute_model, attr_str)
    separator = get_attr_separator(attribute_model, attr_str)
    args_str = attr_str[len(selection_mode.name):]

    # Remove exactly one delimiter at each end. str.lstrip/str.rstrip treat
    # their argument as a set of characters, so with ')' as the list end they
    # would also eat the parenthesis closing the last argument, e.g.
    # "(np(str);np(gen))" would become "np(str);np(gen".
    list_start = attribute_model.val_list_start
    list_end = attribute_model.val_list_end
    if list_start and args_str.startswith(list_start):
        args_str = args_str[len(list_start):]
    if list_end and args_str.endswith(list_end):
        args_str = args_str[:-len(list_end)]

    values = []
    for arg_text_rep in get_arguments(args_str, separator.symbol):
        argument = parse_argument_text_rep(arg_text_rep)
        values.append(get_or_create_argument_attr_value(argument))
    return get_or_create_attribute(attribute_model=attribute_model,
                                   values=values,
                                   selection_mode=selection_mode,
                                   separator=separator)

def get_or_create_positions_attr(attribute_model, attr_str):
    """Build an attribute whose values are syntactic positions.

    The selection mode, the separator and the raw positions string are derived
    from ``attr_str``. The positions string is parsed by
    ``parse_positions_str`` and each position is wrapped in a position value
    object.
    """
    selection_mode = get_attr_selection_mode(attribute_model, attr_str)
    separator = get_attr_separator(attribute_model, attr_str)
    positions_str = get_attr_values_str(attribute_model, attr_str, selection_mode)
    values = [get_or_create_position_attr_value(position)
              for position in parse_positions_str(positions_str)]
    return get_or_create_attribute(attribute_model=attribute_model,
                                   values=values,
                                   selection_mode=selection_mode,
                                   separator=separator)

def parse_positions_str(positions_str):
    """Parse a ``+``-separated list of positions into position objects.

    The string is split by ``get_positions``; empty pieces are skipped and
    every other piece is parsed by ``parse_position_text_rep``.
    """
    parsed = []
    for position_text in get_positions(positions_str):
        if position_text:
            parsed.append(parse_position_text_rep(position_text))
    return parsed

def get_positions(positions_str):
    """Split a string on the ``+`` signs that are not nested inside braces.

    Nesting is tracked for ``{``/``}`` only. Every piece is stripped of
    surrounding whitespace, and the text after the last top-level ``+`` is
    always included, so the empty string yields ``['']``.
    """
    pieces = []
    depth = 0
    piece_start = 0
    for index, char in enumerate(positions_str):
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
        elif char == '+' and depth == 0:
            pieces.append(positions_str[piece_start:index].strip())
            piece_start = index + 1
    pieces.append(positions_str[piece_start:].strip())
    return pieces

def parse_position_text_rep(text_rep):
    """Turn the textual representation of a position into a position object.

    ``get_position_parts`` yields the category names and the argument strings.
    Each category name is looked up as a ``PositionCategory`` (matched on
    ``category``), each non-empty argument string is parsed as an argument,
    and both lists are handed to ``get_or_create_position``.
    """
    category_names, argument_texts = get_position_parts(text_rep)
    categories = [PositionCategory.objects.get(category=category_name)
                  for category_name in category_names]
    arguments = []
    for argument_text in argument_texts:
        if argument_text:
            arguments.append(parse_argument_text_rep(argument_text))
    return get_or_create_position(categories, arguments)
    
def get_position_parts(text_rep):
    """Split ``cat,cat{arg;arg...}`` into category names and argument strings.

    The text before the first ``{`` is stripped and split on commas, and empty
    names are discarded. The text after the ``{``, minus its final character,
    is split by ``get_arguments``. Without a ``{`` there are no arguments.

    Returns a ``(categories, arguments)`` tuple of lists of strings.
    """
    head, brace, body = text_rep.partition('{')
    categories = [name for name in head.strip().split(',') if name]
    # The last character is dropped unconditionally; it is presumably the
    # closing '}' but this is not checked.
    arguments_str = body[:-1] if brace else ''
    return categories, get_arguments(arguments_str)

def get_arguments(arguments_str, separator=';'):
    """Split a string on the separators that are not nested inside brackets.

    Nesting is tracked for ``(``/``)`` and ``[``/``]`` only. A split happens
    where a single character equals ``separator`` at nesting depth zero. Each
    piece has ``separator`` characters and then whitespace stripped from both
    ends. Text left after the last split is appended only when it is
    non-empty, so the empty string yields ``[]``.
    """
    pieces = []
    depth = 0
    piece_start = 0
    for index, char in enumerate(arguments_str):
        if char in ('(', '['):
            depth += 1
        elif char in (')', ']'):
            depth -= 1
        if char == separator and depth == 0:
            # Keep the separator in the slice and strip it afterwards, so any
            # separator value behaves exactly as with str.strip(separator).
            piece = arguments_str[piece_start:index + 1]
            pieces.append(piece.strip(separator).strip())
            piece_start = index + 1
    remainder = arguments_str[piece_start:]
    if remainder:
        pieces.append(remainder.strip())
    return pieces