convert_frames.py 17.2 KB
#-*- coding:utf-8 -*-

"""
Module for converting frames between different parts of speech.
"""

import copy
import re

from dictionary.models import Argument, PositionCategory, get_or_create_position, \
                              positions_to_frame
from dictionary.parser import get_arg_parts, parse_argument_parts, parse_argument_text_rep


def frame_conversion(frame, from_pos, to_pos):
    """Convert ``frame`` from one part of speech to another.

    The pair ``(from_pos.tag, to_pos.tag)`` selects both the frame-level
    rule set and the per-argument conversion function.  Supported pairs are
    verb->noun, verb->adj, noun->adj and adj->noun.  For any other pair no
    realizations are generated, so ``create_frame_versions`` is called with
    an empty list and ``None`` as the argument converter.

    Returns whatever ``create_frame_versions`` builds from the realizations.
    """
    handlers = {
        ('verb', 'noun'): (verb_to_noun_arg_conversion, verb_to_noun_conversion),
        ('verb', 'adj'): (verb_to_adj_arg_conversion, verb_to_adj_conversion),
        ('noun', 'adj'): (noun_to_adj_arg_conversion, noun_to_adj_conversion),
        ('adj', 'noun'): (adj_to_noun_arg_conversion, adj_to_noun_conversion),
    }
    arg_converter, realization_builder = handlers.get(
        (from_pos.tag, to_pos.tag), (None, None))
    if realization_builder is None:
        realizations = []
    else:
        realizations = realization_builder(frame)
    return create_frame_versions(realizations, arg_converter)

def verb_to_noun_arg_conversion(position_category, argument):
    """Adapt one argument of a verb frame for use in a noun frame.

    ``position_category`` is the category string of the position the
    argument came from ('' when there is none); ``argument`` is an argument
    object or ``None``.

    Returns the converted argument, or ``None`` when the argument has no
    counterpart and should be dropped.  Rules are checked in this order:

    * ``None`` stays ``None``;
    * ``np(str)`` in a 'subj' position becomes the ``possp`` argument
      looked up through ``Argument.objects``;
    * arguments of type 'or' or 'infp' are dropped;
    * arguments whose u'PRZYPADEK' (Polish for "case") parameter is 'pred'
      are dropped;
    * arguments whose u'PRZYPADEK' parameter is 'str' get 'gen' instead;
    * anything else is returned unchanged.
    """
    if argument is None:
        return None
    if position_category == 'subj' and argument.text_rep == 'np(str)':
        return Argument.objects.get(text_rep=u'possp')
    if argument.type in ('or', 'infp'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'pred'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'str'):
        return change_str_to_gen(argument)
    return argument

def verb_to_adj_arg_conversion(position_category, argument):
    """Adapt one argument of a verb frame for use in an adjective frame.

    ``position_category`` is the category string of the position the
    argument came from ('' when there is none); ``argument`` is an argument
    object or ``None``.

    Returns the converted argument, or ``None`` when the argument should be
    dropped.  Rules are checked in this order:

    * ``None`` stays ``None``;
    * ``np(str)`` in a 'subj' position is dropped;
    * arguments of type 'or' are dropped;
    * arguments of type 'lexnp', 'preplexnp', 'fixed' or 'lex' are dropped;
    * arguments whose u'PRZYPADEK' (Polish for "case") parameter is 'pred'
      are dropped;
    * arguments whose u'PRZYPADEK' parameter is 'str' get 'gen' instead;
    * anything else is returned unchanged.
    """
    if argument is None:
        return None
    if position_category == 'subj' and argument.text_rep == 'np(str)':
        return None
    if argument.type in ('or', 'lexnp', 'preplexnp', 'fixed', 'lex'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'pred'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'str'):
        return change_str_to_gen(argument)
    return argument

def noun_to_adj_arg_conversion(position_category, argument):
    """Return ``argument`` unchanged.

    No per-argument rules are defined for the noun -> adjective direction;
    ``position_category`` is accepted only so the signature matches the
    other argument converters.
    """
    return argument

def adj_to_noun_arg_conversion(position_category, argument):
    """Return ``argument`` unchanged.

    No per-argument rules are defined for the adjective -> noun direction;
    ``position_category`` is accepted only so the signature matches the
    other argument converters.
    """
    return argument

def change_str_to_gen(argument):
    """Build a copy of ``argument`` with every 'str' attribute set to 'gen'.

    The argument's ``text_rep`` is split with ``get_arg_parts``; each
    attribute string equal to 'str' is swapped for 'gen', and the result is
    turned back into an argument object by ``parse_argument_parts``.
    """
    arg_type, original_attrs = get_arg_parts(argument.text_rep)
    genitive_attrs = []
    for attr in original_attrs:
        genitive_attrs.append('gen' if attr == 'str' else attr)
    return parse_argument_parts(arg_type, genitive_attrs)

def verb_to_noun_conversion(frame):
    frame_realizations = [frame_to_rule_format(frame)]
#   subj{np(str)} + obj{np(str)}  ==> {np(gen)} -- zarówno z subj, jak i z obj
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': None}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': ur'{np(gen)}'}], frame_realizations)
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{np(gen)}'}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': None}], frame_realizations)    
#   subj{np(str)} + obj{ncp(str,int)} ==> {ncp(gen,int)} -- zarówno z subj, jak i z obj
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': None}, 
             {'from': ur'^obj\{ncp\(str,int(.*)\)\}$', 'to': ur'{ncp(gen,int[.*])}'}], frame_realizations)
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{ncp(gen,int[.*])}'}, 
             {'from': ur'^obj\{ncp\(str,int(.*)\)\}$', 'to': None}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,że)} ==> {ncp(gen,że)} -- zarówno z subj, jak i z obj
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': None}, 
             {'from': ur'^obj\{ncp\(str,że\)\}$', 'to': ur'{ncp(gen,że)}'}], frame_realizations)
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{ncp(gen,że)}'}, 
             {'from': ur'^obj\{ncp\(str,że\)\}$', 'to': None}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,żeby)} ==> {ncp(gen,żeby)} -- zarówno z subj, jak i z obj
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': None}, 
             {'from': ur'^obj\{ncp\(str,żeby\)\}$', 'to': ur'{ncp(gen,żeby)}'}], frame_realizations) 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{ncp(gen,żeby)}'}, 
             {'from': ur'^obj\{ncp\(str,żeby\)\}$', 'to': None}], frame_realizations)
    
#   subj{np(str)} + obj{np(str)}  ==>  {possp} + {prepnp(dla,gen)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': ur'{prepnp(dla,gen)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,że)}  ==>  {possp} + {prepncp(dla,gen,że)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,że\)\}$', 'to': ur'{prepncp(dla,gen,że)}'}], frame_realizations)  
#   subj{np(str)} + obj{ncp(str,int)}  ==>  {possp} + {prepncp(dla,gen,int)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,int(.*)\)\}$', 'to': ur'{prepncp(dla,gen,int[.*])}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,żeby)}  ==>  {possp} + {prepncp(dla,gen,żeby)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,żeby\)\}$', 'to': ur'{prepncp(dla,gen,żeby)}'}], frame_realizations)
    
#   subj{np(str)} + obj{np(str)}  ==>  {possp} + {prepnp(o,loc)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': ur'{prepnp(o,loc)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,int)}  ==>  {possp} + {prepncp(o,loc,int)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,int(.*)\)\}$', 'to': ur'{prepncp(o,loc,int[.*])}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,że)}  ==>  {possp} + {prepncp(o,loc,że)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,że\)\}$', 'to': ur'{prepncp(o,loc,że)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,żeby)}  ==>  {possp} + {prepncp(o,loc,żeby)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,żeby\)\}$', 'to': ur'{prepncp(o,loc,żeby)}'}], frame_realizations)
    
#   subj{np(str)} + obj{np(str)}  ==>  {possp} + {prepnp(na,acc)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': ur'{prepnp(na,acc)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,int)}  ==>  {possp} + {prepncp(na,acc,int)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,int(.*)\)\}$', 'to': ur'{prepncp(na,acc,int[.*])}'}], frame_realizations) 
#   subj{np(str)} + obj{ncp(str,że)}  ==>  {possp} + {prepncp(na,acc,że)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,że\)\}$', 'to': ur'{prepncp(na,acc,że)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,żeby)}  ==>  {possp} + {prepncp(na,acc,żeby)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,żeby\)\}$', 'to': ur'{prepncp(na,acc,żeby)}'}], frame_realizations)
    
#   subj{np(str)} + obj{np(str)}  ==>  {possp} + {prepnp(na,loc)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': ur'{prepnp(na,loc)}'}], frame_realizations)  
    
#   subj{np(str)} + obj{np(str)}  ==>  {possp} + {prepnp(nad,inst)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': ur'{prepnp(nad,inst)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,int)}  ==>  {possp} + {prepncp(nad,inst,int)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,int(.*)\)\}$', 'to': ur'{prepncp(nad,inst,int[.*])}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,że)}  ==>  {possp} + {prepncp(nad,inst,że)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,że\)\}$', 'to': ur'{prepncp(nad,inst,że)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(str,żeby)}  ==>  {possp} + {prepncp(nad,inst,żeby)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(str,żeby\)\}$', 'to': ur'{prepncp(nad,inst,żeby)}'}], frame_realizations)
    
#   subj{np(str)} + {np(dat)}  ==>  {possp} + {prepnp(dla,gen)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^\{np\(dat\)\}$', 'to': ur'{prepnp(dla,gen)}'}], frame_realizations)
#   subj{np(str)} + {ncp(dat,int))}  ==>  {possp} + {prepncp(dla,gen,int)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^\{ncp\(dat,int(.*)\)\}$', 'to': ur'{prepncp(dla,gen,int[.*])}'}], frame_realizations)
    
#   subj{np(str)} + {np(gen)}  ==>  {possp} + {prepnp(na,acc)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^\{np\(gen\)\}$', 'to': ur'{prepnp(na,acc)}'}], frame_realizations)
#   subj{np(str)} + {ncp(gen,int)}  ==>  {possp} + {prepncp(na,acc,int)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^\{ncp\(gen,int(.*)\)\}$', 'to': ur'{prepncp(na,acc,int[.*])}'}], frame_realizations)
#   subj{np(str)} + {ncp(gen,że)}  ==>  {possp} + {prepncp(na,acc,że)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^\{ncp\(gen,że\)\}$', 'to': ur'{prepncp(na,acc,że)}'}], frame_realizations)
#   subj{np(str)} + {ncp(gen,żeby)}  ==>  {possp} + {prepnp(na,acc,żeby)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^\{ncp\(gen,żeby\)\}$', 'to': ur'{prepnp(na,acc,żeby)}'}], frame_realizations)
    
#   subj{np(str)} + obj{np(inst)} ==>  {possp} + {np(inst)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{np\(inst\)\}$', 'to': ur'{np(inst)}'}], frame_realizations)
    
#   subj{np(str)} + obj{np(inst)} ==>  {possp} + {prepnp(nad,inst)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{np\(inst\)\}$', 'to': ur'{prepnp(nad,inst)}'}], frame_realizations)
#   subj{np(str)} + obj{ncp(inst,int)} ==>  {possp} + {prepncp(nad,inst,int)}
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^obj\{ncp\(inst,int(.*)\)\}$', 'to': ur'{prepncp(nad,inst,int[.*])}'}], frame_realizations)
    
#   subj{np(str)} + {prepnp(z,gen)} (się) ==> {possp} + {prepnp(z,gen)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{possp}'}, 
             {'from': ur'^\{prepnp\(z,gen\)\}$', 'to': ur'{prepnp(z,gen)}'}], frame_realizations)
#   subj{np(str)} + {prepnp(z,gen)} (się) ==> new_position{prepnp(między,inst)} 
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': None}, 
             {'from': ur'^\{prepnp\(z,gen\)\}$', 'to': None}], 
            frame_realizations, position_to_add=[ur'{prepnp(między,inst)}'])
    
    return frame_realizations

def verb_to_adj_conversion(frame):
    frame_realizations = [frame_to_rule_format(frame)]
#   subj{np(str)} + obj{np(str)} ==>  {prepnp(dla,gen)} -- z subj
    convert([{'from': ur'^subj\{np\(str\)\}$', 'to': ur'{prepnp(dla,gen)}'}, 
             {'from': ur'^obj\{np\(str\)\}$', 'to': None}], frame_realizations)
    return frame_realizations

def noun_to_adj_conversion(frame):
    """Return the only noun -> adjective realization: ``frame`` in rule format.

    No frame-level rules exist for this direction, so the result is a
    one-element list.
    """
    return [frame_to_rule_format(frame)]

def adj_to_noun_conversion(frame):
    """Return the only adjective -> noun realization: ``frame`` in rule format.

    No frame-level rules exist for this direction, so the result is a
    one-element list.
    """
    return [frame_to_rule_format(frame)]
    
def frame_to_rule_format(frame):
    """Translate ``frame`` into rule format.

    Returns a list with one entry per position yielded by
    ``frame.positions.all()``; each entry is that position converted by
    ``position_to_rule_format``.
    """
    rule_positions = []
    for position in frame.positions.all():
        rule_positions.append(position_to_rule_format(position))
    return rule_positions
    
def position_to_rule_format(position):
    """Translate ``position`` into rule format.

    Returns a list of 'category{text_rep}' strings, one per argument of the
    position.  The category is taken from the position's category entry
    with ``control=False``; when no such entry exists the prefix is empty.
    """
    try:
        category = position.categories.get(control=False).category
    except PositionCategory.DoesNotExist:
        category = ''
    rule_arguments = []
    for argument in position.arguments.all():
        rule_arguments.append('%s{%s}' % (category, argument.text_rep))
    return rule_arguments

def convert(conversion_pairs, frame_realizations, position_to_add=None):
    """Apply one conversion rule to every realization, in place.

    ``conversion_pairs`` is the two-element rule (dicts with 'from' and
    'to'); ``position_to_add`` is an optional extra position passed through
    to ``create_new_realizations``.  Realizations derived from the current
    contents of ``frame_realizations`` are collected first and appended to
    that same list afterwards, so a rule never sees its own output.
    Returns ``None``.
    """
    derived = [new_realization
               for realization in frame_realizations
               for new_realization in create_new_realizations(conversion_pairs,
                                                              realization,
                                                              position_to_add)]
    frame_realizations.extend(derived)
    
def create_new_realizations(conversion_pairs, frame_realization, position_to_add):
    """Derive new realizations from ``frame_realization`` using one rule.

    ``conversion_pairs`` holds two dicts with 'from' (a regex) and 'to'
    (replacement text or ``None``).  The rule applies only when both
    patterns occur in the realization.  For every combination of a position
    matching the first pattern and a position matching the second, a deep
    copy of the realization is made, ``position_to_add`` is appended when
    given, and the two matched arguments are replaced in the copy.

    Returns the list of copies (empty when the rule does not apply); the
    input realization is not modified.
    """
    first_pair, second_pair = conversion_pairs[0], conversion_pairs[1]
    first_hits = get_argument_occurrences(first_pair['from'], frame_realization)
    second_hits = get_argument_occurrences(second_pair['from'], frame_realization)

    derived = []
    if not (first_hits and second_hits):
        return derived

    for first_hit in first_hits:
        for second_hit in second_hits:
            candidate = copy.deepcopy(frame_realization)
            add_new_position_if_needed(candidate, position_to_add)
            # Positions are located in the copy by equality with the
            # positions found in the source realization.
            first_target = get_position(candidate, first_hit)
            second_target = get_position(candidate, second_hit)
            replace_argument(first_target, first_pair)
            replace_argument(second_target, second_pair)
            derived.append(candidate)
    return derived

def get_argument_occurrences(argument, frame_realization):
    """Return the positions of ``frame_realization`` that match ``argument``.

    ``argument`` is a regular expression; a position (a list of argument
    strings) qualifies when at least one of its non-empty arguments matches
    the expression from its start.  Each qualifying position appears once,
    in realization order, and is the same list object as in the input.
    """
    matcher = re.compile(argument)
    return [position for position in frame_realization
            if any(arg and matcher.match(arg) for arg in position)]

def add_new_position_if_needed(frame_realization, new_position):
    """Append ``new_position`` to ``frame_realization`` unless it is ``None``.

    ``new_position`` is a list of argument strings.  A shallow copy is
    appended rather than the caller's list itself: the same
    ``new_position`` is handed in for every realization a rule generates,
    and positions are later edited in place, so sharing one list object
    would let an edit in one realization leak into the others.
    """
    if new_position is not None:
        frame_realization.append(list(new_position))
    
def get_position(frame_realization, position):
    """Return the first position in ``frame_realization`` equal to ``position``.

    Comparison is by equality, not identity, so a position taken from one
    realization can be located in a copy of it.  Returns ``None`` when no
    position is equal.
    """
    for candidate in frame_realization:
        if position == candidate:
            return candidate
    return None
        
def replace_argument(position, conversion_pair):
    """Replace, in place, the first argument of ``position`` matching a rule.

    ``position`` is a list of argument strings; entries may be ``None``
    where an earlier conversion removed an argument, and those are skipped.
    ``conversion_pair`` is a dict with 'from' (a regex) and 'to' (the
    replacement text, or ``None`` to remove the argument).

    When 'to' contains the placeholder '[.*]', it is substituted with the
    text captured by the first group of the 'from' pattern; if the pattern
    has no group, or the group captured nothing, the placeholder is simply
    removed.  Only the first matching argument is replaced.
    """
    from_pattern = re.compile(conversion_pair['from'])
    for index, arg in enumerate(position):
        if arg is None:
            continue
        match = from_pattern.match(arg)
        if not match:
            continue
        new_arg = conversion_pair['to']
        if new_arg:
            # match.groups() excludes the whole match, so a pattern with a
            # single '(.*)' has exactly one group; use it whenever present.
            captured = match.group(1) if from_pattern.groups else None
            new_arg = new_arg.replace(u'[.*]', captured or u'')
        position[index] = new_arg
        break

def create_frame_versions(frame_realizations, arg_conversion_function):
    """Turn rule-format realizations into frame objects.

    Each realization is passed to ``frame_realization_to_obj`` together
    with ``arg_conversion_function``; realizations for which that returns
    ``None`` are left out.  Returns the resulting list, in input order.
    """
    frame_versions = []
    for frame_realization in frame_realizations:
        frame_obj = frame_realization_to_obj(frame_realization,
                                             arg_conversion_function)
        if frame_obj is not None:
            frame_versions.append(frame_obj)
    return frame_versions

def frame_realization_to_obj(frame_realization, arg_conversion_function):
    """Build a frame object from one rule-format realization.

    Every position of the realization is converted with
    ``position_realization_to_obj``; positions that come back as ``None``
    are skipped and duplicates are removed.  The remaining positions are
    handed to ``positions_to_frame`` with its four other arguments left as
    empty strings.

    Returns the frame object, or ``None`` when no position survives.
    """
    positions = []
    for pos_realization in frame_realization:
        pos_obj = position_realization_to_obj(pos_realization,
                                              arg_conversion_function)
        if pos_obj is not None:
            positions.append(pos_obj)
    positions = remove_duplicates(positions)
    if not positions:
        return None
    return positions_to_frame(positions, '', '', '', '')
        
def position_realization_to_obj(pos_realization, arg_conversion_function):
    """Build a position object from one rule-format position.

    Empty entries (arguments removed by a rule) are discarded, every
    remaining argument string is converted with
    ``argument_realization_to_obj``, arguments that come back as ``None``
    are skipped and duplicates are removed.  The position is obtained from
    ``get_or_create_position`` with an empty category list.

    Returns the position object, or ``None`` when no argument survives.
    """
    arguments = []
    for arg in remove_empty_args(pos_realization):
        arg_obj = argument_realization_to_obj(arg, arg_conversion_function)
        if arg_obj is not None:
            arguments.append(arg_obj)
    arguments = remove_duplicates(arguments)
    if not arguments:
        return None
    return get_or_create_position(categories=[], arguments=arguments)

def remove_empty_args(position_realization):
    """Return a new list with the falsy entries of ``position_realization`` removed.

    Removed arguments are stored as ``None`` in a position; this drops them
    (and empty strings) while keeping the order of the rest.  A list is
    always returned, so the result can be iterated more than once and
    compared, regardless of whether ``filter`` is lazy on the running
    Python version.
    """
    return [arg for arg in position_realization if arg]
        
def argument_realization_to_obj(arg_realization, arg_conversion_function):
    """Build an argument object from a 'category{text_rep}' string.

    The string is split into its position category and argument text, the
    text is parsed with ``parse_argument_text_rep``, and the parsed object
    is passed, together with the category, through
    ``arg_conversion_function``.  Returns whatever that function returns.
    """
    category, text_rep = get_arg_realization_parts(arg_realization)
    parsed_argument = parse_argument_text_rep(text_rep)
    return arg_conversion_function(category, parsed_argument)

def get_arg_realization_parts(arg_realization):
    """Split a 'category{text_rep}' string into ``(category, text_rep)``.

    The category is everything before the first '{' (possibly empty).  The
    text representation is what follows, minus the single closing '}' that
    ends the realization; any further '}' characters belong to the
    argument text itself and are kept.

    Raises ``IndexError`` when the string contains no '{'.
    """
    arg_parts = arg_realization.split('{', 1)
    position_category = arg_parts[0]
    arg_text_rep = arg_parts[1]
    if arg_text_rep.endswith('}'):
        # Remove exactly one brace: the one closing 'category{...}'.
        arg_text_rep = arg_text_rep[:-1]
    return position_category, arg_text_rep

def remove_duplicates(objects):
    """Return the distinct items of ``objects`` as a list.

    Items must be hashable.  The first occurrence of each item is kept and
    the original relative order is preserved, so the result is
    deterministic rather than depending on set iteration order.
    """
    seen = set()
    unique = []
    for obj in objects:
        if obj not in seen:
            seen.add(obj)
            unique.append(obj)
    return unique