#-*- coding:utf-8 -*-

#Copyright (c) 2014, Bartłomiej Nitoń
#All rights reserved.

#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:

#    Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#    Redistributions in binary form must reproduce the above copyright notice, this list of conditions
#    and the following disclaimer in the documentation and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""
Module for converting frames between different parts of speech.

A frame is first flattened into "rule format": a list of positions, each
position being a list of strings of the form ``category{text_rep}`` (the
category part is empty when the position has no non-control category).
Rewrite rules then derive additional realizations from that list, and every
realization is finally turned back into a frame object, with each argument
passed through a part-of-speech specific argument conversion function.
"""

import copy
import re

from dictionary.models import Argument, PositionCategory, get_or_create_position, \
                              positions_to_frame
from dictionary.parser import get_arg_parts, parse_argument_parts, parse_argument_text_rep


# Regular expressions matching single arguments in rule format.  They are
# written as plain unicode literals with doubled backslashes (instead of the
# Python 2 only ``ur''`` prefix) so that the module also parses on Python 3;
# the resulting pattern strings are unchanged.
_SUBJ_NP_STR = u'^subj\\{np\\(str\\)\\}$'
_OBJ_NP_STR = u'^obj\\{np\\(str\\)\\}$'
_OBJ_NP_INST = u'^obj\\{np\\(inst\\)\\}$'
_OBJ_NCP_STR_INT = u'^obj\\{ncp\\(str,int(.*)\\)\\}$'
_OBJ_NCP_STR_ZE = u'^obj\\{ncp\\(str,że\\)\\}$'
_OBJ_NCP_STR_ZEBY = u'^obj\\{ncp\\(str,żeby\\)\\}$'
_OBJ_NCP_INST_INT = u'^obj\\{ncp\\(inst,int(.*)\\)\\}$'
_NP_DAT = u'^\\{np\\(dat\\)\\}$'
_NP_GEN = u'^\\{np\\(gen\\)\\}$'
_NCP_DAT_INT = u'^\\{ncp\\(dat,int(.*)\\)\\}$'
_NCP_GEN_INT = u'^\\{ncp\\(gen,int(.*)\\)\\}$'
_NCP_GEN_ZE = u'^\\{ncp\\(gen,że\\)\\}$'
_NCP_GEN_ZEBY = u'^\\{ncp\\(gen,żeby\\)\\}$'
_PREPNP_Z_GEN = u'^\\{prepnp\\(z,gen\\)\\}$'

_POSSP = u'{possp}'

# Ordered verb -> noun rewrite rules.  Every rule pairs subj{np(str)} with a
# second argument; an entry is (replacement for subj{np(str)},
# pattern of the second argument, replacement for the second argument).
# A replacement of None empties the matched argument slot.  The order matters:
# each rule is also applied to the realizations produced by earlier rules.
_VERB_TO_NOUN_RULES = (
    # subj{np(str)} + obj{np(str)} ==> {np(gen)} -- both from subj and from obj
    (None, _OBJ_NP_STR, u'{np(gen)}'),
    (u'{np(gen)}', _OBJ_NP_STR, None),
    # subj{np(str)} + obj{ncp(str,int)} ==> {ncp(gen,int)} -- both from subj and from obj
    (None, _OBJ_NCP_STR_INT, u'{ncp(gen,int[.*])}'),
    (u'{ncp(gen,int[.*])}', _OBJ_NCP_STR_INT, None),
    # subj{np(str)} + obj{ncp(str,że)} ==> {ncp(gen,że)} -- both from subj and from obj
    (None, _OBJ_NCP_STR_ZE, u'{ncp(gen,że)}'),
    (u'{ncp(gen,że)}', _OBJ_NCP_STR_ZE, None),
    # subj{np(str)} + obj{ncp(str,żeby)} ==> {ncp(gen,żeby)} -- both from subj and from obj
    (None, _OBJ_NCP_STR_ZEBY, u'{ncp(gen,żeby)}'),
    (u'{ncp(gen,żeby)}', _OBJ_NCP_STR_ZEBY, None),

    # subj{np(str)} + obj{np(str)} ==> {possp} + {prepnp(dla,gen)}
    (_POSSP, _OBJ_NP_STR, u'{prepnp(dla,gen)}'),
    # subj{np(str)} + obj{ncp(str,że)} ==> {possp} + {prepncp(dla,gen,że)}
    (_POSSP, _OBJ_NCP_STR_ZE, u'{prepncp(dla,gen,że)}'),
    # subj{np(str)} + obj{ncp(str,int)} ==> {possp} + {prepncp(dla,gen,int)}
    (_POSSP, _OBJ_NCP_STR_INT, u'{prepncp(dla,gen,int[.*])}'),
    # subj{np(str)} + obj{ncp(str,żeby)} ==> {possp} + {prepncp(dla,gen,żeby)}
    (_POSSP, _OBJ_NCP_STR_ZEBY, u'{prepncp(dla,gen,żeby)}'),

    # subj{np(str)} + obj{np(str)} ==> {possp} + {prepnp(o,loc)}
    (_POSSP, _OBJ_NP_STR, u'{prepnp(o,loc)}'),
    # subj{np(str)} + obj{ncp(str,int)} ==> {possp} + {prepncp(o,loc,int)}
    (_POSSP, _OBJ_NCP_STR_INT, u'{prepncp(o,loc,int[.*])}'),
    # subj{np(str)} + obj{ncp(str,że)} ==> {possp} + {prepncp(o,loc,że)}
    (_POSSP, _OBJ_NCP_STR_ZE, u'{prepncp(o,loc,że)}'),
    # subj{np(str)} + obj{ncp(str,żeby)} ==> {possp} + {prepncp(o,loc,żeby)}
    (_POSSP, _OBJ_NCP_STR_ZEBY, u'{prepncp(o,loc,żeby)}'),

    # subj{np(str)} + obj{np(str)} ==> {possp} + {prepnp(na,acc)}
    (_POSSP, _OBJ_NP_STR, u'{prepnp(na,acc)}'),
    # subj{np(str)} + obj{ncp(str,int)} ==> {possp} + {prepncp(na,acc,int)}
    (_POSSP, _OBJ_NCP_STR_INT, u'{prepncp(na,acc,int[.*])}'),
    # subj{np(str)} + obj{ncp(str,że)} ==> {possp} + {prepncp(na,acc,że)}
    (_POSSP, _OBJ_NCP_STR_ZE, u'{prepncp(na,acc,że)}'),
    # subj{np(str)} + obj{ncp(str,żeby)} ==> {possp} + {prepncp(na,acc,żeby)}
    (_POSSP, _OBJ_NCP_STR_ZEBY, u'{prepncp(na,acc,żeby)}'),

    # subj{np(str)} + obj{np(str)} ==> {possp} + {prepnp(na,loc)}
    (_POSSP, _OBJ_NP_STR, u'{prepnp(na,loc)}'),

    # subj{np(str)} + obj{np(str)} ==> {possp} + {prepnp(nad,inst)}
    (_POSSP, _OBJ_NP_STR, u'{prepnp(nad,inst)}'),
    # subj{np(str)} + obj{ncp(str,int)} ==> {possp} + {prepncp(nad,inst,int)}
    (_POSSP, _OBJ_NCP_STR_INT, u'{prepncp(nad,inst,int[.*])}'),
    # subj{np(str)} + obj{ncp(str,że)} ==> {possp} + {prepncp(nad,inst,że)}
    (_POSSP, _OBJ_NCP_STR_ZE, u'{prepncp(nad,inst,że)}'),
    # subj{np(str)} + obj{ncp(str,żeby)} ==> {possp} + {prepncp(nad,inst,żeby)}
    (_POSSP, _OBJ_NCP_STR_ZEBY, u'{prepncp(nad,inst,żeby)}'),

    # subj{np(str)} + {np(dat)} ==> {possp} + {prepnp(dla,gen)}
    (_POSSP, _NP_DAT, u'{prepnp(dla,gen)}'),
    # subj{np(str)} + {ncp(dat,int)} ==> {possp} + {prepncp(dla,gen,int)}
    (_POSSP, _NCP_DAT_INT, u'{prepncp(dla,gen,int[.*])}'),

    # subj{np(str)} + {np(gen)} ==> {possp} + {prepnp(na,acc)}
    (_POSSP, _NP_GEN, u'{prepnp(na,acc)}'),
    # subj{np(str)} + {ncp(gen,int)} ==> {possp} + {prepncp(na,acc,int)}
    (_POSSP, _NCP_GEN_INT, u'{prepncp(na,acc,int[.*])}'),
    # subj{np(str)} + {ncp(gen,że)} ==> {possp} + {prepncp(na,acc,że)}
    (_POSSP, _NCP_GEN_ZE, u'{prepncp(na,acc,że)}'),
    # subj{np(str)} + {ncp(gen,żeby)} ==> {possp} + {prepncp(na,acc,żeby)}
    # NOTE(review): the target used to be 'prepnp(na,acc,żeby)'; changed to
    # 'prepncp' to agree with every sibling ncp rule -- confirm with the parser.
    (_POSSP, _NCP_GEN_ZEBY, u'{prepncp(na,acc,żeby)}'),

    # subj{np(str)} + obj{np(inst)} ==> {possp} + {np(inst)}
    (_POSSP, _OBJ_NP_INST, u'{np(inst)}'),
    # subj{np(str)} + obj{np(inst)} ==> {possp} + {prepnp(nad,inst)}
    (_POSSP, _OBJ_NP_INST, u'{prepnp(nad,inst)}'),
    # subj{np(str)} + obj{ncp(inst,int)} ==> {possp} + {prepncp(nad,inst,int)}
    (_POSSP, _OBJ_NCP_INST_INT, u'{prepncp(nad,inst,int[.*])}'),

    # subj{np(str)} + {prepnp(z,gen)} (się) ==> {possp} + {prepnp(z,gen)}
    (_POSSP, _PREPNP_Z_GEN, u'{prepnp(z,gen)}'),
)


def frame_conversion(frame, from_pos, to_pos):
    """Convert ``frame`` from part of speech ``from_pos`` to ``to_pos``.

    The pair of ``tag`` attributes selects the realization generator and the
    argument conversion function.  Supported pairs are verb->noun, verb->adj,
    noun->adj and adj->noun; for any other pair an empty list is returned.

    Returns a list of frame objects as built by ``create_frame_versions``.
    """
    arg_conversion_function = None
    frame_realizations = []
    tags = (from_pos.tag, to_pos.tag)
    if tags == ('verb', 'noun'):
        arg_conversion_function = verb_to_noun_arg_conversion
        frame_realizations = verb_to_noun_conversion(frame)
    elif tags == ('verb', 'adj'):
        arg_conversion_function = verb_to_adj_arg_conversion
        frame_realizations = verb_to_adj_conversion(frame)
    elif tags == ('noun', 'adj'):
        arg_conversion_function = noun_to_adj_arg_conversion
        frame_realizations = noun_to_adj_conversion(frame)
    elif tags == ('adj', 'noun'):
        arg_conversion_function = adj_to_noun_arg_conversion
        frame_realizations = adj_to_noun_conversion(frame)
    return create_frame_versions(frame_realizations, arg_conversion_function)


def verb_to_noun_arg_conversion(position_category, argument):
    """Convert a single verb argument into its noun counterpart.

    Returns the converted argument, or None when the argument is dropped:
    * ``np(str)`` in a ``subj`` position becomes the ``possp`` argument,
    * arguments of type ``or`` or ``infp`` are dropped,
    * arguments with PRZYPADEK (case) attribute ``pred`` are dropped,
    * arguments with PRZYPADEK attribute ``str`` get ``gen`` instead,
    * everything else (including None) is returned unchanged.
    """
    if argument is None:
        return None
    if position_category == 'subj' and argument.text_rep == 'np(str)':
        return Argument.objects.get(text_rep=u'possp')
    if argument.type in ('or', 'infp'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'pred'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'str'):
        return change_str_to_gen(argument)
    return argument


def verb_to_adj_arg_conversion(position_category, argument):
    """Convert a single verb argument into its adjective counterpart.

    Returns the converted argument, or None when the argument is dropped:
    * ``np(str)`` in a ``subj`` position is dropped,
    * arguments of type ``or``, ``lexnp``, ``preplexnp``, ``fixed`` or
      ``lex`` are dropped,
    * arguments with PRZYPADEK (case) attribute ``pred`` are dropped,
    * arguments with PRZYPADEK attribute ``str`` get ``gen`` instead,
    * everything else (including None) is returned unchanged.
    """
    if argument is None:
        return None
    if position_category == 'subj' and argument.text_rep == 'np(str)':
        return None
    if argument.type in ('or', 'lexnp', 'preplexnp', 'fixed', 'lex'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'pred'):
        return None
    if argument.contains_parameter_attribute(u'PRZYPADEK', 'str'):
        return change_str_to_gen(argument)
    return argument


def noun_to_adj_arg_conversion(position_category, argument):
    """Return ``argument`` unchanged (no noun -> adj argument rules)."""
    return argument


def adj_to_noun_arg_conversion(position_category, argument):
    """Return ``argument`` unchanged (no adj -> noun argument rules)."""
    return argument


def change_str_to_gen(argument):
    """Return an argument like ``argument`` with every ``str`` attribute
    replaced by ``gen``.

    The text representation is split with ``get_arg_parts`` and the result is
    rebuilt with ``parse_argument_parts``.
    """
    arg_type, attributes_strs = get_arg_parts(argument.text_rep)
    attributes_strs = ['gen' if attr_str == 'str' else attr_str
                       for attr_str in attributes_strs]
    return parse_argument_parts(arg_type, attributes_strs)


def verb_to_noun_conversion(frame):
    """Return all rule-format realizations of a verb ``frame`` as a noun frame.

    The list starts with the unchanged frame and is extended by applying the
    rules from ``_VERB_TO_NOUN_RULES`` in order, followed by one rule that
    adds a new position.
    """
    frame_realizations = [frame_to_rule_format(frame)]
    for subj_to, other_from, other_to in _VERB_TO_NOUN_RULES:
        convert([{'from': _SUBJ_NP_STR, 'to': subj_to},
                 {'from': other_from, 'to': other_to}],
                frame_realizations)
    # subj{np(str)} + {prepnp(z,gen)} (się) ==> new_position{prepnp(między,inst)}
    convert([{'from': _SUBJ_NP_STR, 'to': None},
             {'from': _PREPNP_Z_GEN, 'to': None}],
            frame_realizations,
            position_to_add=[u'{prepnp(między,inst)}'])
    return frame_realizations


def verb_to_adj_conversion(frame):
    """Return all rule-format realizations of a verb ``frame`` as an adj frame."""
    frame_realizations = [frame_to_rule_format(frame)]
    # subj{np(str)} + obj{np(str)} ==> {prepnp(dla,gen)} -- from subj
    convert([{'from': _SUBJ_NP_STR, 'to': u'{prepnp(dla,gen)}'},
             {'from': _OBJ_NP_STR, 'to': None}],
            frame_realizations)
    return frame_realizations


def noun_to_adj_conversion(frame):
    """Return the single, unchanged rule-format realization of ``frame``."""
    return [frame_to_rule_format(frame)]


def adj_to_noun_conversion(frame):
    """Return the single, unchanged rule-format realization of ``frame``."""
    return [frame_to_rule_format(frame)]


def frame_to_rule_format(frame):
    """Return ``frame`` as a list of positions in rule format."""
    return [position_to_rule_format(position)
            for position in frame.positions.all()]


def position_to_rule_format(position):
    """Return ``position`` as a list of ``category{text_rep}`` strings.

    The category is the one with ``control=False``; when the position has no
    such category the prefix is empty.
    """
    try:
        category = position.categories.get(control=False).category
    except PositionCategory.DoesNotExist:
        category = ''
    return ['%s{%s}' % (category, argument.text_rep)
            for argument in position.arguments.all()]


def convert(conversion_pairs, frame_realizations, position_to_add=None):
    """Apply one rewrite rule to every realization, extending the list in place.

    ``conversion_pairs`` is a list of two dicts with keys ``'from'`` (regular
    expression) and ``'to'`` (replacement text or None).  ``position_to_add``
    is an optional extra position appended to each new realization.
    Realizations created here are appended to ``frame_realizations`` only
    after the whole list has been processed, so a rule is never applied to
    its own output within one call.
    """
    new_realizations = []
    for frame_realization in frame_realizations:
        new_realizations.extend(
            create_new_realizations(conversion_pairs, frame_realization,
                                    position_to_add))
    frame_realizations.extend(new_realizations)


def create_new_realizations(conversion_pairs, frame_realization, position_to_add):
    """Return the realizations obtained by applying a rule to one realization.

    One new realization is produced for every combination of a position
    matching the first pattern and a position matching the second pattern;
    the input realization itself is left untouched.  An empty list is
    returned when either pattern matches nothing.
    """
    first_pair, second_pair = conversion_pairs[0], conversion_pairs[1]
    positions_with_first_arg = get_argument_occurrences(first_pair['from'],
                                                        frame_realization)
    positions_with_sec_arg = get_argument_occurrences(second_pair['from'],
                                                      frame_realization)
    new_realizations = []
    if not (positions_with_first_arg and positions_with_sec_arg):
        return new_realizations

    for first_arg_position in positions_with_first_arg:
        for sec_arg_position in positions_with_sec_arg:
            new_frame_realization = copy.deepcopy(frame_realization)
            add_new_position_if_needed(new_frame_realization, position_to_add)
            # Positions are located in the copy by equality with the
            # positions found in the original realization.
            first_position_to_change = get_position(new_frame_realization,
                                                    first_arg_position)
            second_position_to_change = get_position(new_frame_realization,
                                                     sec_arg_position)
            replace_argument(first_position_to_change, first_pair)
            replace_argument(second_position_to_change, second_pair)
            new_realizations.append(new_frame_realization)
    return new_realizations


def get_argument_occurrences(argument, frame_realization):
    """Return the positions containing an argument matching ``argument``.

    ``argument`` is a regular expression; empty (None or '') argument slots
    are skipped.  Each matching position is returned once.
    """
    pattern = re.compile(argument)
    return [position for position in frame_realization
            if any(arg and pattern.match(arg) for arg in position)]


def add_new_position_if_needed(frame_realization, new_position):
    """Append ``new_position`` to ``frame_realization`` unless it is None."""
    if new_position is not None:
        # Append a copy so that realizations never share one mutable list.
        frame_realization.append(list(new_position))


def get_position(frame_realization, position):
    """Return the first position equal to ``position``, or None if absent."""
    return next((pos for pos in frame_realization if position == pos), None)


def replace_argument(position, conversion_pair):
    """Rewrite, in place, the first argument of ``position`` matching the rule.

    The ``[.*]`` placeholder in ``conversion_pair['to']`` is replaced by the
    text captured by the first group of ``conversion_pair['from']`` (or by an
    empty string when the pattern has no group or the group is empty).  A
    ``'to'`` of None empties the argument slot.
    """
    from_pattern = re.compile(conversion_pair['from'])
    to_template = conversion_pair['to']
    for i, arg in enumerate(position):
        # Slots emptied by earlier rules hold None and cannot be matched.
        if not arg:
            continue
        match = from_pattern.match(arg)
        if not match:
            continue
        if to_template:
            # groups() excludes the whole match, so a single-group pattern
            # yields exactly one element.
            captured = match.group(1) if match.groups() else None
            position[i] = to_template.replace(u'[.*]', captured or u'')
        else:
            position[i] = to_template
        break


def create_frame_versions(frame_realizations, arg_conversion_function):
    """Build a frame object for every realization that yields one."""
    frame_versions = []
    for frame_realization in frame_realizations:
        frame_obj = frame_realization_to_obj(frame_realization,
                                             arg_conversion_function)
        if frame_obj is not None:
            frame_versions.append(frame_obj)
    return frame_versions


def frame_realization_to_obj(frame_realization, arg_conversion_function):
    """Build a frame object from one realization.

    Returns None when no position of the realization survives conversion.
    """
    positions = []
    for pos_realization in frame_realization:
        pos_obj = position_realization_to_obj(pos_realization,
                                              arg_conversion_function)
        if pos_obj is not None:
            positions.append(pos_obj)
    positions = remove_duplicates(positions)
    if not positions:
        return None
    # The remaining arguments are passed as empty strings; their meaning is
    # defined by positions_to_frame.
    return positions_to_frame(positions, '', '', '', '')


def position_realization_to_obj(pos_realization, arg_conversion_function):
    """Build a position object from one rule-format position.

    The position is requested without categories.  Returns None when no
    argument of the position survives conversion.
    """
    arguments = []
    for arg in remove_empty_args(pos_realization):
        arg_obj = argument_realization_to_obj(arg, arg_conversion_function)
        if arg_obj is not None:
            arguments.append(arg_obj)
    arguments = remove_duplicates(arguments)
    if not arguments:
        return None
    return get_or_create_position(categories=[], arguments=arguments)


def remove_empty_args(position_realization):
    """Return the non-empty (truthy) arguments of ``position_realization``."""
    return [arg for arg in position_realization if arg]


def argument_realization_to_obj(arg_realization, arg_conversion_function):
    """Parse one ``category{text_rep}`` string and convert the argument.

    Returns whatever ``arg_conversion_function`` returns, which may be None.
    """
    position_category, arg_text_rep = get_arg_realization_parts(arg_realization)
    arg_obj = parse_argument_text_rep(arg_text_rep)
    return arg_conversion_function(position_category, arg_obj)


def get_arg_realization_parts(arg_realization):
    """Split ``category{text_rep}`` into ``(category, text_rep)``.

    The string is split on the first ``{``; trailing ``}`` characters are
    stripped from the text representation.
    """
    arg_parts = arg_realization.split('{', 1)
    position_category = arg_parts[0]
    arg_text_rep = arg_parts[1].rstrip('}')
    return position_category, arg_text_rep


def remove_duplicates(objects):
    """Return ``objects`` without duplicates, keeping first-occurrence order.

    The objects must be hashable.
    """
    seen = set()
    unique = []
    for obj in objects:
        if obj not in seen:
            seen.add(obj)
            unique.append(obj)
    return unique