group_frames.py 4.71 KB

Edit Raw Blame History

#! /usr/bin/python

from transform_frame import TransformationRules
from frame import Frame, SelectionalPreference
from collections import defaultdict
from hungarian import hungarian_algorithm
from copy import copy

import numpy as np

def join_frames(frames, transformation_rule):
    """Transform a group of frames with one rule and unify the results.

    Args:
        frames: iterable of ``(slowal_frame, signature)`` pairs.
        transformation_rule: value compared against ``r.sign()`` of every
            rule returned by ``TransformationRules.get_rules()``; the first
            rule that compares equal is used.

    Returns:
        A ``(unified_frame, matching)`` tuple: the frame produced by
        ``create_unified_frame`` and the dict produced by ``match_frames``.

    Raises:
        ValueError: if no available rule has the requested signature.
    """
    rules = TransformationRules.get_rules()
    rule = next((r for r in rules if r.sign() == transformation_rule), None)
    if rule is None:
        # Fail early with a readable message instead of an AttributeError
        # on ``None.apply`` further down.
        raise ValueError(
            "no transformation rule with signature %r" % (transformation_rule,))

    transformed_frames = []
    for slowal_frame, signature in frames:
        frame = Frame.from_slowal(slowal_frame)
        # rule.apply() yields (signature, frame) pairs; keep only the
        # variants whose signature equals the one requested for this frame.
        transformed_frames.extend(
            f for s, f in rule.apply(frame) if s == signature)

    unified_frame = create_unified_frame(transformed_frames)
    matching = match_frames(unified_frame, transformed_frames)
    return unified_frame, matching


def create_unified_frame(frames):
    """Return a new ``Frame`` whose arguments are filled from ``frames``.

    The frame is created empty and handed to ``set_arguments`` together
    with ``frames``.
    """
    result = Frame()
    set_arguments(result, frames)
    return result

def set_arguments(unified_frame, frames):
    """Fill ``unified_frame`` with per-role arguments gathered from ``frames``.

    For every role label seen in ``frames`` the argument list with the
    highest first value of ``count_preferences`` is remembered (the earliest
    list wins ties) together with the highest second value seen for that
    role.  The remembered arguments are then added to ``unified_frame``,
    followed by ``highest second value - remembered first value`` fresh
    ``SelectionalPreference`` objects numbered -1, -2, ... across all roles.
    """
    best_by_role = defaultdict(lambda: (0, []))
    highest_second = defaultdict(int)

    for frame in frames:
        for role in frame.get_role_labels():
            role_args = frame.get_arguments(role)
            first, second = count_preferences(role_args)
            if best_by_role[role][0] < first:
                best_by_role[role] = (first, role_args)
            highest_second[role] = max(highest_second[role], second)

    next_id = -1
    for role in highest_second:
        best_score, best_args = best_by_role[role]
        for argument in best_args:
            unified_frame.add_argument(role, argument)
        # NOTE(review): count_preferences returns (len(arguments), number of
        # not_all arguments), so the second value never exceeds the first and
        # this padding loop cannot run as written -- confirm whether the
        # tuple order is the intended one.
        for _ in range(highest_second[role] - best_score):
            unified_frame.add_argument(role, SelectionalPreference(next_id))
            next_id -= 1

def count_preferences(arguments):
    """Return ``(total, specific)`` for a list of arguments.

    ``total`` is ``len(arguments)``; ``specific`` is the number of
    arguments whose ``not_all()`` call returns a truthy value.
    """
    specific = sum(1 for argument in arguments if argument.not_all())
    return len(arguments), specific

def match_frames(unified_frame, frames):
    """Match every frame in ``frames`` against ``unified_frame``.

    Returns a dict keyed by each frame's ``_id``; the value is whatever
    ``match_frame(unified_frame, frame)`` returns for that frame.
    """
    return {
        frame._id: match_frame(unified_frame, frame)
        for frame in frames
    }

def match_frame(big_frame, small_frame):
    """Match the arguments of ``small_frame`` to those of ``big_frame``.

    Iterates over the role labels of ``big_frame`` and lets
    ``match_arguments`` fill one shared dict for all roles, which is
    returned.
    """
    result = {}
    for role in big_frame.get_role_labels():
        big_args = big_frame.get_arguments(role)
        small_args = small_frame.get_arguments(role)
        match_arguments(big_args, small_args, result)
    return result

def match_arguments(longer_arguments_list, shorter_arguments_list, matching):
    """Match the arguments of one role between two frames.

    Arguments whose ``_content[0]`` is truthy (the "all" preference) are
    paired with each other first, walking both lists from the back.
    Anything left on either side is passed on to
    ``match_remaining_arguments``.

    Args:
        longer_arguments_list: arguments to match against (the unified
            frame's arguments when called from ``match_frame``); the list
            itself is not modified.
        shorter_arguments_list: arguments to be matched; the list itself is
            not modified.
        matching: dict updated in place; maps the ``_id`` of an argument
            from ``shorter_arguments_list`` to the ``_id`` of its partner
            from ``longer_arguments_list``.

    Returns:
        None when nothing is left after pairing the "all" preferences,
        otherwise the result of ``match_remaining_arguments``.
    """
    # Work on shallow copies so the callers' lists stay intact.
    selprefs1 = list(longer_arguments_list)
    selprefs2 = list(shorter_arguments_list)

    # Positions of "all" preferences, largest index first: deleting in
    # descending order keeps the not-yet-visited (smaller) indexes valid.
    indexes1 = [i for i, sp in enumerate(selprefs1) if sp._content[0]]
    indexes1.reverse()
    indexes2 = [j for j, sp in enumerate(selprefs2) if sp._content[0]]
    indexes2.reverse()

    # zip() stops at the shorter index list, so nothing is paired when
    # either side has no "all" preference.
    for i, j in zip(indexes1, indexes2):
        matching[selprefs2[j]._id] = selprefs1[i]._id
        del selprefs1[i]
        del selprefs2[j]

    if not selprefs1 and not selprefs2:
        return None
    return match_remaining_arguments(selprefs1, selprefs2, matching)

def get_arguments_dict(args, args_dict):
    """Register every element of ``args`` in ``args_dict`` under its ``_id``.

    ``args_dict`` is updated in place; an existing entry with the same
    ``_id`` is overwritten.
    """
    args_dict.update((arg._id, arg) for arg in args)

def gen_array(l1, l2, s1, s2):
    """Build the cost matrix for matching elements of ``l1`` to ``l2``.

    The cell for a pair is ``-log(similarity)`` where ``similarity`` comes
    from ``SelectionalPreference.similarity``; a similarity of 0 gets the
    large constant cost 1e6 instead.  After the rows for ``l1``, one extra
    row filled with that constant is appended for every value in
    ``range(s1, s2)``.

    Returns:
        The matrix as a numpy array (one row per element of ``l1`` plus the
        extra rows, one column per element of ``l2``).
    """
    fake = 10.0**6

    def cost(first, second):
        similarity = SelectionalPreference.similarity(first, second, {})
        if similarity == 0:
            return fake
        return -np.log(similarity)

    rows = [[cost(first, second) for second in l2] for first in l1]
    # Dummy rows: every cell carries the prohibitive cost.
    rows.extend([fake for _ in l2] for _ in range(s1, s2))
    return np.array(rows)

def match_remaining_arguments(arglist1, arglist2, matching):
    """Pair the leftover arguments of two lists via the Hungarian algorithm.

    The cost matrix from ``gen_array`` has one row per element of
    ``arglist2`` (padded with dummy rows up to ``len(arglist1)``) and one
    column per element of ``arglist1``.

    Args:
        arglist1: candidate partners (matrix columns).
        arglist2: arguments to be matched (matrix rows).
        matching: dict updated in place; maps the ``_id`` of an element of
            ``arglist2`` to the ``_id`` of the element of ``arglist1`` it
            was assigned to.

    Returns:
        None.
    """
    s1 = len(arglist1)
    s2 = len(arglist2)
    if s1 == 0 or s2 == 0:
        # Nothing can be paired when one side is empty; skip building a
        # degenerate (or all-dummy) cost matrix and running the solver.
        return

    # NOTE(review): the matrix is only square when s2 <= s1 -- confirm
    # that hungarian_algorithm copes with s2 > s1 or that it cannot occur.
    array = gen_array(arglist2, arglist1, s2, s1)
    ans_pos = hungarian_algorithm(array.copy())
    for i, j in ans_pos:
        # Rows with index >= s2 are padding and stand for no real argument.
        if i < s2:
            matching[arglist2[i]._id] = arglist1[j]._id
    return