group_frames.py
4.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#! /usr/bin/python
from transform_frame import TransformationRules
from frame import Frame, SelectionalPreference
from collections import defaultdict
from hungarian import hungarian_algorithm
from copy import copy
import numpy as np
def join_frames(frames, transformation_rule):
    """Transform frames with one rule and merge the results into one frame.

    Args:
        frames: iterable of (slowal_frame, signature) pairs. Each
            slowal_frame is converted with Frame.from_slowal and passed to
            the selected rule; only results whose signature equals the
            pair's signature are kept.
        transformation_rule: value compared with each rule's sign() to
            pick the rule to apply (the first rule that matches wins).

    Returns:
        A (unified_frame, matching) tuple: the frame built by
        create_unified_frame from the kept results, and the mapping
        produced by match_frames for those results.

    Raises:
        ValueError: if a frame has to be transformed but no rule's sign()
            equals transformation_rule.
    """
    rule = None
    for candidate in TransformationRules.get_rules():
        sign = candidate.sign()
        # Trace of every comparison; formatted so the emitted text is the
        # same under Python 2 and Python 3.
        print("%s %s" % (sign, transformation_rule))
        if sign == transformation_rule:
            rule = candidate
            break

    transformed_frames = []
    for slowal_frame, signature in frames:
        frame = Frame.from_slowal(slowal_frame)
        if rule is None:
            # Fail with a clear message instead of an AttributeError on
            # None. Checked lazily so that empty input keeps returning an
            # empty unified frame, as it did before.
            raise ValueError(
                "no transformation rule with signature %r"
                % (transformation_rule,)
            )
        transformed_frames.extend(
            f for s, f in rule.apply(frame) if s == signature
        )

    unified_frame = create_unified_frame(transformed_frames)
    matching = match_frames(unified_frame, transformed_frames)
    return unified_frame, matching
def create_unified_frame(frames):
    """Return a fresh Frame populated from `frames` by set_arguments."""
    merged = Frame()
    set_arguments(merged, frames)
    return merged
def set_arguments(unified_frame, frames):
    """Fill `unified_frame` with arguments gathered from `frames`.

    For every role label, the argument list with the largest first value
    reported by count_preferences is remembered (the first such list wins
    ties), together with the largest second value seen for that role.
    The remembered list is then added to `unified_frame` through
    add_argument, followed by placeholder SelectionalPreference objects
    numbered -1, -2, ... when the second value exceeds the first.

    Args:
        unified_frame: object exposing add_argument(role, argument);
            modified in place.
        frames: iterable of objects exposing get_role_labels() and
            get_arguments(role).
    """
    best_by_role = defaultdict(lambda: (0, []))
    widest_by_role = defaultdict(int)
    for frame in frames:
        for role in frame.get_role_labels():
            candidates = frame.get_arguments(role)
            size, specific = count_preferences(candidates)
            if size > best_by_role[role][0]:
                best_by_role[role] = (size, candidates)
            widest_by_role[role] = max(widest_by_role[role], specific)

    next_placeholder = -1  # shared across roles, decreases on every use
    for role in widest_by_role:
        size, chosen = best_by_role[role]
        for argument in chosen:
            unified_frame.add_argument(role, argument)
        # NOTE(review): count_preferences returns (len, not_all count), so
        # the second value never exceeds the first and this range looks
        # always empty -- confirm the intended order of that tuple.
        for _ in range(widest_by_role[role] - size):
            unified_frame.add_argument(
                role, SelectionalPreference(next_placeholder)
            )
            next_placeholder -= 1
def count_preferences(arguments):
    """Return (number of arguments, number whose not_all() is truthy).

    Args:
        arguments: sized iterable of objects exposing not_all().
    """
    specific = sum(1 for argument in arguments if argument.not_all())
    return (len(arguments), specific)
def match_frames(unified_frame, frames):
    """Map each frame's _id to its match_frame result against unified_frame.

    Args:
        unified_frame: frame passed as the first argument of match_frame.
        frames: iterable of frames exposing an _id attribute.

    Returns:
        dict keyed by frame._id; a later frame with the same _id
        overwrites an earlier one.
    """
    return {
        frame._id: match_frame(unified_frame, frame) for frame in frames
    }
def match_frame(big_frame, small_frame):
    """Match the arguments of small_frame to those of big_frame, per role.

    Only the role labels of big_frame are visited; for each one the two
    argument lists are handed to match_arguments, which records pairs in
    the returned dict.

    Returns:
        dict filled by match_arguments (empty when big_frame has no roles).
    """
    pairs = {}
    for role in big_frame.get_role_labels():
        big_arguments = big_frame.get_arguments(role)
        small_arguments = small_frame.get_arguments(role)
        match_arguments(big_arguments, small_arguments, pairs)
    return pairs
def match_arguments(longer_arguments_list, shorter_arguments_list, matching):
    """Pair arguments of the shorter list with arguments of the longer one.

    Arguments whose first _content entry is truthy are paired with each
    other first, walking both lists from the end. Whatever is left over is
    handed to match_remaining_arguments.

    Args:
        longer_arguments_list: list of arguments exposing _content and _id;
            not modified (a shallow copy is consumed instead).
        shorter_arguments_list: same, for the other side of the matching.
        matching: dict updated in place with
            shorter-argument _id -> longer-argument _id.

    Returns:
        None, either directly or as returned by match_remaining_arguments.
    """
    # Shallow copies: entries are deleted below as they get matched.
    remaining_long = copy(longer_arguments_list)
    remaining_short = copy(shorter_arguments_list)

    flags_long = [sp._content[0] for sp in remaining_long]
    flags_short = [sp._content[0] for sp in remaining_short]

    # First match the flagged arguments on both sides with each other.
    if sum(flags_long) > 0 and sum(flags_short) > 0:
        # Flagged positions, largest first, so deleting an entry never
        # shifts a position that is still waiting to be used.
        picked_long = [i for i, flag in enumerate(flags_long) if flag]
        picked_short = [j for j, flag in enumerate(flags_short) if flag]
        picked_long.reverse()
        picked_short.reverse()
        for i, j in zip(picked_long, picked_short):
            matching[remaining_short[j]._id] = remaining_long[i]._id
            del remaining_long[i]
            del remaining_short[j]

    if len(remaining_long) == 0 and len(remaining_short) == 0:
        return
    return match_remaining_arguments(remaining_long, remaining_short, matching)
def get_arguments_dict(args, args_dict):
    """Register every argument of `args` in `args_dict` under its _id.

    Args:
        args: iterable of objects exposing an _id attribute.
        args_dict: dict updated in place; existing keys are overwritten.
    """
    args_dict.update((argument._id, argument) for argument in args)
def gen_array(l1, l2, s1, s2):
    """Build the cost matrix for pairing elements of l1 with elements of l2.

    Entry (r, c) is -log(similarity) of l1[r] and l2[c] as given by
    SelectionalPreference.similarity, or 10**6 when that similarity is 0.
    After the rows for l1, one extra row filled with 10**6 is appended for
    every value in range(s1, s2).

    Args:
        l1: preferences that produce the real rows.
        l2: preferences that produce the columns.
        s1: start of the padding range (the caller passes len(l1)).
        s2: end of the padding range (the caller passes len(l2)).

    Returns:
        numpy array built from the list of rows.
    """
    penalty = 10.0**6

    def cost(first, second):
        likeness = SelectionalPreference.similarity(first, second, {})
        return penalty if likeness == 0 else -np.log(likeness)

    rows = [[cost(first, second) for second in l2] for first in l1]
    rows.extend([penalty for _ in l2] for _ in range(s1, s2))
    return np.array(rows)
def match_remaining_arguments(arglist1, arglist2, matching):
    """Assign each argument of arglist2 to an argument of arglist1.

    A cost matrix is built by gen_array with one row per arglist2 element
    (plus padding rows when arglist1 is longer) and one column per
    arglist1 element, then solved with hungarian_algorithm. Pairs that
    fall on a padding row are ignored.

    Args:
        arglist1: arguments to match against (columns); elements expose _id.
        arglist2: arguments to be matched (rows); elements expose _id.
        matching: dict updated in place with
            arglist2-argument _id -> arglist1-argument _id.

    Returns:
        None.
    """
    real_rows = len(arglist2)
    # With an empty side nothing can be paired: skip building and solving
    # a matrix that is all padding or has no columns.
    if real_rows == 0 or len(arglist1) == 0:
        return

    costs = gen_array(arglist2, arglist1, real_rows, len(arglist1))
    # The solver gets a copy of the matrix, as in the original call.
    for i, j in hungarian_algorithm(costs.copy()):
        # Rows at or beyond real_rows are padding, not actual arguments.
        if i < real_rows:
            matching[arglist2[i]._id] = arglist1[j]._id
    return