reduce_cluster.py 1.53 KB
#! /usr/bin/python3

import numpy as np

# Multiplier applied to the normalised threshold in reduce_cluster_iteration:
# a reduction step is triggered when the lowest per-element average similarity
# is below TRESHOLD_FACTOR * normalised_treshold.
TRESHOLD_FACTOR = 0.9
# Margin above the lowest average similarity: in a reduction step every element
# whose average similarity is less than (minimum + STEP) is removed together.
STEP = 0.01

def perform_reduce(matrix, keys, elements):
    """Remove the given elements from a similarity matrix and its key list.

    Args:
        matrix: 2-D numpy array; the rows and columns at the given indices
            are removed.
        keys: array-like aligned with the first axis of ``matrix``; the
            entries at the same indices are removed.
        elements: iterable of integer indices, all referring to positions
            in the *input* ``matrix`` / ``keys``.

    Returns:
        A ``(new_matrix, new_keys)`` tuple. The inputs are not modified.
        When ``elements`` is empty the inputs are returned unchanged.
    """
    indices = list(elements)
    if not indices:
        return (matrix, keys)
    # Delete all indices in a single call per axis. Deleting them one at a
    # time would shift the positions of the remaining elements, so later
    # indices would point at the wrong rows/columns/keys.
    new_matrix = np.delete(matrix, indices, axis=0)
    new_matrix = np.delete(new_matrix, indices, axis=1)
    new_keys = np.delete(keys, indices, axis=0)
    return (new_matrix, new_keys)

def calculate_average_similarities(matrix):
    """Return, for every row, the mean of its off-diagonal entries.

    For row ``i`` the entries left and right of column ``i`` are summed and
    divided by ``matrix.shape[0] - 1``. The result is a plain list with one
    value per row.
    """
    size = matrix.shape[0]
    others = size - 1

    def row_average(row):
        # Sum everything in the row except the diagonal entry.
        left = np.sum(matrix[row, :row])
        right = np.sum(matrix[row, (row + 1):])
        return ((left + right) * 1.0) / others

    return [row_average(row) for row in range(size)]

def reduce_cluster_iteration(matrix, keys, normalised_treshold):
    """Perform one reduction step on a cluster.

    The per-element average similarities are computed with
    ``calculate_average_similarities``. If the lowest of them is below
    ``TRESHOLD_FACTOR * normalised_treshold``, or their overall average is
    below ``normalised_treshold``, every element whose average is less than
    ``lowest + STEP`` is removed via ``perform_reduce``.

    Args:
        matrix: 2-D numpy similarity matrix.
        keys: array-like aligned with the first axis of ``matrix``.
        normalised_treshold: similarity threshold to compare against.

    Returns:
        ``(changed, new_matrix, new_keys)``. ``changed`` is ``True`` when
        elements were removed; otherwise the inputs are returned unchanged.
    """
    # With fewer than two elements there is no pairwise similarity to
    # evaluate; np.min would raise on an empty list and the single-element
    # average is undefined (division by zero), so stop here.
    if matrix.shape[0] < 2:
        return (False, matrix, keys)

    avg_sims = calculate_average_similarities(matrix)
    avg_sim = np.average(avg_sims)
    min_avg_sim = np.min(avg_sims)
    if min_avg_sim < (TRESHOLD_FACTOR * normalised_treshold) or avg_sim < normalised_treshold:
        # Remove the weakest element together with all elements that are
        # within STEP of it.
        cutoff = min_avg_sim + STEP
        weakest = [i for i, avg in enumerate(avg_sims) if avg < cutoff]
        new_matrix, new_keys = perform_reduce(matrix, keys, weakest)
        return (True, new_matrix, new_keys)
    return (False, matrix, keys)

def reduce_cluster(matrix, keys, normalised_treshold):
    """Repeatedly apply reduce_cluster_iteration until nothing is removed.

    Returns the keys that remain after the final iteration.
    """
    current_matrix, current_keys = matrix, keys
    while True:
        changed, current_matrix, current_keys = reduce_cluster_iteration(
            current_matrix, current_keys, normalised_treshold
        )
        if not changed:
            return current_keys