clusterer.py 1.92 KB
#! /usr/bin/python
# -*- coding: utf-8 -*-

from local_db import get_db_data, FreeFrames
from frame import LexicalUnit, SelectionalPreference, Frame
from group_frames import join_frames
from generate_xml import create_xml
from import_cluster import find_cluster
from sqlalchemy import delete

from semantics.models import SemanticFrame

import numpy as np
import time
    
def generate_xml(cluster_heart, xml_file):
    """Write the unified frame of the cluster around *cluster_heart* to XML.

    The cluster is looked up with ``find_cluster``; every frame it lists is
    loaded, the frames are merged with ``join_frames`` and the result is
    written to *xml_file* via ``create_xml``.  Finally the ids of all frames
    that were used are passed to ``change_frames_statuses`` as "taken".

    :param cluster_heart: value handed to ``find_cluster`` to select the cluster.
    :param xml_file: output target forwarded unchanged to ``create_xml``.
    """
    # prepare database: the frame helper classes share one session
    db_session, tt_dict = get_db_data()
    for session_user in (LexicalUnit, SelectionalPreference):
        session_user._session = db_session

    keys, transformation = find_cluster(cluster_heart, db_session, tt_dict)

    frames = []
    taken = []
    # NOTE(review): each entry is indexed as [0, col], so entries are
    # presumably 1xN numpy matrix rows (frame id, transformation id) -- confirm.
    for row in keys:
        fid = int(row[0, 0])
        frames.append((SemanticFrame.objects.get(id=fid), row[0, 1]))
        taken.append(fid)

    unified_frame, matching = join_frames(frames, transformation)

    create_xml(xml_file, unified_frame, matching)
    # nothing is freed here; only the clustered frames are marked as taken
    change_frames_statuses([], taken)

def generate_empty_xml(xml_file):
    """Write an XML document for a default-constructed ``Frame``.

    :param xml_file: output target forwarded unchanged to ``create_xml``.
    """
    blank_frame = Frame()
    no_matching = {}
    create_xml(xml_file, blank_frame, no_matching)

def generate_singleton_xml(cluster_heart, xml_file):
    """Write the XML for a cluster consisting of a single frame.

    The frame whose id is *cluster_heart* is loaded, run through
    ``join_frames`` on its own (with the ``'zero'`` transformation argument),
    written to *xml_file* and then reported as "taken" to
    ``change_frames_statuses``.

    :param cluster_heart: id of the ``SemanticFrame`` to export.
    :param xml_file: output target forwarded unchanged to ``create_xml``.
    """
    # prepare database: the frame helper classes share one session
    db_session, _tt_dict = get_db_data()
    for session_user in (LexicalUnit, SelectionalPreference):
        session_user._session = db_session

    only_frame = SemanticFrame.objects.get(id=cluster_heart)
    singleton = [(only_frame, '')]
    unified_frame, matching = join_frames(singleton, 'zero')

    create_xml(xml_file, unified_frame, matching)
    change_frames_statuses([], [cluster_heart])
    
def change_frames_statuses(freed, taken):
    """Update the ``FreeFrames`` table for frames that were freed or taken.

    Rows whose ``frame_id`` is in *taken* are deleted, one new row is inserted
    for every id in *freed*, and the session is committed.  If an id occurs
    in both collections nothing is written.

    :param freed: frame ids to add to ``FreeFrames``.
    :param taken: frame ids to remove from ``FreeFrames``.
    :return: ``'conflicting data'`` when the two collections overlap,
        otherwise ``'ok'``.
    """
    # an id cannot be freed and taken at once; refuse before touching the DB
    if set(freed) & set(taken):
        return 'conflicting data'

    session, _ = get_db_data()
    session.execute(delete(FreeFrames).where(FreeFrames.frame_id.in_(taken)))
    session.bulk_save_objects([FreeFrames(frame_id=fid) for fid in freed])
    session.commit()
    return 'ok'