# utils.py 2.49 KB
from __future__ import print_function

import codecs
import sys

import javaobj

from keras.models import Model
from keras.layers import Input, Dense, Dropout, Activation, BatchNormalization


def eprint(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as ``print``; the
    output stream is always ``sys.stderr`` as it is bound at call time.
    """
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)


def initialize_neural_model(number_of_features, path_to_model):
    """Build the feed-forward binary classifier and load its weights.

    The network has three hidden stages of 1000, 500 and 300 units. Each
    stage is a ReLU ``Dense`` layer followed by ``Dropout(0.2)`` and
    ``BatchNormalization``. A single sigmoid unit produces the output.

    Args:
        number_of_features: Length of each input feature vector.
        path_to_model: Path handed to ``Model.load_weights``.

    Returns:
        The compiled Keras ``Model`` with the weights loaded.
    """
    inputs = Input(shape=(number_of_features,))

    # Layers are instantiated in the same order for every stage
    # (Dense, Dropout, BatchNormalization), widest stage first.
    hidden = inputs
    for units in (1000, 500, 300):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = Dropout(0.2)(hidden)
        hidden = BatchNormalization()(hidden)

    output = Dense(1, activation='sigmoid')(hidden)

    model = Model(inputs, output)
    model.compile(
        optimizer='Adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    model.load_weights(path_to_model)

    return model


def load_freq_list(freq_path):
    """Load a frequency list from a UTF-8 encoded text file.

    Every non-blank line is split on whitespace: the first field is parsed
    as an integer frequency and the second field is the base form. When a
    base form appears on several lines, the frequency from its first
    occurrence is kept. Blank (whitespace-only) lines are skipped.

    Args:
        freq_path: Path to the frequency file.

    Returns:
        A dict mapping each base form to its integer frequency.

    Raises:
        ValueError: If the first field of a line is not an integer.
        IndexError: If a non-blank line has fewer than two fields.
    """
    freq_list = {}
    with codecs.open(freq_path, 'r', 'utf-8') as freq_file:
        # Iterate the file directly rather than materialising every line
        # with readlines().
        for line in freq_file:
            line_parts = line.split()
            if not line_parts:
                # Blank line (e.g. a trailing empty line): nothing to parse.
                continue
            freq = int(line_parts[0])
            # setdefault keeps the first frequency seen for a base form.
            freq_list.setdefault(line_parts[1], freq)
    return freq_list


def load_one2many_map(map_path):
    """Load a Java-serialized one-to-many map into a dict of sets.

    The file is deserialized with ``javaobj`` and the ``annotations`` list
    of the resulting object is walked pairwise: each entry at an odd index
    is used as a key, and the ``annotations`` of the entry that follows it
    become the set of mapped elements.

    NOTE(review): index 0 is skipped; presumably it holds the serialized
    map's block-data header rather than a key -- confirm against javaobj.

    Args:
        map_path: Path to the serialized Java object file.

    Returns:
        A dict mapping each key to a ``set`` of its mapped elements.

    Raises:
        IndexError: If a key at an odd index has no following entry.
    """
    # The context manager guarantees the file handle is released even if
    # deserialization fails.
    with open(map_path, 'rb') as map_file:
        marshaller = javaobj.JavaObjectUnmarshaller(map_file)
        pobj = marshaller.readObject()

    jmap_annotations = pobj.__dict__['annotations']

    this_map = {}
    # Visit odd indices only: key at i, its value container at i + 1.
    for i in range(1, len(jmap_annotations), 2):
        mapped_elements = set(jmap_annotations[i + 1].__dict__['annotations'])
        this_map[jmap_annotations[i]] = mapped_elements
    return this_map


def load_one2one_map(map_path):
    """Load a Java-serialized one-to-one map into a dict.

    The file is deserialized with ``javaobj`` and the ``annotations`` list
    of the resulting object is walked pairwise: each entry at an odd index
    is used as a key and the entry that follows it is stored as its value.

    NOTE(review): index 0 is skipped; presumably it holds the serialized
    map's block-data header rather than a key -- confirm against javaobj.

    Args:
        map_path: Path to the serialized Java object file.

    Returns:
        A dict mapping each key to its single mapped element.

    Raises:
        IndexError: If a key at an odd index has no following entry.
    """
    # The context manager guarantees the file handle is released even if
    # deserialization fails.
    with open(map_path, 'rb') as map_file:
        marshaller = javaobj.JavaObjectUnmarshaller(map_file)
        pobj = marshaller.readObject()

    jmap_annotations = pobj.__dict__['annotations']

    this_map = {}
    # Visit odd indices only: key at i, its value at i + 1.
    for i in range(1, len(jmap_annotations), 2):
        this_map[jmap_annotations[i]] = jmap_annotations[i + 1]
    return this_map