Commit 5ee737bf2b2951378f2adeeffd7f1abc7b7670b5

Authored by Norbert Ryciak
1 parent 6060efac

Comparison of the MLP model on Polish and English data with the tree-LSTM. First version of, and experiments with, the network with edge parameterization.

Too many changes to show.

To preserve performance only 19 of 24 files are displayed.
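
The commit message above refers to a network parameterized by edges: in the relation-aware models (MLP_2_2 in modules/rnn/models_with_relations.py) the contribution of a child node to its parent depends on the dependency relation of the connecting edge, with load_stanford_data6 reading rels.txt and keeping only the k most common relations. A minimal numpy sketch of the idea, using illustrative names rather than the ones in the repository:

import numpy as np

h_dim, n_relations = 50, 5
# one weight matrix per dependency relation (edge label); hypothetical shapes
W_rel = 0.05 * np.random.uniform(-1.0, 1.0, (n_relations, h_dim, h_dim))

def compose_children(child_states, child_relation_ids):
    # sum the children's hidden states, each transformed by the matrix of its edge's relation
    out = np.zeros(h_dim)
    for h_child, rel_id in zip(child_states, child_relation_ids):
        out += W_rel[rel_id].dot(h_child)
    return out

children = np.random.randn(2, h_dim)
parent_input = compose_children(children, [1, 3])   # relation ids select edge-specific matrices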

main_for_experiments_on_polish_data_LSTM.py 0 → 100644
  1 +import numpy as np
  2 +import time
  3 +import sys
  4 +import subprocess
  5 +import os
  6 +import random
  7 +
  8 +#from modules.data import load
  9 +from modules.rnn.LSTM_models import *
  10 +#from modules.metrics.accuracy import conlleval
  11 +from modules.utils.tools import load_stanford_data4, shuffle
  12 +
  13 +from theano import pp
  14 +
  15 +import theano.tensor as T
  16 +import theano
  17 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  18 +
  19 +import itertools
  20 +
  21 +import os.path
  22 +import pickle
  23 +
  24 +from collections import Counter
  25 +
  26 +
  27 +
  28 +from theano import tensor as T, printing
  29 +from collections import OrderedDict
  30 +from theano.ifelse import ifelse
  31 +
  32 +from keras.preprocessing import sequence as seq
  33 +
  34 +dataType = 'int64'
  35 +
  36 +
  37 +if __name__ == '__main__':
  38 +
  39 +
  40 +
  41 + w2v_DIM = "300"
  42 +
  43 +
  44 +
  45 + file_with_filtered_embeddings = "embeddings/embedding_and_words2ids_dim"+w2v_DIM+"_polish.pkl"
  46 + if not os.path.exists(file_with_filtered_embeddings):
  47 + print("Cannot find file with only needed embeddings. We use 'filter_embeddings' in order to create it.")
  48 + filter_embeddings(["data/dane_polskie/train/train_labels.txt", "data/dane_polskie/train/train_parents.txt","data/dane_polskie/train/train_sentence.txt",
  49 + "data/dane_polskie/dev/dev_labels.txt", "data/dane_polskie/dev/dev_parents.txt","data/dane_polskie/dev/dev_sentence.txt",
  50 + "data/dane_polskie/test/test_labels.txt", "data/dane_polskie/test/test_parents.txt","data/dane_polskie/test/test_sentence.txt"],
  51 +
  52 + "/home/norbert/Doktorat/clarin2sent/deeptagger/embeddings/w2v_allwiki_nkjpfull_"+w2v_DIM+".txt",
  53 + file_with_filtered_embeddings)
  54 +
  55 +
  56 + s = {'lr':0.002,
  57 + 'nepochs':40,
  58 + 'seed':345,
  59 + 'nc':3 # number of y classes
  60 + }
  61 + batch_size = 1
  62 +
  63 +
  64 + for h_dim in [100, 150]:
  65 +
  66 + np.random.seed(s['seed'])
  67 + random.seed(s['seed'])
  68 +
  69 +
  70 + rnn = LSTM_1( h_dim,
  71 + nc = s['nc'],
  72 + w2v_model_path = file_with_filtered_embeddings, # path to the embeddings file
  73 + max_phrase_length = 60 )
  74 +
  75 +
  76 + train_data = load_stanford_data4("data/dane_polskie/train/train_labels.txt", "data/dane_polskie/train/train_parents.txt","data/dane_polskie/train/train_sentence.txt",rnn.words2ids,True,batch_size,s['nc'])
  77 + train_data_check = train_data
  78 + dev_data = load_stanford_data4("data/dane_polskie/dev/dev_labels.txt", "data/dane_polskie/dev/dev_parents.txt","data/dane_polskie/dev/dev_sentence.txt",rnn.words2ids,False,0,s['nc'])
  79 + test_data = load_stanford_data4("data/dane_polskie/test/test_labels.txt", "data/dane_polskie/test/test_parents.txt","data/dane_polskie/test/test_sentence.txt",rnn.words2ids,False,0,s['nc'])
  80 +
  81 + n_train = len(train_data)
  82 + n_dev = len(dev_data)
  83 + n_test = len(test_data)
  84 +
  85 + print ""
  86 + #print "model 56 : h_dim = ", h_dim, "h2_dim = ", h2_dim, "h3_dim = ", h3_dim, " learning rate = ", s['lr']#, "dropout rate: ", dropout_rate
  87 + print "model LSTM_` : " , "h_dim = ", h_dim
  88 + print ""
  89 +
  90 + best_prediction_valid_all = 0
  91 + best_prediction_test_all = 0
  92 + best_prediction_test_root = 0
  93 + early_stop = 0
  94 +
  95 +
  96 + tic = time.time()
  97 +
  98 + for e in xrange(s['nepochs']):
  99 +
  100 + #if e >= 1:
  101 + # s['lr'] = 0.8 * s['lr']
  102 +
  103 + if early_stop == 10:
  104 + break
  105 +
  106 +
  107 + # shuffle
  108 + shuffle([train_data], s['seed'])
  109 +
  110 + for i in range(n_train):
  111 + rnn.train(train_data[i][0],train_data[i][1], train_data[i][2], train_data[i][3], s['lr'])
  112 +
  113 +
  114 +
  115 + # Dev:
  116 + counts_dev = np.zeros((s['nc'],s['nc']),dtype='int')
  117 + counts_dev_root = np.zeros((s['nc'],s['nc']),dtype='int')
  118 + for ii in range(n_dev):
  119 + pred = rnn.classify(dev_data[ii][0],dev_data[ii][1], dev_data[ii][3])
  120 + for j in range(len(pred)):
  121 + counts_dev[pred[j], dev_data[ii][2][j]] += 1
  122 + counts_dev_root[pred[-1], dev_data[ii][2][-1]] += 1
  123 +
  124 +
  125 + # Test:
  126 + counts_test = np.zeros((s['nc'],s['nc']),dtype='int')
  127 + counts_test_root = np.zeros((s['nc'],s['nc']),dtype='int')
  128 + for i in range(n_test):
  129 + pred = rnn.classify(test_data[i][0],test_data[i][1], test_data[i][3])
  130 + for j in range(len(pred)):
  131 + counts_test[pred[j], test_data[i][2][j]] += 1
  132 + counts_test_root[pred[-1], test_data[i][2][-1]] += 1
  133 +
  134 + # Train
  135 + counts = np.zeros((s['nc'],s['nc']),dtype='int')
  136 + counts_root = np.zeros((s['nc'],s['nc']),dtype='int')
  137 + for i in range(len(train_data_check)):
  138 +
  139 + if i % 1 == 0: # i % 1 keeps every training example; raise the modulus to evaluate on a subset and save time
  140 + pred = rnn.classify(train_data_check[i][0],train_data_check[i][1], train_data_check[i][3])
  141 + for j in range(len(pred)):
  142 + counts[pred[j], train_data_check[i][2][j]] += 1
  143 + counts_root[pred[-1], train_data_check[i][2][-1]] += 1
  144 +
  145 + print("epoch: ", e,
  146 + "V all: ", "%0.2f" % (100 * np.diag(counts_dev).sum()/float(counts_dev.sum())),
  147 + " Test all: ", "%0.2f" % (100 * np.diag(counts_test).sum()/float(counts_test.sum())),
  148 + "V root: ", "%0.2f" % (100 * np.diag(counts_dev_root).sum()/float(counts_dev_root.sum())),
  149 + " Test root: ", "%0.2f" % (100 * np.diag(counts_test_root).sum()/float(counts_test_root.sum())),
  150 + " Train: ", "%0.2f" % (100 * np.diag(counts).sum()/float(counts.sum())),
  151 + " Train root: ", "%0.2f" % (100 * np.diag(counts_root).sum()/float(counts_root.sum()))
  152 + )
  153 +
  154 +
  155 + if np.diag(counts_dev).sum()/float(counts_dev.sum()) > best_prediction_valid_all:
  156 + best_prediction_valid_all = np.diag(counts_dev).sum()/float(counts_dev.sum())
  157 + best_prediction_test_all = np.diag(counts_test).sum()/float(counts_test.sum())
  158 + best_prediction_test_root = np.diag(counts_test_root).sum()/float(counts_test_root.sum())
  159 +
  160 + early_stop = 0
  161 + else:
  162 + early_stop = early_stop + 1
  163 +
  164 +
  165 + print("Best valid: ", "%0.2f" % (100 * best_prediction_valid_all)," Test all: ","%0.2f" % (100 * best_prediction_test_all),"Test root: ","%0.2f" % (100 * best_prediction_test_root), " time: ", time.time()-tic)
  166 +
... ...
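
In the evaluation loops above, accuracy is read off confusion-matrix style count tables: counts[pred, gold] is incremented for every node (and counts_*_root only for the root node of each sentence), and the reported score is the diagonal mass over the total. A tiny self-contained illustration with made-up predictions:

import numpy as np

counts = np.zeros((3, 3), dtype='int')        # 3 classes, as in s['nc'] above
for pred, gold in [(0, 0), (2, 1), (1, 1)]:   # toy (prediction, gold) pairs
    counts[pred, gold] += 1
accuracy = np.diag(counts).sum() / float(counts.sum())
print(accuracy)                               # 2 correct out of 3 -> 0.67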
main_for_experiments_on_polish_data_MLP2.py 0 → 100644
  1 +import numpy as np
  2 +import time
  3 +import sys
  4 +import subprocess
  5 +import os
  6 +import random
  7 +
  8 +#from modules.data import load
  9 +from modules.rnn.models_with_relations import *
  10 +#from modules.metrics.accuracy import conlleval
  11 +from modules.utils.tools import load_stanford_data4, load_stanford_data6, shuffle
  12 +
  13 +from theano import pp
  14 +
  15 +import theano.tensor as T
  16 +import theano
  17 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  18 +
  19 +import itertools
  20 +
  21 +import os.path
  22 +import pickle
  23 +
  24 +from collections import Counter
  25 +
  26 +
  27 +
  28 +from theano import tensor as T, printing
  29 +from collections import OrderedDict
  30 +from theano.ifelse import ifelse
  31 +
  32 +from keras.preprocessing import sequence as seq
  33 +
  34 +dataType = 'int64'
  35 +
  36 +
  37 +if __name__ == '__main__':
  38 +
  39 +
  40 +
  41 + w2v_DIM = "300"
  42 +
  43 +
  44 +
  45 + file_with_filtered_embeddings = "embeddings/embedding_and_words2ids_dim"+w2v_DIM+"_polish.pkl"
  46 + if not os.path.exists(file_with_filtered_embeddings):
  47 + print("Cannot find file with only needed embeddings. We use 'filter_embeddings' in order to create it.")
  48 + filter_embeddings(["data/dane_polskie/train/train_labels.txt", "data/dane_polskie/train/train_parents.txt","data/dane_polskie/train/train_sentence.txt",
  49 + "data/dane_polskie/dev/dev_labels.txt", "data/dane_polskie/dev/dev_parents.txt","data/dane_polskie/dev/dev_sentence.txt",
  50 + "data/dane_polskie/test/test_labels.txt", "data/dane_polskie/test/test_parents.txt","data/dane_polskie/test/test_sentence.txt"],
  51 +
  52 + "/home/norbert/Doktorat/clarin2sent/deeptagger/embeddings/w2v_allwiki_nkjpfull_"+w2v_DIM+".txt",
  53 + file_with_filtered_embeddings)
  54 +
  55 +
  56 + s = {'lr':0.002,
  57 + 'nepochs':40,
  58 + 'seed':345,
  59 + 'nc':3 # number of y classes
  60 + }
  61 + batch_size = 1
  62 +
  63 +
  64 + for h_dim in [50]:
  65 +
  66 + np.random.seed(s['seed'])
  67 + random.seed(s['seed'])
  68 +
  69 +
  70 + rnn = MLP_2_1( h_dim, h_dim,
  71 + nc = s['nc'],
  72 + w2v_model_path = file_with_filtered_embeddings, # path to the embeddings file
  73 + max_phrase_length = 60 )
  74 +
  75 +
  76 + train_data = load_stanford_data4("data/dane_polskie/train/train_labels.txt",
  77 + "data/dane_polskie/train/train_parents.txt",
  78 + "data/dane_polskie/train/train_sentence.txt",
  79 +
  80 +#############################################################################################
  81 + # NOTE (translated from the original banner): THERE ARE NO RELATIONS FOR THE POLISH DATA,
  82 + # so the relation-free loader load_stanford_data4 is used here instead of load_stanford_data6.
  83 +
  84 +#############################################################################################
  85 +
  86 + rnn.words2ids,True,batch_size,s['nc'])
  87 + train_data_check = train_data
  88 + dev_data = load_stanford_data4("data/dane_polskie/dev/dev_labels.txt", "data/dane_polskie/dev/dev_parents.txt","data/dane_polskie/dev/dev_sentence.txt",rnn.words2ids,False,0,s['nc'])
  89 + test_data = load_stanford_data4("data/dane_polskie/test/test_labels.txt", "data/dane_polskie/test/test_parents.txt","data/dane_polskie/test/test_sentence.txt",rnn.words2ids,False,0,s['nc'])
  90 +
  91 + n_train = len(train_data)
  92 + n_dev = len(dev_data)
  93 + n_test = len(test_data)
  94 +
  95 + print ""
  96 + #print "model 56 : h_dim = ", h_dim, "h2_dim = ", h2_dim, "h3_dim = ", h3_dim, " learning rate = ", s['lr']#, "dropout rate: ", dropout_rate
  97 + print "model LSTM_` : " , "h_dim = ", h_dim
  98 + print ""
  99 +
  100 + best_prediction_valid_all = 0
  101 + best_prediction_test_all = 0
  102 + best_prediction_test_root = 0
  103 + early_stop = 0
  104 +
  105 +
  106 + tic = time.time()
  107 +
  108 + for e in xrange(s['nepochs']):
  109 +
  110 + #if e >= 1:
  111 + # s['lr'] = 0.8 * s['lr']
  112 +
  113 + if early_stop == 10:
  114 + break
  115 +
  116 +
  117 + # shuffle
  118 + shuffle([train_data], s['seed'])
  119 +
  120 + for i in range(n_train):
  121 + rnn.train(train_data[i][0],train_data[i][1], train_data[i][2], train_data[i][3], s['lr'])
  122 +
  123 +
  124 +
  125 + # Dev:
  126 + counts_dev = np.zeros((s['nc'],s['nc']),dtype='int')
  127 + counts_dev_root = np.zeros((s['nc'],s['nc']),dtype='int')
  128 + for ii in range(n_dev):
  129 + pred = rnn.classify(dev_data[ii][0],dev_data[ii][1], dev_data[ii][3])
  130 + for j in range(len(pred)):
  131 + counts_dev[pred[j], dev_data[ii][2][j]] += 1
  132 + counts_dev_root[pred[-1], dev_data[ii][2][-1]] += 1
  133 +
  134 +
  135 + # Test:
  136 + counts_test = np.zeros((s['nc'],s['nc']),dtype='int')
  137 + counts_test_root = np.zeros((s['nc'],s['nc']),dtype='int')
  138 + for i in range(n_test):
  139 + pred = rnn.classify(test_data[i][0],test_data[i][1], test_data[i][3])
  140 + for j in range(len(pred)):
  141 + counts_test[pred[j], test_data[i][2][j]] += 1
  142 + counts_test_root[pred[-1], test_data[i][2][-1]] += 1
  143 +
  144 + # Train
  145 + counts = np.zeros((s['nc'],s['nc']),dtype='int')
  146 + counts_root = np.zeros((s['nc'],s['nc']),dtype='int')
  147 + for i in range(len(train_data_check)):
  148 +
  149 + if i % 1 == 0: # i % 1 keeps every training example; raise the modulus to evaluate on a subset and save time
  150 + pred = rnn.classify(train_data_check[i][0],train_data_check[i][1], train_data_check[i][3])
  151 + for j in range(len(pred)):
  152 + counts[pred[j], train_data_check[i][2][j]] += 1
  153 + counts_root[pred[-1], train_data_check[i][2][-1]] += 1
  154 +
  155 + print("epoch: ", e,
  156 + "V all: ", "%0.2f" % (100 * np.diag(counts_dev).sum()/float(counts_dev.sum())),
  157 + " Test all: ", "%0.2f" % (100 * np.diag(counts_test).sum()/float(counts_test.sum())),
  158 + "V root: ", "%0.2f" % (100 * np.diag(counts_dev_root).sum()/float(counts_dev_root.sum())),
  159 + " Test root: ", "%0.2f" % (100 * np.diag(counts_test_root).sum()/float(counts_test_root.sum())),
  160 + " Train: ", "%0.2f" % (100 * np.diag(counts).sum()/float(counts.sum())),
  161 + " Train root: ", "%0.2f" % (100 * np.diag(counts_root).sum()/float(counts_root.sum()))
  162 + )
  163 +
  164 +
  165 + if np.diag(counts_dev).sum()/float(counts_dev.sum()) > best_prediction_valid_all:
  166 + best_prediction_valid_all = np.diag(counts_dev).sum()/float(counts_dev.sum())
  167 + best_prediction_test_all = np.diag(counts_test).sum()/float(counts_test.sum())
  168 + best_prediction_test_root = np.diag(counts_test_root).sum()/float(counts_test_root.sum())
  169 +
  170 + early_stop = 0
  171 + else:
  172 + early_stop = early_stop + 1
  173 +
  174 +
  175 + print("Best valid: ", "%0.2f" % (100 * best_prediction_valid_all)," Test all: ","%0.2f" % (100 * best_prediction_test_all),"Test root: ","%0.2f" % (100 * best_prediction_test_root), " time: ", time.time()-tic)
  176 +
... ...
main_for_experiments_on_sst_MLP2.py 0 → 100644
  1 +import numpy as np
  2 +import time
  3 +import sys
  4 +import subprocess
  5 +import os
  6 +import random
  7 +
  8 +#from modules.data import load
  9 +from modules.rnn.models_with_relations import *
  10 +#from modules.metrics.accuracy import conlleval
  11 +from modules.utils.tools import load_stanford_data6, shuffle
  12 +
  13 +from theano import pp
  14 +
  15 +import theano.tensor as T
  16 +import theano
  17 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  18 +
  19 +import itertools
  20 +
  21 +import os.path
  22 +import pickle
  23 +
  24 +from collections import Counter
  25 +
  26 +
  27 +
  28 +from theano import tensor as T, printing
  29 +from collections import OrderedDict
  30 +from theano.ifelse import ifelse
  31 +
  32 +from keras.preprocessing import sequence as seq
  33 +
  34 +dataType = 'int64'
  35 +
  36 +
  37 +
  38 +if __name__ == '__main__':
  39 +
  40 + #theano.config.floatX = 'float64'
  41 +
  42 + file_with_filtered_embeddings = "embeddings/embedding_and_words2ids.pkl"
  43 + if not os.path.exists(file_with_filtered_embeddings):
  44 + print("Cannot find file with only needed embeddings. We use 'filter_embeddings' in order to create it.")
  45 + filter_embeddings(["data/sst/train/dlabels.txt", "data/sst/train/dparents.txt","data/sst/train/sents.toks", "data/sst/dev/dlabels.txt", "data/sst/dev/dparents.txt","data/sst/dev/sents.toks", "data/sst/test/dlabels.txt", "data/sst/test/dparents.txt","data/sst/test/sents.toks"],
  46 +
  47 + "/home/norbert/Doktorat/clarin2sent/treelstm/data/glove/glove.840B.300d.txt",
  48 + file_with_filtered_embeddings)
  49 +
  50 +
  51 + batch_size = 1
  52 +
  53 + s = {'lr':0.002,
  54 + 'nepochs':30,
  55 + 'seed':345,
  56 + 'nc':5 # number of y classes
  57 + }
  58 +
  59 +
  60 + batch_size = 1
  61 +
  62 +
  63 +
  64 + for ne_dim, nchd_dim, nh2_dim, number_of_relations in [(50,50, 50, 5),(100,100, 100, 5),(50,50, 50, 10),(100,100, 100, 10),(200,200, 100, 5)]:
  65 +
  66 + np.random.seed(s['seed'])
  67 + random.seed(s['seed'])
  68 +
  69 +
  70 + rnn = MLP_2_2( ne = ne_dim, nchd = nchd_dim, nh2 = nh2_dim,
  71 + nc = s['nc'],
  72 + w2v_model_path = file_with_filtered_embeddings, # path to the embeddings file
  73 + max_phrase_length = 60,
  74 + number_of_relations = number_of_relations )
  75 +
  76 + train_data = load_stanford_data6("data/sst/train/dlabels.txt", "data/sst/train/dparents.txt","data/sst/train/sents.toks","data/sst/train/rels.txt",rnn.words2ids,True,batch_size,s['nc'], k_most_common_relations = number_of_relations)
  77 +
  78 + dev_data = load_stanford_data6("data/sst/dev/dlabels.txt", "data/sst/dev/dparents.txt","data/sst/dev/sents.toks","data/sst/dev/rels.txt",rnn.words2ids,False,0,s['nc'], k_most_common_relations = number_of_relations)
  79 +
  80 + test_data = load_stanford_data6("data/sst/test/dlabels.txt", "data/sst/test/dparents.txt","data/sst/test/sents.toks","data/sst/test/rels.txt",rnn.words2ids,False,0,s['nc'], k_most_common_relations = number_of_relations)
  81 +
  82 + n_train = len(train_data)
  83 + n_dev = len(dev_data)
  84 + n_test = len(test_data)
  85 +
  86 + print ""
  87 + print "lr = ", s['lr'], "number_of_relations = ", number_of_relations
  88 + print "model MLP_2_2 : ", "nchd_dim = ", nchd_dim ,"ne_dim = ", ne_dim , "nh2 =", nh2_dim
  89 + print ""
  90 +
  91 + best_prediction_valid_all = 0
  92 + best_prediction_test_all = 0
  93 + best_prediction_test_root = 0
  94 + early_stop = 0
  95 +
  96 +
  97 + tic = time.time()
  98 +
  99 + for e in xrange(s['nepochs']):
  100 +
  101 + #if e >= 1:
  102 + # s['lr'] = 0.8 * s['lr']
  103 +
  104 + if early_stop == 5:
  105 + break
  106 +
  107 +
  108 + # shuffle
  109 + shuffle([train_data], s['seed'])
  110 +
  111 + for i in range(n_train):
  112 + rnn.train(train_data[i][0],train_data[i][1], train_data[i][2], train_data[i][3], train_data[i][4],s['lr'])
  113 +
  114 +
  115 +
  116 + # Dev:
  117 + counts_dev = np.zeros((s['nc'],s['nc']),dtype='int')
  118 + counts_dev_root = np.zeros((s['nc'],s['nc']),dtype='int')
  119 + for ii in range(n_dev):
  120 + pred = rnn.classify(dev_data[ii][0],dev_data[ii][1], dev_data[ii][3], dev_data[ii][4])
  121 + for j in range(len(pred)):
  122 + counts_dev[pred[j], dev_data[ii][2][j]] += 1
  123 + counts_dev_root[pred[-1], dev_data[ii][2][-1]] += 1
  124 +
  125 +
  126 + # Test:
  127 + counts_test = np.zeros((s['nc'],s['nc']),dtype='int')
  128 + counts_test_root = np.zeros((s['nc'],s['nc']),dtype='int')
  129 + for i in range(n_test):
  130 + pred = rnn.classify(test_data[i][0],test_data[i][1], test_data[i][3], test_data[i][4])
  131 + for j in range(len(pred)):
  132 + counts_test[pred[j], test_data[i][2][j]] += 1
  133 + counts_test_root[pred[-1], test_data[i][2][-1]] += 1
  134 +
  135 + # Train
  136 + counts = np.zeros((s['nc'],s['nc']),dtype='int')
  137 + counts_root = np.zeros((s['nc'],s['nc']),dtype='int')
  138 + for i in range(len(train_data)):
  139 +
  140 + if i % 10 == 0: # evaluate on 1/10 of the training set to save time
  141 + pred = rnn.classify(train_data[i][0],train_data[i][1], train_data[i][3], train_data[i][4])
  142 + for j in range(len(pred)):
  143 + counts[pred[j], train_data[i][2][j]] += 1
  144 + counts_root[pred[-1], train_data[i][2][-1]] += 1
  145 +
  146 +
  147 +
  148 + print("Valid: ", "%0.2f" % (100 * np.diag(counts_dev).sum()/float(counts_dev.sum())),
  149 + "Valid root: ","%0.2f" % (100 * np.diag(counts_dev_root).sum()/float(counts_dev_root.sum())),
  150 + " Test all: ","%0.2f" % (100 * np.diag(counts_test).sum()/float(counts_test.sum())),
  151 + "Test root: ","%0.2f" % (100 * np.diag(counts_test_root).sum()/float(counts_test_root.sum())),
  152 + " Train all: ","%0.2f" % (100 * np.diag(counts).sum()/float(counts.sum())),
  153 + "Train root: ","%0.2f" % (100 * np.diag(counts_root).sum()/float(counts_root.sum()))," time: ", time.time()-tic)
  154 +
  155 + if np.diag(counts_dev).sum()/float(counts_dev.sum()) > best_prediction_valid_all:
  156 + best_prediction_valid_all = np.diag(counts_dev).sum()/float(counts_dev.sum())
  157 + best_prediction_test_all = np.diag(counts_test).sum()/float(counts_test.sum())
  158 + best_prediction_test_root = np.diag(counts_test_root).sum()/float(counts_test_root.sum())
  159 +
  160 + early_stop = 0
  161 + else:
  162 + early_stop = early_stop + 1
  163 +
  164 +
  165 + print("Best valid: ", "%0.2f" % (100 * best_prediction_valid_all)," Test all: ","%0.2f" % (100 * best_prediction_test_all),"Test root: ","%0.2f" % (100 * best_prediction_test_root), " time: ", time.time()-tic)
  166 +
  167 +
  168 +
  169 +
  170 +
... ...
main_for_experiments_on_stanford_data.py 0 → 100644
  1 +import numpy as np
  2 +import time
  3 +import sys
  4 +import subprocess
  5 +import os
  6 +import random
  7 +
  8 +#from modules.data import load
  9 +from modules.rnn.models import *
  10 +#from modules.metrics.accuracy import conlleval
  11 +from modules.utils.tools import load_stanford_data4, shuffle
  12 +
  13 +from theano import pp
  14 +
  15 +import theano.tensor as T
  16 +import theano
  17 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  18 +
  19 +import itertools
  20 +
  21 +import os.path
  22 +import pickle
  23 +
  24 +from collections import Counter
  25 +
  26 +
  27 +
  28 +from theano import tensor as T, printing
  29 +from collections import OrderedDict
  30 +from theano.ifelse import ifelse
  31 +
  32 +from keras.preprocessing import sequence as seq
  33 +
  34 +dataType = 'int64'
  35 +
  36 +
  37 +
  38 +if __name__ == '__main__':
  39 +
  40 + #theano.config.floatX = 'float64'
  41 +
  42 + file_with_filtered_embeddings = "embeddings/embedding_and_words2ids.pkl"
  43 + if not os.path.exists(file_with_filtered_embeddings):
  44 + print("Cannot find file with only needed embeddings. We use 'filter_embeddings' in order to create it.")
  45 + filter_embeddings(["data/sst/train/dlabels.txt", "data/sst/train/dparents.txt","data/sst/train/sents.toks", "data/sst/dev/dlabels.txt", "data/sst/dev/dparents.txt","data/sst/dev/sents.toks", "data/sst/test/dlabels.txt", "data/sst/test/dparents.txt","data/sst/test/sents.toks"],
  46 +
  47 + "/home/norbert/Doktorat/clarin2sent/treelstm/data/glove/glove.840B.300d.txt",
  48 + file_with_filtered_embeddings)
  49 +
  50 +
  51 + batch_size = 1
  52 +
  53 +
  54 + # TODO: check models 7, 8, 9, 1, 2, 3, 10, 11, 5, 6
  55 +
  56 +
  57 +
  58 + s = {'lr':0.002,
  59 + 'nepochs':30,
  60 + 'seed':345,
  61 + 'nc':5 # number of y classes
  62 + }
  63 +
  64 +
  65 + batch_size = 1
  66 +
  67 +
  68 +
  69 +
  70 +
  71 + for ne_dim, nchd_dim in [(100,100)]:#,(200,200, 200,100),(200,200, 300,100),(100,100, 200,100)]:
  72 +
  73 + np.random.seed(s['seed'])
  74 + random.seed(s['seed'])
  75 +
  76 +
  77 + rnn = model55_pf1( ne = ne_dim, nchd = nchd_dim,# nh2 = nh2_dim,
  78 + nc = s['nc'],
  79 + w2v_model_path = file_with_filtered_embeddings, # path to the embeddings file
  80 + max_phrase_length = 60 )
  81 +
  82 + train_data = load_stanford_data4("data/sst/train/dlabels.txt", "data/sst/train/dparents.txt","data/sst/train/sents.toks",rnn.words2ids,True,batch_size,s['nc'])
  83 +
  84 + dev_data = load_stanford_data4("data/sst/dev/dlabels.txt", "data/sst/dev/dparents.txt","data/sst/dev/sents.toks",rnn.words2ids,False,0,s['nc'])
  85 +
  86 + test_data = load_stanford_data4("data/sst/test/dlabels.txt", "data/sst/test/dparents.txt","data/sst/test/sents.toks",rnn.words2ids,False,0,s['nc'])
  87 +
  88 + n_train = len(train_data)
  89 + n_dev = len(dev_data)
  90 + n_test = len(test_data)
  91 +
  92 + print ""
  93 + print "lr = ", s['lr']
  94 + print "model 55_pf1 : ", "nchd_dim = ", nchd_dim ,"ne_dim = ", ne_dim #, "nh2_dim = ", nh2_dim
  95 + print ""
  96 +
  97 + best_prediction_valid_all = 0
  98 + best_prediction_test_all = 0
  99 + best_prediction_test_root = 0
  100 + early_stop = 0
  101 +
  102 +
  103 + tic = time.time()
  104 +
  105 + for e in xrange(s['nepochs']):
  106 +
  107 + #if e >= 1:
  108 + # s['lr'] = 0.8 * s['lr']
  109 +
  110 + if early_stop == 5:
  111 + break
  112 +
  113 +
  114 + # shuffle
  115 + shuffle([train_data], s['seed'])
  116 +
  117 + for i in range(n_train):
  118 + rnn.train(train_data[i][0],train_data[i][1], train_data[i][2], train_data[i][3], s['lr'])
  119 +
  120 +
  121 +
  122 + # Dev:
  123 + counts_dev = np.zeros((s['nc'],s['nc']),dtype='int')
  124 + counts_dev_root = np.zeros((s['nc'],s['nc']),dtype='int')
  125 + for ii in range(n_dev):
  126 + pred = rnn.classify(dev_data[ii][0],dev_data[ii][1], dev_data[ii][3])
  127 + for j in range(len(pred)):
  128 + counts_dev[pred[j], dev_data[ii][2][j]] += 1
  129 + counts_dev_root[pred[-1], dev_data[ii][2][-1]] += 1
  130 +
  131 +
  132 + # Test:
  133 + counts_test = np.zeros((s['nc'],s['nc']),dtype='int')
  134 + counts_test_root = np.zeros((s['nc'],s['nc']),dtype='int')
  135 + for i in range(n_test):
  136 + pred = rnn.classify(test_data[i][0],test_data[i][1], test_data[i][3])
  137 + for j in range(len(pred)):
  138 + counts_test[pred[j], test_data[i][2][j]] += 1
  139 + counts_test_root[pred[-1], test_data[i][2][-1]] += 1
  140 +
  141 + # Train
  142 + counts = np.zeros((s['nc'],s['nc']),dtype='int')
  143 + counts_root = np.zeros((s['nc'],s['nc']),dtype='int')
  144 + for i in range(len(train_data)):
  145 +
  146 + if i % 10 == 0: # evaluate on 1/10 of the training set to save time
  147 + pred = rnn.classify(train_data[i][0],train_data[i][1], train_data[i][3])
  148 + for j in range(len(pred)):
  149 + counts[pred[j], train_data[i][2][j]] += 1
  150 + counts_root[pred[-1], train_data[i][2][-1]] += 1
  151 +
  152 +
  153 +
  154 + print("Valid: ", "%0.2f" % (100 * np.diag(counts_dev).sum()/float(counts_dev.sum())),
  155 + "Valid root: ","%0.2f" % (100 * np.diag(counts_dev_root).sum()/float(counts_dev_root.sum())),
  156 + " Test all: ","%0.2f" % (100 * np.diag(counts_test).sum()/float(counts_test.sum())),
  157 + "Test root: ","%0.2f" % (100 * np.diag(counts_test_root).sum()/float(counts_test_root.sum())),
  158 + " Train all: ","%0.2f" % (100 * np.diag(counts).sum()/float(counts.sum())),
  159 + "Train root: ","%0.2f" % (100 * np.diag(counts_root).sum()/float(counts_root.sum()))," time: ", time.time()-tic)
  160 +
  161 + if np.diag(counts_dev).sum()/float(counts_dev.sum()) > best_prediction_valid_all:
  162 + best_prediction_valid_all = np.diag(counts_dev).sum()/float(counts_dev.sum())
  163 + best_prediction_test_all = np.diag(counts_test).sum()/float(counts_test.sum())
  164 + best_prediction_test_root = np.diag(counts_test_root).sum()/float(counts_test_root.sum())
  165 +
  166 + early_stop = 0
  167 + else:
  168 + early_stop = early_stop + 1
  169 +
  170 +
  171 + print("Best valid: ", "%0.2f" % (100 * best_prediction_valid_all)," Test all: ","%0.2f" % (100 * best_prediction_test_all),"Test root: ","%0.2f" % (100 * best_prediction_test_root), " time: ", time.time()-tic)
  172 +
  173 +
  174 +
  175 +
  176 +### 9 , 2,3 , 5,6
  177 +
  178 +
  179 +
... ...
main_for_sst_LSTM.py 0 → 100644
  1 +import numpy as np
  2 +import time
  3 +import sys
  4 +import subprocess
  5 +import os
  6 +import random
  7 +
  8 +#from modules.data import load
  9 +from modules.rnn.LSTM_models import *
  10 +#from modules.metrics.accuracy import conlleval
  11 +from modules.utils.tools import load_stanford_data4, shuffle
  12 +
  13 +from theano import pp
  14 +
  15 +import theano.tensor as T
  16 +import theano
  17 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  18 +
  19 +import itertools
  20 +
  21 +import os.path
  22 +import pickle
  23 +
  24 +from collections import Counter
  25 +
  26 +
  27 +
  28 +from theano import tensor as T, printing
  29 +from collections import OrderedDict
  30 +from theano.ifelse import ifelse
  31 +
  32 +from keras.preprocessing import sequence as seq
  33 +
  34 +dataType = 'int64'
  35 +
  36 +
  37 +if __name__ == '__main__':
  38 +
  39 +
  40 + sys.setrecursionlimit(10000)
  41 +
  42 + #w2v_DIM = "300"
  43 +
  44 +
  45 +
  46 + file_with_filtered_embeddings = "embeddings/embedding_and_words2ids.pkl"
  47 + #if not os.path.exists(file_with_filtered_embeddings):
  48 + # print("Cannot find file with only needed embeddings. We use 'filter_embeddings' in order to create it.")
  49 + # filter_embeddings(["data/dane_polskie/train/train_labels.txt", "data/dane_polskie/train/train_parents.txt","data/dane_polskie/train/train_sentence.txt",
  50 + # "data/dane_polskie/dev/dev_labels.txt", "data/dane_polskie/dev/dev_parents.txt","data/dane_polskie/dev/dev_sentence.txt",
  51 + # "data/dane_polskie/test/test_labels.txt", "data/dane_polskie/test/test_parents.txt","data/dane_polskie/test/test_sentence.txt"],
  52 +
  53 + #"/home/norbert/Doktorat/clarin2sent/deeptagger/embeddings/w2v_allwiki_nkjpfull_"+w2v_DIM+".txt",
  54 + #file_with_filtered_embeddings)
  55 +
  56 +
  57 + s = {'lr':0.002,
  58 + 'nepochs':40,
  59 + 'seed':345,
  60 + 'nc':5 # number of y classes
  61 + }
  62 + batch_size = 1
  63 +
  64 +
  65 + for h_dim in [100]: #100, 150, 200
  66 +
  67 + np.random.seed(s['seed'])
  68 + random.seed(s['seed'])
  69 +
  70 +
  71 + rnn = LSTM_1( h_dim,
  72 + nc = s['nc'],
  73 + w2v_model_path = file_with_filtered_embeddings, # path to the embeddings file
  74 + max_phrase_length = 60 )
  75 +
  76 +
  77 + train_data = load_stanford_data4("data/sst/train/dlabels.txt", "data/sst/train/dparents.txt","data/sst/train/sents.toks",rnn.words2ids,True,batch_size,s['nc'])
  78 + dev_data = load_stanford_data4("data/sst/dev/dlabels.txt", "data/sst/dev/dparents.txt","data/sst/dev/sents.toks",rnn.words2ids,False,0,s['nc'])
  79 + test_data = load_stanford_data4("data/sst/test/dlabels.txt", "data/sst/test/dparents.txt","data/sst/test/sents.toks",rnn.words2ids,False,0,s['nc'])
  80 +
  81 + n_train = len(train_data)
  82 + n_dev = len(dev_data)
  83 + n_test = len(test_data)
  84 +
  85 + print ""
  86 + print "learning rate: ", s['lr']
  87 + print "model LSTM_1 : " , "h_dim = ", h_dim
  88 + print ""
  89 +
  90 + best_prediction_valid_all = 0
  91 + best_prediction_test_all = 0
  92 + best_prediction_test_root = 0
  93 + early_stop = 0
  94 +
  95 +
  96 + tic = time.time()
  97 +
  98 + for e in xrange(s['nepochs']):
  99 +
  100 + #if e >= 1:
  101 + # s['lr'] = 0.8 * s['lr']
  102 +
  103 + if early_stop == 5:
  104 + break
  105 +
  106 +
  107 + # shuffle
  108 + shuffle([train_data], s['seed'])
  109 +
  110 + for i in range(n_train):
  111 + rnn.train(train_data[i][0],train_data[i][1], train_data[i][2], train_data[i][3], s['lr'])
  112 +
  113 + pickle.dump(rnn, open("model" + str(e) + ".pkl",'wb'))
  114 +
  115 + # Dev:
  116 + counts_dev = np.zeros((s['nc'],s['nc']),dtype='int')
  117 + counts_dev_root = np.zeros((s['nc'],s['nc']),dtype='int')
  118 + for ii in range(n_dev):
  119 + pred = rnn.classify(dev_data[ii][0],dev_data[ii][1], dev_data[ii][3])
  120 + for j in range(len(pred)):
  121 + counts_dev[pred[j], dev_data[ii][2][j]] += 1
  122 + counts_dev_root[pred[-1], dev_data[ii][2][-1]] += 1
  123 +
  124 +
  125 + # Test:
  126 + counts_test = np.zeros((s['nc'],s['nc']),dtype='int')
  127 + counts_test_root = np.zeros((s['nc'],s['nc']),dtype='int')
  128 + for i in range(n_test):
  129 + pred = rnn.classify(test_data[i][0],test_data[i][1], test_data[i][3])
  130 + for j in range(len(pred)):
  131 + counts_test[pred[j], test_data[i][2][j]] += 1
  132 + counts_test_root[pred[-1], test_data[i][2][-1]] += 1
  133 +
  134 + # Train
  135 + counts = np.zeros((s['nc'],s['nc']),dtype='int')
  136 + counts_root = np.zeros((s['nc'],s['nc']),dtype='int')
  137 + for i in range(len(train_data)):
  138 +
  139 + if i % 5 == 0: # evaluate on every 5th training example (1/5 of the set) to save time
  140 + pred = rnn.classify(train_data[i][0],train_data[i][1], train_data[i][3])
  141 + for j in range(len(pred)):
  142 + counts[pred[j], train_data[i][2][j]] += 1
  143 + counts_root[pred[-1], train_data[i][2][-1]] += 1
  144 +
  145 + print("epoch: ", e,
  146 + "V all: ", "%0.2f" % (100 * np.diag(counts_dev).sum()/float(counts_dev.sum())),
  147 + " Test all: ", "%0.2f" % (100 * np.diag(counts_test).sum()/float(counts_test.sum())),
  148 + "V root: ", "%0.2f" % (100 * np.diag(counts_dev_root).sum()/float(counts_dev_root.sum())),
  149 + " Test root: ", "%0.2f" % (100 * np.diag(counts_test_root).sum()/float(counts_test_root.sum())),
  150 + " Train: ", "%0.2f" % (100 * np.diag(counts).sum()/float(counts.sum())),
  151 + " Train root: ", "%0.2f" % (100 * np.diag(counts_root).sum()/float(counts_root.sum()))
  152 + )
  153 +
  154 +
  155 + if np.diag(counts_dev).sum()/float(counts_dev.sum()) > best_prediction_valid_all:
  156 + best_prediction_valid_all = np.diag(counts_dev).sum()/float(counts_dev.sum())
  157 + best_prediction_test_all = np.diag(counts_test).sum()/float(counts_test.sum())
  158 + best_prediction_test_root = np.diag(counts_test_root).sum()/float(counts_test_root.sum())
  159 +
  160 + early_stop = 0
  161 + else:
  162 + early_stop = early_stop + 1
  163 +
  164 +
  165 + print("Best valid: ", "%0.2f" % (100 * best_prediction_valid_all)," Test all: ","%0.2f" % (100 * best_prediction_test_all),"Test root: ","%0.2f" % (100 * best_prediction_test_root), " time: ", time.time()-tic)
  166 +
... ...
modules/rnn/LSTM_models.py 0 → 100644
  1 +import numpy as np
  2 +import time
  3 +import sys
  4 +import subprocess
  5 +import os
  6 +import random
  7 +
  8 +#from modules.data import load
  9 +#from modules.rnn.many_models import *
  10 +#from modules.metrics.accuracy import conlleval
  11 +from modules.utils.tools import load_stanford_data4
  12 +
  13 +from theano import pp
  14 +
  15 +import theano.tensor as T
  16 +import theano
  17 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  18 +
  19 +import itertools
  20 +
  21 +import os.path
  22 +import pickle
  23 +
  24 +from collections import Counter
  25 +
  26 +
  27 +
  28 +from theano import tensor as T, printing
  29 +from collections import OrderedDict
  30 +from theano.ifelse import ifelse
  31 +
  32 +from keras.preprocessing import sequence as seq
  33 +
  34 +dataType = 'int64'
  35 +
  36 +
  37 +
  38 +
  39 +class LSTM_1(object):
  40 + def __init__(self, h_dim, nc, w2v_model_path, max_phrase_length):
  41 +
  42 + '''
  43 + h_dim :: dimension of the hidden state
  44 + nc :: number of classes
  45 + '''
  46 +
  47 + self.max_phrase_length = max_phrase_length
  48 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  49 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  50 + self.words2ids = w2vecs["words2ids"]
  51 +
  52 + emb_dim = w2vecs["vectors"].shape[1]
  53 + del w2vecs
  54 +
  55 + r = 0.05
  56 +
  57 + self.W_i = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim, h_dim) ).astype(theano.config.floatX))
  58 + self.U_i = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
  59 + self.b_i = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
  60 +
  61 + self.W_f = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim, h_dim) ).astype(theano.config.floatX))
  62 + self.U_f = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
  63 + self.b_f = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
  64 +
  65 + self.W_o = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim, h_dim) ).astype(theano.config.floatX))
  66 + self.U_o = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
  67 + self.b_o = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
  68 +
  69 + self.W_u = theano.shared(r * np.random.uniform(-1.0, 1.0, (emb_dim, h_dim) ).astype(theano.config.floatX))
  70 + self.U_u = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, h_dim) ).astype(theano.config.floatX))
  71 + self.b_u = theano.shared(r * np.random.uniform(-1.0, 1.0, h_dim ).astype(theano.config.floatX))
  72 +
  73 + self.W_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (h_dim, nc)).astype(theano.config.floatX))
  74 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  75 +
  76 +
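+ # one_step processes a single tree node (children are assumed to appear earlier in the
+ # sequence, so their states are already filled in): the hidden states of the node's
+ # children (rows of hidden_states selected via word_children_positions >= 0) are summed,
+ # the gates i, o, u are computed from the word embedding and that sum, a separate forget
+ # gate is applied to each child's cell state, and the new cell/hidden state is stored at
+ # position k; the cross-entropy of the node's softmax prediction is returned.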
  77 + def one_step(word_id, word_children_positions, y_true, k, hidden_states, cell_states, learning_rate):
  78 +
  79 + x = self.emb[word_id]
  80 + # i.e. the zero vector  # TODO: check + 0.5
  81 + tmp = word_children_positions>=0.0
  82 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  83 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  84 +
  85 + h_aggregated = ifelse(T.gt(number_of_children, 0.0), hidden_states[word_children_positions[idx_tmp]].sum(axis=0), hidden_states[-1])
  86 +
  87 + #number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  88 + #h_aggregated = h_aggregated/number_of_children # averaging of the children's hidden states
  89 +
  90 +
  91 + i = T.nnet.sigmoid( T.dot(x, self.W_i) + T.dot(h_aggregated, self.U_i) + self.b_i)
  92 +
  93 + o = T.nnet.sigmoid( T.dot(x, self.W_o) + T.dot(h_aggregated, self.U_o) + self.b_o)
  94 +
  95 + u = T.tanh( T.dot(x, self.W_u) + T.dot(h_aggregated, self.U_u) + self.b_u)
  96 +
  97 + f_c = ifelse(T.gt(number_of_children, 0.0),
  98 + (T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(hidden_states[word_children_positions[idx_tmp]], self.U_f) + self.b_f )*cell_states[word_children_positions[idx_tmp]]).sum(axis=0),
  99 + T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(hidden_states[-1], self.U_f) + self.b_f ) * cell_states[-1]
  100 + )
  101 +
  102 + c = i*u + f_c
  103 +
  104 + h = o * T.tanh(c)
  105 +
  106 + current_cell_state = cell_states[k]
  107 + cell_states_new = T.set_subtensor(current_cell_state, c)
  108 +
  109 + current_hidden_state = hidden_states[k]
  110 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  111 +
  112 +
  113 + y_prob = T.nnet.softmax(T.dot(h,self.W_y) + self.b_y)[0]
  114 +
  115 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  116 +
  117 + return cross_entropy, hidden_states_new, cell_states_new
  118 +
  119 +
  120 + y = T.vector('y',dtype=dataType)
  121 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  122 + words = T.vector(dtype=dataType)
  123 + children_positions = T.matrix(dtype=dataType)
  124 + words_indexes = T.vector(dtype=dataType)
  125 +
  126 + [cross_entropy_vector, _, _] , _ = theano.scan(fn=one_step, \
  127 + sequences = [words, children_positions,y,words_indexes],
  128 + outputs_info = [None,
  129 + theano.shared(np.zeros((self.max_phrase_length+1,h_dim), dtype = theano.config.floatX)),
  130 + theano.shared(np.zeros((self.max_phrase_length+1,h_dim), dtype = theano.config.floatX))],
  131 + non_sequences = learning_rate,
  132 + n_steps = words.shape[0])
  133 + cost = T.sum(cross_entropy_vector)
  134 +
  135 + updates = OrderedDict([
  136 + (self.W_i, self.W_i-learning_rate*T.grad(cost, self.W_i)),
  137 + (self.W_f, self.W_f-learning_rate*T.grad(cost, self.W_f)),
  138 + (self.W_o, self.W_o-learning_rate*T.grad(cost, self.W_o)),
  139 + (self.W_u, self.W_u-learning_rate*T.grad(cost, self.W_u)),
  140 + (self.W_y, self.W_y-learning_rate*T.grad(cost, self.W_y)),
  141 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  142 + (self.b_i, self.b_i-learning_rate*T.grad(cost,self.b_i)),
  143 + (self.b_f, self.b_f-learning_rate*T.grad(cost,self.b_f)),
  144 + (self.b_o, self.b_o-learning_rate*T.grad(cost,self.b_o)),
  145 + (self.b_u, self.b_u-learning_rate*T.grad(cost,self.b_u)),
  146 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  147 + ])
  148 +
  149 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  150 + outputs = [],
  151 + updates = updates,
  152 + allow_input_downcast=True,
  153 + mode='FAST_RUN'
  154 + )
  155 +
  156 +
  157 + def one_step_classify(word_id, word_children_positions, k, hidden_states, cell_states):
  158 +
  159 + x = self.emb[word_id]
  160 + # i.e. the zero vector  # TODO: check + 0.5
  161 + tmp = word_children_positions>=0.0
  162 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  163 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  164 +
  165 + h_aggregated = ifelse(T.gt(number_of_children, 0.0), hidden_states[word_children_positions[idx_tmp]].sum(axis=0), hidden_states[-1])
  166 +
  167 + #number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  168 + #h_aggregated = h_aggregated/number_of_children # averaging of the children's hidden states
  169 +
  170 +
  171 + i = T.nnet.sigmoid( T.dot(x, self.W_i) + T.dot(h_aggregated, self.U_i) + self.b_i)
  172 +
  173 + o = T.nnet.sigmoid( T.dot(x, self.W_o) + T.dot(h_aggregated, self.U_o) + self.b_o)
  174 +
  175 + u = T.tanh( T.dot(x, self.W_u) + T.dot(h_aggregated, self.U_u) + self.b_u)
  176 +
  177 + f_c = ifelse(T.gt(number_of_children, 0.0),
  178 + (T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(hidden_states[word_children_positions[idx_tmp]], self.U_f) + self.b_f )*cell_states[word_children_positions[idx_tmp]]).sum(axis=0),
  179 + T.nnet.sigmoid( T.dot(x, self.W_f ) + T.dot(hidden_states[-1], self.U_f) + self.b_f ) * cell_states[-1]
  180 + )
  181 +
  182 + c = i*u + f_c
  183 +
  184 + h = o * T.tanh(c)
  185 +
  186 + current_cell_state = cell_states[k]
  187 + cell_states_new = T.set_subtensor(current_cell_state, c)
  188 +
  189 + current_hidden_state = hidden_states[k]
  190 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  191 +
  192 +
  193 + y_prob = T.nnet.softmax(T.dot(h,self.W_y) + self.b_y)[0]
  194 +
  195 + return y_prob, hidden_states_new, cell_states_new
  196 +
  197 +
  198 + [y_probs_classify, _, _ ], _ = theano.scan(
  199 + fn=one_step_classify,
  200 + sequences = [words, children_positions, words_indexes],
  201 + outputs_info = [None,
  202 + theano.shared(np.zeros((self.max_phrase_length+1,h_dim), dtype = theano.config.floatX)),
  203 + theano.shared(np.zeros((self.max_phrase_length+1,h_dim), dtype = theano.config.floatX))])
  204 +
  205 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  206 + sequences = [words_indexes])
  207 +
  208 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  209 + outputs=predictions,
  210 + allow_input_downcast=True,
  211 + mode='FAST_RUN'
  212 + )
  213 +
  214 +
... ...
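
For reference, the driver scripts above exercise LSTM_1 roughly as follows. This is a minimal sketch: the argument order is copied from main_for_sst_LSTM.py, and each element of train_data is assumed to carry the four arrays indexed there (word ids, children positions, labels, node indexes).

from modules.rnn.LSTM_models import LSTM_1
from modules.utils.tools import load_stanford_data4

rnn = LSTM_1(100, nc=5,
             w2v_model_path="embeddings/embedding_and_words2ids.pkl",
             max_phrase_length=60)

train_data = load_stanford_data4("data/sst/train/dlabels.txt",
                                 "data/sst/train/dparents.txt",
                                 "data/sst/train/sents.toks",
                                 rnn.words2ids, True, 1, 5)

ex = train_data[0]
rnn.train(ex[0], ex[1], ex[2], ex[3], 0.002)   # one gradient step on one sentence
pred = rnn.classify(ex[0], ex[1], ex[3])       # per-node sentiment predictions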
modules/rnn/LSTM_models.pyc 0 → 100644
No preview for this file type
modules/rnn/models.py 0 → 100644
  1 +import numpy as np
  2 +import time
  3 +import sys
  4 +import subprocess
  5 +import os
  6 +import random
  7 +
  8 +#from modules.data import load
  9 +#from modules.rnn.many_models import *
  10 +#from modules.metrics.accuracy import conlleval
  11 +from modules.utils.tools import load_stanford_data4
  12 +
  13 +from theano import pp
  14 +
  15 +import theano.tensor as T
  16 +import theano
  17 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  18 +
  19 +import itertools
  20 +
  21 +import os.path
  22 +import pickle
  23 +
  24 +from collections import Counter
  25 +
  26 +
  27 +
  28 +from theano import tensor as T, printing
  29 +from collections import OrderedDict
  30 +from theano.ifelse import ifelse
  31 +
  32 +from keras.preprocessing import sequence as seq
  33 +
  34 +dataType = 'int64'
  35 +
  36 +
  37 +
  38 +# NOTE: "ne" is NOT what the docstring says - it is the dimension of the layer directly above the embedding
  39 +
  40 +
  41 +class model55_pf1(object):
  42 + def __init__(self, ne, nchd, nc, w2v_model_path, max_phrase_length):
  43 + '''
  44 + nh :: dimension of hidden state
  45 + nc :: number of classes
  46 + ne :: number of word embeddings in the vocabulary
  47 + de :: dimension of the word embeddings
  48 + ds :: dimension of the sentiment state
  49 + '''
  50 + self.max_phrase_length = max_phrase_length
  51 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  52 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  53 + self.words2ids = w2vecs["words2ids"]
  54 +
  55 + #ne = len(w2vecs["words2ids"])
  56 + de = w2vecs["vectors"].shape[1]
  57 + del w2vecs
  58 +
  59 + r = 0.05
  60 + self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, ne)).astype(theano.config.floatX))
  61 + self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nc)).astype(theano.config.floatX))
  62 +
  63 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nchd)).astype(theano.config.floatX))
  64 +
  65 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  66 +
  67 +
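+ # one_step builds the hidden state of one tree node: the children's hidden states
+ # (rows selected via word_children_positions >= 0) are averaged, the node state is
+ # h = tanh([emb . W_e_h (ne dims) | avg_children . W_sh_h (nchd dims)]), and the
+ # per-node class distribution is softmax(h . W_h_y + b_y).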
  68 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  69 +
  70 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  71 +
  72 + tmp = word_children_positions>=0.0
  73 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  74 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  75 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  76 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  77 + schh = schh/number_of_children
  78 +
  79 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  80 +
  81 + current_hidden_state = hidden_states[i]
  82 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  83 +
  84 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  85 +
  86 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  87 +
  88 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  89 +
  90 + return cross_entropy, hidden_states_new
  91 +
  92 +
  93 + y = T.vector('y',dtype=dataType)
  94 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  95 + words = T.vector(dtype=dataType)
  96 + children_positions = T.matrix(dtype=dataType)
  97 + words_indexes = T.vector(dtype=dataType)
  98 +
  99 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  100 + sequences = [words, children_positions,y,words_indexes],
  101 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  102 + non_sequences = learning_rate,
  103 + n_steps = words.shape[0])
  104 + cost = T.sum(cross_entropy_vector[0])
  105 +
  106 + updates = OrderedDict([
  107 + (self.W_h_y, self.W_h_y-learning_rate*T.grad(cost, self.W_h_y)),
  108 + (self.W_e_h, self.W_e_h-learning_rate*T.grad(cost, self.W_e_h)),
  109 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  110 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  111 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  112 + ])
  113 +
  114 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  115 + outputs = [],
  116 + updates = updates,
  117 + allow_input_downcast=True,
  118 + mode='FAST_RUN'
  119 + )
  120 +
  121 +
  122 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  123 +
  124 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  125 +
  126 + tmp = word_children_positions>=0.0
  127 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  128 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  129 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  130 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  131 + schh = schh/number_of_children
  132 +
  133 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]) )
  134 +
  135 + current_hidden_state = hidden_states[i]
  136 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  137 +
  138 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  139 +
  140 + return y_prob, hidden_states_new
  141 +
  142 +
  143 + [y_probs_classify, hidden_states ], _ = theano.scan(
  144 + fn=one_step_classify,
  145 + sequences = [words, children_positions,words_indexes],
  146 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  147 +
  148 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  149 + sequences = [words_indexes])
  150 +
  151 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  152 + outputs=predictions,
  153 + allow_input_downcast=True,
  154 + mode='FAST_RUN'
  155 + )
  156 +
  157 +
  158 +
  159 +class model55_pf2(object):
  160 + def __init__(self, ne, nchd, nh2, nc, w2v_model_path, max_phrase_length):
  161 + '''
  162 + ne :: dimension of the part of the hidden state built from the word embedding
  163 + nchd :: dimension of the part built from the aggregated children states
  164 + nh2 :: dimension of the second hidden layer
  165 + nc :: number of classes
  166 + de :: dimension of the word embeddings (read from the pickled w2v model)
  167 + '''
  168 + self.max_phrase_length = max_phrase_length
  169 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  170 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  171 + self.words2ids = w2vecs["words2ids"]
  172 +
  173 + #ne = len(w2vecs["words2ids"])
  174 + de = w2vecs["vectors"].shape[1]
  175 + del w2vecs
  176 +
  177 + r = 0.05
  178 +
  179 + self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, ne)).astype(theano.config.floatX))
  180 +
  181 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nchd)).astype(theano.config.floatX))
  182 +
  183 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  184 + self.W_h2_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nc)).astype(theano.config.floatX))
  185 +
  186 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  187 +
  188 +
  189 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  190 +
  191 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  192 +
  193 + tmp = word_children_positions>=0.0
  194 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  195 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  196 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  197 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  198 + schh = schh/number_of_children
  199 +
  200 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  201 + #h = T.nnet.sigmoid(T.dot(self.emb[word_id],self.W_eh) + T.dot(schh,self.W_shsh) + self.bh)
  202 +
  203 + current_hidden_state = hidden_states[i]
  204 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  205 +
  206 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  207 +
  208 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  209 +
  210 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  211 +
  212 + return cross_entropy, hidden_states_new
  213 +
  214 +
  215 + y = T.vector('y',dtype=dataType)
  216 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  217 + words = T.vector(dtype=dataType)
  218 + children_positions = T.matrix(dtype=dataType)
  219 + words_indexes = T.vector(dtype=dataType)
  220 +
  221 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  222 + sequences = [words, children_positions,y,words_indexes],
  223 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  224 + non_sequences = learning_rate,
  225 + n_steps = words.shape[0])
  226 + cost = T.sum(cross_entropy_vector[0])
  227 +
  228 + updates = OrderedDict([
  229 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  230 + (self.W_h2_y, self.W_h2_y-learning_rate*T.grad(cost, self.W_h2_y)),
  231 + (self.W_e_h, self.W_e_h-learning_rate*T.grad(cost, self.W_e_h)),
  232 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  233 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  234 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  235 + ])
  236 +
  237 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  238 + outputs = [],
  239 + updates = updates,
  240 + allow_input_downcast=True,
  241 + mode='FAST_RUN'
  242 + )
  243 +
  244 +
  245 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  246 +
  247 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  248 +
  249 + tmp = word_children_positions>=0.0
  250 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  251 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  252 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  253 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  254 + schh = schh/number_of_children
  255 +
  256 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  257 +
  258 + current_hidden_state = hidden_states[i]
  259 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  260 +
  261 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  262 +
  263 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  264 +
  265 + return y_prob, hidden_states_new
  266 +
  267 +
  268 + [y_probs_classify, hidden_states ], _ = theano.scan(
  269 + fn=one_step_classify,
  270 + sequences = [words, children_positions,words_indexes],
  271 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  272 +
  273 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  274 + sequences = [words_indexes])
  275 +
  276 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  277 + outputs=predictions,
  278 + allow_input_downcast=True,
  279 + mode='FAST_RUN'
  280 + )
  281 +
  282 +
  283 +
  284 +
  285 +class model55_pf3(object):
  286 + def __init__(self, ne, nchd, nh2, nh3, nc, w2v_model_path, max_phrase_length):
  287 + '''
  288 + nh :: dimension of hidden state
  289 + nc :: number of classes
  290 + ne :: number of word embeddings in the vocabulary
  291 + de :: dimension of the word embeddings
  292 + ds :: dimension of the sentiment state
  293 + '''
  294 + self.max_phrase_length = max_phrase_length
  295 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  296 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  297 + self.words2ids = w2vecs["words2ids"]
  298 +
  299 + #ne = len(w2vecs["words2ids"])
  300 + de = w2vecs["vectors"].shape[1]
  301 + del w2vecs
  302 +
  303 + r = 0.05
  304 + self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, ne)).astype(theano.config.floatX))
  305 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nchd)).astype(theano.config.floatX))
  306 +
  307 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  308 + self.W_h2_h3 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nh3)).astype(theano.config.floatX))
  309 + self.W_h3_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh3, nc)).astype(theano.config.floatX))
  310 +
  311 +
  312 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  313 +
  314 +
  315 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  316 +
  317 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  318 +
  319 + tmp = word_children_positions>=0.0
  320 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  321 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  322 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  323 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  324 + schh = schh/number_of_children
  325 +
  326 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  327 + #h = T.nnet.sigmoid(T.dot(self.emb[word_id],self.W_eh) + T.dot(schh,self.W_shsh) + self.bh)
  328 +
  329 + current_hidden_state = hidden_states[i]
  330 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  331 +
  332 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  333 +
  334 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  335 +
  336 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  337 +
  338 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  339 +
  340 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  341 +
  342 + return cross_entropy, hidden_states_new
  343 +
  344 +
  345 + y = T.vector('y',dtype=dataType)
  346 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  347 + words = T.vector(dtype=dataType)
  348 + children_positions = T.matrix(dtype=dataType)
  349 + words_indexes = T.vector(dtype=dataType)
  350 +
  351 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  352 + sequences = [words, children_positions,y,words_indexes],
  353 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  354 + non_sequences = learning_rate,
  355 + n_steps = words.shape[0])
  356 + cost = T.sum(cross_entropy_vector[0])
  357 +
  358 + updates = OrderedDict([
  359 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  360 + (self.W_h2_h3, self.W_h2_h3-learning_rate*T.grad(cost, self.W_h2_h3)),
  361 + (self.W_h3_y, self.W_h3_y-learning_rate*T.grad(cost, self.W_h3_y)),
  362 + (self.W_e_h, self.W_e_h-learning_rate*T.grad(cost, self.W_e_h)),
  363 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  364 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  365 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  366 + ])
  367 +
  368 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  369 + outputs = [],
  370 + updates = updates,
  371 + allow_input_downcast=True,
  372 + mode='FAST_RUN'
  373 + )
  374 +
  375 +
  376 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  377 +
  378 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  379 +
  380 + tmp = word_children_positions>=0.0
  381 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  382 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  383 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  384 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  385 + schh = schh/number_of_children
  386 +
  387 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]) )
  388 +
  389 + current_hidden_state = hidden_states[i]
  390 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  391 +
  392 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  393 +
  394 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  395 +
  396 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  397 +
  398 + return y_prob, hidden_states_new
  399 +
  400 +
  401 + [y_probs_classify, hidden_states ], _ = theano.scan(
  402 + fn=one_step_classify,
  403 + sequences = [words, children_positions,words_indexes],
  404 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  405 +
  406 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  407 + sequences = [words_indexes])
  408 +
  409 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  410 + outputs=predictions,
  411 + allow_input_downcast=True,
  412 + mode='FAST_RUN'
  413 + )
  414 +
  415 +
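      + # Minimal usage sketch for model55_pf3 (hypothetical, never called in this module): it assumes an
      + # embedding pickle at emb_path with the "vectors" and "words2ids" keys expected by the constructor;
      + # the 3-word tree, word ids and labels below are illustrative placeholders only.
      + def _example_usage_model55_pf3(emb_path):
      +     rnn = model55_pf3(ne=50, nchd=50, nh2=30, nh3=20, nc=3,
      +                       w2v_model_path=emb_path, max_phrase_length=60)
      +     words = np.array([5, 7, 2])               # placeholder word ids taken from rnn.words2ids
      +     children_positions = np.array([[-1, -1],  # word 0: a leaf (no children, only -1 padding)
      +                                    [-1, -1],  # word 1: a leaf
      +                                    [ 0,  1]]) # word 2: the root, its children sit at positions 0 and 1
      +     words_indexes = np.array([0, 1, 2])       # children are processed before their parent
      +     y = np.array([1, 1, 2])                   # one sentiment label per node
      +     rnn.train(words, children_positions, y, words_indexes, 0.002)
      +     return rnn.classify(words, children_positions, words_indexes)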
  416 +
  417 +
  418 +
  419 +
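      + # model55_pf4: the word embedding is first passed through a tanh layer (W_e_eh) before being
      + # projected into the node state; the softmax reads the node state directly (W_h_y, b_y).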
  420 +class model55_pf4(object):
  421 + def __init__(self, neh, ne, nchd, nc, w2v_model_path, max_phrase_length):
  422 + '''
  423 + neh :: size of the intermediate tanh projection of the word embedding (W_e_eh)
  424 + ne :: size of the word-embedding part of the node state
  425 + nchd :: size of the children-summary part (the node state has size ne+nchd)
  426 + nc :: number of classes
  427 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  428 + '''
  429 + self.max_phrase_length = max_phrase_length
  430 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  431 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  432 + self.words2ids = w2vecs["words2ids"]
  433 +
  434 + de = w2vecs["vectors"].shape[1]
  435 + del w2vecs
  436 +
  437 + r = 0.05
  438 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  439 +
  440 + self.W_eh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, ne)).astype(theano.config.floatX))
  441 +
  442 + self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nc)).astype(theano.config.floatX))
  443 +
  444 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nchd)).astype(theano.config.floatX))
  445 +
  446 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  447 +
  448 +
  449 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  450 +
  451 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  452 +
  453 + tmp = word_children_positions>=0.0
  454 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  455 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  456 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  457 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  458 + schh = schh/number_of_children
  459 +
  460 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  461 +
  462 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(schh,self.W_sh_h)]))
  463 +
  464 + current_hidden_state = hidden_states[i]
  465 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  466 +
  467 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  468 +
  469 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  470 +
  471 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  472 +
  473 + return cross_entropy, hidden_states_new
  474 +
  475 +
  476 + y = T.vector('y',dtype=dataType)
  477 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  478 + words = T.vector(dtype=dataType)
  479 + children_positions = T.matrix(dtype=dataType)
  480 + words_indexes = T.vector(dtype=dataType)
  481 +
  482 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  483 + sequences = [words, children_positions,y,words_indexes],
  484 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  485 + non_sequences = learning_rate,
  486 + n_steps = words.shape[0])
  487 + cost = T.sum(cross_entropy_vector[0])
  488 +
  489 + updates = OrderedDict([
  490 + (self.W_h_y, self.W_h_y-learning_rate*T.grad(cost, self.W_h_y)),
  491 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  492 + (self.W_eh_h, self.W_eh_h-learning_rate*T.grad(cost, self.W_eh_h)),
  493 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  494 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  495 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  496 + ])
  497 +
  498 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  499 + outputs = [],
  500 + updates = updates,
  501 + allow_input_downcast=True,
  502 + mode='FAST_RUN'
  503 + )
  504 +
  505 +
  506 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  507 +
  508 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  509 +
  510 + tmp = word_children_positions>=0.0
  511 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  512 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  513 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  514 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  515 + schh = schh/number_of_children
  516 +
  517 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  518 +
  519 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(schh,self.W_sh_h)]))
  520 +
  521 + current_hidden_state = hidden_states[i]
  522 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  523 +
  524 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  525 +
  526 + return y_prob, hidden_states_new
  527 +
  528 +
  529 + [y_probs_classify, hidden_states ], _ = theano.scan(
  530 + fn=one_step_classify,
  531 + sequences = [words, children_positions,words_indexes],
  532 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  533 +
  534 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  535 + sequences = [words_indexes])
  536 +
  537 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  538 + outputs=predictions,
  539 + allow_input_downcast=True,
  540 + mode='FAST_RUN'
  541 + )
  542 +
  543 +
  544 +
  545 +
  546 +
  547 +
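      + # model55_pf5: the averaged children state is first passed through a tanh layer (W_sh_shh) before
      + # being projected into the node state; the softmax reads the node state directly (W_h_y, b_y).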
  548 +class model55_pf5(object):
  549 + def __init__(self, ne, nshh, nchd, nc, w2v_model_path, max_phrase_length):
  550 + '''
  551 + ne :: size of the word-embedding part of the node state
  552 + nshh :: size of the intermediate tanh projection of the averaged children state (W_sh_shh)
  553 + nchd :: size of the children-summary part (the node state has size ne+nchd)
  554 + nc :: number of classes
  555 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  556 + '''
  557 + self.max_phrase_length = max_phrase_length
  558 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  559 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  560 + self.words2ids = w2vecs["words2ids"]
  561 +
  562 + #ne = len(w2vecs["words2ids"])
  563 + de = w2vecs["vectors"].shape[1]
  564 + del w2vecs
  565 +
  566 + r = 0.05
  567 + self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, ne)).astype(theano.config.floatX))
  568 +
  569 +
  570 + self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nc)).astype(theano.config.floatX))
  571 +
  572 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  573 +
  574 + self.W_shh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nchd)).astype(theano.config.floatX))
  575 +
  576 +
  577 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  578 +
  579 +
  580 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  581 +
  582 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  583 +
  584 + tmp = word_children_positions>=0.0
  585 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  586 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  587 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  588 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  589 + schh = schh/number_of_children
  590 +
  591 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  592 +
  593 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id], self.W_e_h), T.dot(shh,self.W_shh_h)]))
  594 +
  595 + current_hidden_state = hidden_states[i]
  596 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  597 +
  598 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  599 +
  600 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  601 +
  602 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  603 +
  604 + return cross_entropy, hidden_states_new
  605 +
  606 +
  607 + y = T.vector('y',dtype=dataType)
  608 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  609 + words = T.vector(dtype=dataType)
  610 + children_positions = T.matrix(dtype=dataType)
  611 + words_indexes = T.vector(dtype=dataType)
  612 +
  613 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  614 + sequences = [words, children_positions,y,words_indexes],
  615 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  616 + non_sequences = learning_rate,
  617 + n_steps = words.shape[0])
  618 + cost = T.sum(cross_entropy_vector[0])
  619 +
  620 + updates = OrderedDict([
  621 + (self.W_h_y, self.W_h_y-learning_rate*T.grad(cost, self.W_h_y)),
  622 + (self.W_e_h, self.W_e_h-learning_rate*T.grad(cost, self.W_e_h)),
  623 + (self.W_shh_h, self.W_shh_h-learning_rate*T.grad(cost, self.W_shh_h)),
  624 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  625 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  626 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  627 + ])
  628 +
  629 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  630 + outputs = [],
  631 + updates = updates,
  632 + allow_input_downcast=True,
  633 + mode='FAST_RUN'
  634 + )
  635 +
  636 +
  637 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  638 +
  639 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  640 +
  641 + tmp = word_children_positions>=0.0
  642 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  643 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  644 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  645 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  646 + schh = schh/number_of_children
  647 +
  648 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  649 +
  650 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id], self.W_e_h), T.dot(shh,self.W_shh_h)]))
  651 +
  652 +
  653 + current_hidden_state = hidden_states[i]
  654 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  655 +
  656 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  657 +
  658 + return y_prob, hidden_states_new
  659 +
  660 +
  661 + [y_probs_classify, hidden_states ], _ = theano.scan(
  662 + fn=one_step_classify,
  663 + sequences = [words, children_positions,words_indexes],
  664 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  665 +
  666 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  667 + sequences = [words_indexes])
  668 +
  669 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  670 + outputs=predictions,
  671 + allow_input_downcast=True,
  672 + mode='FAST_RUN'
  673 + )
  674 +
  675 +
  676 +
  677 +
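      + # model55_pf6: tanh pre-projections on both inputs (W_e_eh for the word embedding, W_sh_shh for the
      + # averaged children state); the softmax reads the node state directly (W_h_y, b_y).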
  678 +class model55_pf6(object):
  679 + def __init__(self, neh, ne, nshh, nchd, nc, w2v_model_path, max_phrase_length):
  680 + '''
  681 + neh / nshh :: sizes of the intermediate tanh projections of the word embedding and of the averaged children state
  682 + ne :: size of the word-embedding part of the node state
  683 + nchd :: size of the children-summary part (the node state has size ne+nchd)
  684 + nc :: number of classes
  685 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  686 + '''
  687 + self.max_phrase_length = max_phrase_length
  688 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  689 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  690 + self.words2ids = w2vecs["words2ids"]
  691 +
  692 + #ne = len(w2vecs["words2ids"])
  693 + de = w2vecs["vectors"].shape[1]
  694 + del w2vecs
  695 +
  696 + r = 0.05
  697 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  698 +
  699 + self.W_eh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, ne)).astype(theano.config.floatX))
  700 +
  701 + self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nc)).astype(theano.config.floatX))
  702 +
  703 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  704 +
  705 + self.W_shh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nchd)).astype(theano.config.floatX))
  706 +
  707 +
  708 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  709 +
  710 +
  711 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  712 +
  713 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  714 +
  715 + tmp = word_children_positions>=0.0
  716 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  717 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  718 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  719 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  720 + schh = schh/number_of_children
  721 +
  722 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  723 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  724 +
  725 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  726 +
  727 + current_hidden_state = hidden_states[i]
  728 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  729 +
  730 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  731 +
  732 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  733 +
  734 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  735 +
  736 + return cross_entropy, hidden_states_new
  737 +
  738 +
  739 + y = T.vector('y',dtype=dataType)
  740 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  741 + words = T.vector(dtype=dataType)
  742 + children_positions = T.matrix(dtype=dataType)
  743 + words_indexes = T.vector(dtype=dataType)
  744 +
  745 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  746 + sequences = [words, children_positions,y,words_indexes],
  747 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  748 + non_sequences = learning_rate,
  749 + n_steps = words.shape[0])
  750 + cost = T.sum(cross_entropy_vector[0])
  751 +
  752 + updates = OrderedDict([
  753 + (self.W_h_y, self.W_h_y-learning_rate*T.grad(cost, self.W_h_y)),
  754 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  755 + (self.W_eh_h, self.W_eh_h-learning_rate*T.grad(cost, self.W_eh_h)),
  756 + (self.W_shh_h, self.W_shh_h-learning_rate*T.grad(cost, self.W_shh_h)),
  757 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  758 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  759 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  760 + ])
  761 +
  762 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  763 + outputs = [],
  764 + updates = updates,
  765 + allow_input_downcast=True,
  766 + mode='FAST_RUN'
  767 + )
  768 +
  769 +
  770 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  771 +
  772 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  773 +
  774 + tmp = word_children_positions>=0.0
  775 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  776 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  777 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  778 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  779 + schh = schh/number_of_children
  780 +
  781 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  782 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  783 +
  784 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  785 +
  786 +
  787 + current_hidden_state = hidden_states[i]
  788 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  789 +
  790 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  791 +
  792 + return y_prob, hidden_states_new
  793 +
  794 +
  795 + [y_probs_classify, hidden_states ], _ = theano.scan(
  796 + fn=one_step_classify,
  797 + sequences = [words, children_positions,words_indexes],
  798 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  799 +
  800 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  801 + sequences = [words_indexes])
  802 +
  803 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  804 + outputs=predictions,
  805 + allow_input_downcast=True,
  806 + mode='FAST_RUN'
  807 + )
  808 +
  809 +
  810 +
  811 +
  812 +
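      + # model55_pf7: as model55_pf6, plus one tanh layer (W_h_h2) between the node state and the softmax (W_h2_y).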
  813 +class model55_pf7(object):
  814 + def __init__(self, neh, ne, nshh, nchd, nh2, nc, w2v_model_path, max_phrase_length):
  815 + '''
  816 + neh / nshh :: sizes of the intermediate tanh projections of the word embedding and of the averaged children state
  817 + ne :: size of the word-embedding part of the node state; nchd :: size of the children-summary part
  818 + nh2 :: size of the hidden layer between the node state (of size ne+nchd) and the softmax
  819 + nc :: number of classes
  820 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  821 + '''
  822 + self.max_phrase_length = max_phrase_length
  823 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  824 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  825 + self.words2ids = w2vecs["words2ids"]
  826 +
  827 + #ne = len(w2vecs["words2ids"])
  828 + de = w2vecs["vectors"].shape[1]
  829 + del w2vecs
  830 +
  831 + r = 0.05
  832 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  833 +
  834 + self.W_eh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, ne)).astype(theano.config.floatX))
  835 +
  836 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  837 + self.W_h2_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nc)).astype(theano.config.floatX))
  838 +
  839 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  840 +
  841 + self.W_shh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nchd)).astype(theano.config.floatX))
  842 +
  843 +
  844 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  845 +
  846 +
  847 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  848 +
  849 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  850 +
  851 + tmp = word_children_positions>=0.0
  852 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  853 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  854 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  855 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  856 + schh = schh/number_of_children
  857 +
  858 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  859 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  860 +
  861 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  862 +
  863 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  864 +
  865 + current_hidden_state = hidden_states[i]
  866 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  867 +
  868 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  869 +
  870 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  871 +
  872 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  873 +
  874 + return cross_entropy, hidden_states_new
  875 +
  876 +
  877 + y = T.vector('y',dtype=dataType)
  878 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  879 + words = T.vector(dtype=dataType)
  880 + children_positions = T.matrix(dtype=dataType)
  881 + words_indexes = T.vector(dtype=dataType)
  882 +
  883 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  884 + sequences = [words, children_positions,y,words_indexes],
  885 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  886 + non_sequences = learning_rate,
  887 + n_steps = words.shape[0])
  888 +
  889 + cost = T.sum(cross_entropy_vector[0])
  890 +
  891 + updates = OrderedDict([
  892 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  893 + (self.W_h2_y, self.W_h2_y-learning_rate*T.grad(cost, self.W_h2_y)),
  894 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  895 + (self.W_eh_h, self.W_eh_h-learning_rate*T.grad(cost, self.W_eh_h)),
  896 + (self.W_shh_h, self.W_shh_h-learning_rate*T.grad(cost, self.W_shh_h)),
  897 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  898 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  899 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  900 + ])
  901 +
  902 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  903 + outputs = [],
  904 + updates = updates,
  905 + allow_input_downcast=True,
  906 + mode='FAST_RUN'
  907 + )
  908 +
  909 +
  910 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  911 +
  912 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  913 +
  914 + tmp = word_children_positions>=0.0
  915 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  916 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  917 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  918 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  919 + schh = schh/number_of_children
  920 +
  921 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  922 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  923 +
  924 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  925 +
  926 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  927 +
  928 + current_hidden_state = hidden_states[i]
  929 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  930 +
  931 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  932 +
  933 + return y_prob, hidden_states_new
  934 +
  935 +
  936 + [y_probs_classify, hidden_states ], _ = theano.scan(
  937 + fn=one_step_classify,
  938 + sequences = [words, children_positions,words_indexes],
  939 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  940 +
  941 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  942 + sequences = [words_indexes])
  943 +
  944 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  945 + outputs=predictions,
  946 + allow_input_downcast=True,
  947 + mode='FAST_RUN'
  948 + )
  949 +
  950 +
  951 +
  952 +
  953 +
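      + # model55_pf8: as model55_pf6, plus two tanh layers (W_h_h2, W_h2_h3) between the node state and the softmax (W_h3_y).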
  954 +class model55_pf8(object):
  955 + def __init__(self, neh, ne, nshh, nchd, nh2, nh3, nc, w2v_model_path, max_phrase_length):
  956 + '''
  957 + neh / nshh :: sizes of the intermediate tanh projections of the word embedding and of the averaged children state
  958 + ne :: size of the word-embedding part of the node state; nchd :: size of the children-summary part
  959 + nh2, nh3 :: sizes of the two hidden layers between the node state (of size ne+nchd) and the softmax
  960 + nc :: number of classes
  961 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  962 + '''
  963 + self.max_phrase_length = max_phrase_length
  964 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  965 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  966 + self.words2ids = w2vecs["words2ids"]
  967 +
  968 + #ne = len(w2vecs["words2ids"])
  969 + de = w2vecs["vectors"].shape[1]
  970 + del w2vecs
  971 +
  972 + r = 0.05
  973 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  974 +
  975 + self.W_eh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, ne)).astype(theano.config.floatX))
  976 +
  977 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  978 + self.W_h2_h3 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nh3)).astype(theano.config.floatX))
  979 + self.W_h3_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh3, nc)).astype(theano.config.floatX))
  980 +
  981 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  982 +
  983 + self.W_shh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nchd)).astype(theano.config.floatX))
  984 +
  985 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  986 +
  987 +
  988 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  989 +
  990 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  991 +
  992 + tmp = word_children_positions>=0.0
  993 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  994 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  995 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  996 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  997 + schh = schh/number_of_children
  998 +
  999 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1000 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1001 +
  1002 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  1003 +
  1004 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1005 +
  1006 +
  1007 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1008 +
  1009 + current_hidden_state = hidden_states[i]
  1010 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1011 +
  1012 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  1013 +
  1014 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  1015 +
  1016 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  1017 +
  1018 + return cross_entropy, hidden_states_new
  1019 +
  1020 +
  1021 + y = T.vector('y',dtype=dataType)
  1022 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  1023 + words = T.vector(dtype=dataType)
  1024 + children_positions = T.matrix(dtype=dataType)
  1025 + words_indexes = T.vector(dtype=dataType)
  1026 +
  1027 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  1028 + sequences = [words, children_positions,y,words_indexes],
  1029 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  1030 + non_sequences = learning_rate,
  1031 + n_steps = words.shape[0])
  1032 +
  1033 + cost = T.sum(cross_entropy_vector[0])
  1034 +
  1035 + updates = OrderedDict([
  1036 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  1037 + (self.W_h2_h3, self.W_h2_h3-learning_rate*T.grad(cost, self.W_h2_h3)),
  1038 + (self.W_h3_y, self.W_h3_y-learning_rate*T.grad(cost, self.W_h3_y)),
  1039 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  1040 + (self.W_eh_h, self.W_eh_h-learning_rate*T.grad(cost, self.W_eh_h)),
  1041 + (self.W_shh_h, self.W_shh_h-learning_rate*T.grad(cost, self.W_shh_h)),
  1042 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  1043 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  1044 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  1045 + ])
  1046 +
  1047 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  1048 + outputs = [],
  1049 + updates = updates,
  1050 + allow_input_downcast=True,
  1051 + mode='FAST_RUN'
  1052 + )
  1053 +
  1054 +
  1055 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  1056 +
  1057 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1058 +
  1059 + tmp = word_children_positions>=0.0
  1060 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1061 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1062 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1063 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1064 + schh = schh/number_of_children
  1065 +
  1066 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1067 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1068 +
  1069 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  1070 +
  1071 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1072 +
  1073 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1074 +
  1075 + current_hidden_state = hidden_states[i]
  1076 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1077 +
  1078 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  1079 +
  1080 + return y_prob, hidden_states_new
  1081 +
  1082 +
  1083 + [y_probs_classify, hidden_states ], _ = theano.scan(
  1084 + fn=one_step_classify,
  1085 + sequences = [words, children_positions,words_indexes],
  1086 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  1087 +
  1088 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  1089 + sequences = [words_indexes])
  1090 +
  1091 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  1092 + outputs=predictions,
  1093 + allow_input_downcast=True,
  1094 + mode='FAST_RUN'
  1095 + )
  1096 +
  1097 +
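      + # model55_pf9: same architecture as model55_pf3 (linear projections of the embedding and of the
      + # averaged children state, then two tanh layers before the softmax).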
  1098 +class model55_pf9(object):
  1099 + def __init__(self, ne, nchd, nh2, nh3, nc, w2v_model_path, max_phrase_length):
  1100 + '''
  1101 + ne :: size of the word-embedding part of the node state (projection W_e_h)
  1102 + nchd :: size of the children-summary part (the node state has size ne+nchd)
  1103 + nh2, nh3 :: sizes of the two hidden layers between the node state and the softmax
  1104 + nc :: number of classes
  1105 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  1106 + '''
  1107 + self.max_phrase_length = max_phrase_length
  1108 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  1109 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  1110 + self.words2ids = w2vecs["words2ids"]
  1111 +
  1112 + #ne = len(w2vecs["words2ids"])
  1113 + de = w2vecs["vectors"].shape[1]
  1114 + del w2vecs
  1115 +
  1116 + r = 0.05
  1117 + self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, ne)).astype(theano.config.floatX))
  1118 +
  1119 +
  1120 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  1121 + self.W_h2_h3 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nh3)).astype(theano.config.floatX))
  1122 + self.W_h3_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh3, nc)).astype(theano.config.floatX))
  1123 +
  1124 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nchd)).astype(theano.config.floatX))
  1125 +
  1126 +
  1127 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  1128 +
  1129 +
  1130 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  1131 +
  1132 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1133 +
  1134 + tmp = word_children_positions>=0.0
  1135 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1136 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1137 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1138 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1139 + schh = schh/number_of_children
  1140 +
  1141 +
  1142 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  1143 +
  1144 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1145 +
  1146 +
  1147 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1148 +
  1149 + current_hidden_state = hidden_states[i]
  1150 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1151 +
  1152 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  1153 +
  1154 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  1155 +
  1156 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  1157 +
  1158 + return cross_entropy, hidden_states_new
  1159 +
  1160 +
  1161 + y = T.vector('y',dtype=dataType)
  1162 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  1163 + words = T.vector(dtype=dataType)
  1164 + children_positions = T.matrix(dtype=dataType)
  1165 + words_indexes = T.vector(dtype=dataType)
  1166 +
  1167 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  1168 + sequences = [words, children_positions,y,words_indexes],
  1169 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  1170 + non_sequences = learning_rate,
  1171 + n_steps = words.shape[0])
  1172 +
  1173 + cost = T.sum(cross_entropy_vector[0])
  1174 +
  1175 + updates = OrderedDict([
  1176 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  1177 + (self.W_h2_h3, self.W_h2_h3-learning_rate*T.grad(cost, self.W_h2_h3)),
  1178 + (self.W_h3_y, self.W_h3_y-learning_rate*T.grad(cost, self.W_h3_y)),
  1179 + (self.W_e_h, self.W_e_h-learning_rate*T.grad(cost, self.W_e_h)),
  1180 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  1181 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  1182 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  1183 + ])
  1184 +
  1185 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  1186 + outputs = [],
  1187 + updates = updates,
  1188 + allow_input_downcast=True,
  1189 + mode='FAST_RUN'
  1190 + )
  1191 +
  1192 +
  1193 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  1194 +
  1195 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1196 +
  1197 + tmp = word_children_positions>=0.0
  1198 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1199 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1200 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1201 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1202 + schh = schh/number_of_children
  1203 +
  1204 +
  1205 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  1206 +
  1207 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1208 +
  1209 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1210 +
  1211 + current_hidden_state = hidden_states[i]
  1212 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1213 +
  1214 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  1215 +
  1216 + return y_prob, hidden_states_new
  1217 +
  1218 +
  1219 + [y_probs_classify, hidden_states ], _ = theano.scan(
  1220 + fn=one_step_classify,
  1221 + sequences = [words, children_positions,words_indexes],
  1222 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  1223 +
  1224 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  1225 + sequences = [words_indexes])
  1226 +
  1227 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  1228 + outputs=predictions,
  1229 + allow_input_downcast=True,
  1230 + mode='FAST_RUN'
  1231 + )
  1232 +
  1233 +
  1234 +
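      + # model55_pf10: the raw word embedding is concatenated, unprojected, with the projected averaged
      + # children state (W_sh_h); one tanh layer (W_h_h2) before the softmax (W_h2_y).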
  1235 +class model55_pf10(object):
  1236 + def __init__(self, nchd, nh2, nc, w2v_model_path, max_phrase_length):
  1237 + '''
  1238 + de :: dimension of the word embeddings (read from the embedding pickle)
  1239 + nchd :: size of the children-summary part (the raw embedding is concatenated with it, so the node state has size de+nchd)
  1240 + nh2 :: size of the hidden layer between the node state and the softmax
  1241 + nc :: number of classes
  1242 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  1243 + '''
  1244 + self.max_phrase_length = max_phrase_length
  1245 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  1246 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  1247 + self.words2ids = w2vecs["words2ids"]
  1248 +
  1249 + #ne = len(w2vecs["words2ids"])
  1250 + de = w2vecs["vectors"].shape[1]
  1251 + del w2vecs
  1252 +
  1253 + r = 0.05
  1254 +
  1255 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (de+nchd, nh2)).astype(theano.config.floatX))
  1256 + self.W_h2_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nc)).astype(theano.config.floatX))
  1257 +
  1258 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de+nchd, nchd)).astype(theano.config.floatX))
  1259 +
  1260 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  1261 +
  1262 +
  1263 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  1264 +
  1265 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1266 +
  1267 + tmp = word_children_positions>=0.0
  1268 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1269 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1270 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1271 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1272 + schh = schh/number_of_children
  1273 +
  1274 +
  1275 + h = T.tanh(T.concatenate([self.emb[word_id], T.dot(schh,self.W_sh_h)]))
  1276 +
  1277 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1278 +
  1279 +
  1280 + current_hidden_state = hidden_states[i]
  1281 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1282 +
  1283 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  1284 +
  1285 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  1286 +
  1287 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  1288 +
  1289 + return cross_entropy, hidden_states_new
  1290 +
  1291 +
  1292 + y = T.vector('y',dtype=dataType)
  1293 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  1294 + words = T.vector(dtype=dataType)
  1295 + children_positions = T.matrix(dtype=dataType)
  1296 + words_indexes = T.vector(dtype=dataType)
  1297 +
  1298 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  1299 + sequences = [words, children_positions,y,words_indexes],
  1300 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,de+nchd), dtype = theano.config.floatX))],
  1301 + non_sequences = learning_rate,
  1302 + n_steps = words.shape[0])
  1303 +
  1304 + cost = T.sum(cross_entropy_vector[0])
  1305 +
  1306 + updates = OrderedDict([
  1307 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  1308 + (self.W_h2_y, self.W_h2_y-learning_rate*T.grad(cost, self.W_h2_y)),
  1309 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  1310 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  1311 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  1312 + ])
  1313 +
  1314 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  1315 + outputs = [],
  1316 + updates = updates,
  1317 + allow_input_downcast=True,
  1318 + mode='FAST_RUN'
  1319 + )
  1320 +
  1321 +
  1322 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  1323 +
  1324 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1325 +
  1326 + tmp = word_children_positions>=0.0
  1327 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1328 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1329 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1330 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1331 + schh = schh/number_of_children
  1332 +
  1333 + h = T.tanh(T.concatenate([self.emb[word_id], T.dot(schh,self.W_sh_h)]))
  1334 +
  1335 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1336 +
  1337 +
  1338 + current_hidden_state = hidden_states[i]
  1339 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1340 +
  1341 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  1342 +
  1343 + return y_prob, hidden_states_new
  1344 +
  1345 +
  1346 + [y_probs_classify, hidden_states ], _ = theano.scan(
  1347 + fn=one_step_classify,
  1348 + sequences = [words, children_positions,words_indexes],
  1349 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,de+nchd), dtype = theano.config.floatX))])
  1350 +
  1351 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  1352 + sequences = [words_indexes])
  1353 +
  1354 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  1355 + outputs=predictions,
  1356 + allow_input_downcast=True,
  1357 + mode='FAST_RUN'
  1358 + )
  1359 +
  1360 +
  1361 +
  1362 +
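      + # model55_pf11: as model55_pf10, but with two tanh layers (W_h_h2, W_h2_h3) before the softmax (W_h3_y).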
  1363 +class model55_pf11(object):
  1364 + def __init__(self, nchd, nh2, nh3, nc, w2v_model_path, max_phrase_length):
  1365 + '''
  1366 + de :: dimension of the word embeddings (read from the embedding pickle)
  1367 + nchd :: size of the children-summary part (the raw embedding is concatenated with it, so the node state has size de+nchd)
  1368 + nh2, nh3 :: sizes of the two hidden layers between the node state and the softmax
  1369 + nc :: number of classes
  1370 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  1371 + '''
  1372 + self.max_phrase_length = max_phrase_length
  1373 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  1374 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  1375 + self.words2ids = w2vecs["words2ids"]
  1376 +
  1377 + #ne = len(w2vecs["words2ids"])
  1378 + de = w2vecs["vectors"].shape[1]
  1379 + del w2vecs
  1380 +
  1381 + r = 0.05
  1382 +
  1383 +
  1384 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (de+nchd, nh2)).astype(theano.config.floatX))
  1385 + self.W_h2_h3 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nh3)).astype(theano.config.floatX))
  1386 + self.W_h3_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh3, nc)).astype(theano.config.floatX))
  1387 +
  1388 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de+nchd, nchd)).astype(theano.config.floatX))
  1389 +
  1390 +
  1391 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  1392 +
  1393 +
  1394 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  1395 +
  1396 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1397 +
  1398 + tmp = word_children_positions>=0.0
  1399 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1400 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1401 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1402 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1403 + schh = schh/number_of_children
  1404 +
  1405 +
  1406 + h = T.tanh(T.concatenate([self.emb[word_id], T.dot(schh,self.W_sh_h)]))
  1407 +
  1408 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1409 +
  1410 +
  1411 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1412 +
  1413 + current_hidden_state = hidden_states[i]
  1414 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1415 +
  1416 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  1417 +
  1418 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  1419 +
  1420 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  1421 +
  1422 + return cross_entropy, hidden_states_new
  1423 +
  1424 +
  1425 + y = T.vector('y',dtype=dataType)
  1426 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  1427 + words = T.vector(dtype=dataType)
  1428 + children_positions = T.matrix(dtype=dataType)
  1429 + words_indexes = T.vector(dtype=dataType)
  1430 +
  1431 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  1432 + sequences = [words, children_positions,y,words_indexes],
  1433 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,de+nchd), dtype = theano.config.floatX))],
  1434 + non_sequences = learning_rate,
  1435 + n_steps = words.shape[0])
  1436 +
  1437 + cost = T.sum(cross_entropy_vector[0])
  1438 +
  1439 + updates = OrderedDict([
  1440 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  1441 + (self.W_h2_h3, self.W_h2_h3-learning_rate*T.grad(cost, self.W_h2_h3)),
  1442 + (self.W_h3_y, self.W_h3_y-learning_rate*T.grad(cost, self.W_h3_y)),
  1443 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  1444 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  1445 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  1446 + ])
  1447 +
  1448 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  1449 + outputs = [],
  1450 + updates = updates,
  1451 + allow_input_downcast=True,
  1452 + mode='FAST_RUN'
  1453 + )
  1454 +
  1455 +
  1456 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  1457 +
  1458 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1459 +
  1460 + tmp = word_children_positions>=0.0
  1461 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1462 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1463 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1464 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1465 + schh = schh/number_of_children
  1466 +
  1467 +
  1468 + h = T.tanh(T.concatenate([self.emb[word_id], T.dot(schh,self.W_sh_h)]))
  1469 +
  1470 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1471 +
  1472 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1473 +
  1474 + current_hidden_state = hidden_states[i]
  1475 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1476 +
  1477 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  1478 +
  1479 + return y_prob, hidden_states_new
  1480 +
  1481 +
  1482 + [y_probs_classify, hidden_states ], _ = theano.scan(
  1483 + fn=one_step_classify,
  1484 + sequences = [words, children_positions,words_indexes],
  1485 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,de+nchd), dtype = theano.config.floatX))])
  1486 +
  1487 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  1488 + sequences = [words_indexes])
  1489 +
  1490 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  1491 + outputs=predictions,
  1492 + allow_input_downcast=True,
  1493 + mode='FAST_RUN'
  1494 + )
  1495 +
  1496 +
  1497 +
  1498 +
  1499 +
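      + # model55_pf12: as model55_pf8, plus a third tanh layer (W_h3_h4) before the softmax (W_h4_y).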
  1500 +class model55_pf12(object):
  1501 + def __init__(self, neh, ne, nshh, nchd, nh2, nh3, nh4, nc, w2v_model_path, max_phrase_length):
  1502 + '''
  1503 + neh / nshh :: sizes of the intermediate tanh projections of the word embedding and of the averaged children state
  1504 + ne :: size of the word-embedding part of the node state; nchd :: size of the children-summary part
  1505 + nh2, nh3, nh4 :: sizes of the three hidden layers between the node state (of size ne+nchd) and the softmax
  1506 + nc :: number of classes
  1507 + w2v_model_path :: path to the pickled embeddings; max_phrase_length :: maximum number of words per sentence
  1508 + '''
  1509 + self.max_phrase_length = max_phrase_length
  1510 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  1511 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  1512 + self.words2ids = w2vecs["words2ids"]
  1513 +
  1514 + #ne = len(w2vecs["words2ids"])
  1515 + de = w2vecs["vectors"].shape[1]
  1516 + del w2vecs
  1517 +
  1518 + r = 0.05
  1519 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  1520 +
  1521 + self.W_eh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, ne)).astype(theano.config.floatX))
  1522 +
  1523 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  1524 + self.W_h2_h3 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nh3)).astype(theano.config.floatX))
  1525 + self.W_h3_h4 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh3, nh4)).astype(theano.config.floatX))
  1526 + self.W_h4_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh4, nc)).astype(theano.config.floatX))
  1527 +
  1528 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  1529 +
  1530 + self.W_shh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nchd)).astype(theano.config.floatX))
  1531 +
  1532 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  1533 +
  1534 +
  1535 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  1536 +
  1537 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector kept in the extra last row
  1538 +
  1539 + tmp = word_children_positions>=0.0
  1540 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not the -1 padding
  1541 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1542 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1543 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1544 + schh = schh/number_of_children
  1545 +
  1546 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1547 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1548 +
  1549 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  1550 +
  1551 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1552 +
  1553 +
  1554 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1555 + h4 = T.tanh(T.dot(h3, self.W_h3_h4))
  1556 +
  1557 + current_hidden_state = hidden_states[i]
  1558 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1559 +
  1560 + y_prob = T.nnet.softmax(T.dot(h4,self.W_h4_y) + self.b_y)[0]
  1561 +
  1562 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  1563 +
  1564 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  1565 +
  1566 + return cross_entropy, hidden_states_new
  1567 +
  1568 +
  1569 + y = T.vector('y',dtype=dataType)
  1570 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  1571 + words = T.vector(dtype=dataType)
  1572 + children_positions = T.matrix(dtype=dataType)
  1573 + words_indexes = T.vector(dtype=dataType)
  1574 +
  1575 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  1576 + sequences = [words, children_positions,y,words_indexes],
  1577 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  1578 + non_sequences = learning_rate,
  1579 + n_steps = words.shape[0])
  1580 +
  1581 + cost = T.sum(cross_entropy_vector[0])
  1582 +
  1583 + updates = OrderedDict([
  1584 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  1585 + (self.W_h2_h3, self.W_h2_h3-learning_rate*T.grad(cost, self.W_h2_h3)),
  1586 + (self.W_h3_h4, self.W_h3_h4-learning_rate*T.grad(cost, self.W_h3_h4)),
  1587 + (self.W_h4_y, self.W_h4_y-learning_rate*T.grad(cost, self.W_h4_y)),
  1588 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  1589 + (self.W_eh_h, self.W_eh_h-learning_rate*T.grad(cost, self.W_eh_h)),
  1590 + (self.W_shh_h, self.W_shh_h-learning_rate*T.grad(cost, self.W_shh_h)),
  1591 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  1592 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  1593 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  1594 + ])
  1595 +
  1596 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  1597 + outputs = [],
  1598 + updates = updates,
  1599 + allow_input_downcast=True,
  1600 + mode='FAST_RUN'
  1601 + )
  1602 +
  1603 +
  1604 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  1605 +
  1606 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  1607 +
  1608 + tmp = word_children_positions>=0.0
  1609 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  1610 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1611 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1612 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1613 + schh = schh/number_of_children
  1614 +
  1615 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1616 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1617 +
  1618 + h = T.tanh(T.concatenate([T.dot(eh, self.W_eh_h), T.dot(shh,self.W_shh_h)]))
  1619 +
  1620 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1621 +
  1622 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  1623 + h4 = T.tanh(T.dot(h3, self.W_h3_h4))
  1624 +
  1625 + current_hidden_state = hidden_states[i]
  1626 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1627 +
  1628 + y_prob = T.nnet.softmax(T.dot(h4,self.W_h4_y) + self.b_y)[0]
  1629 +
  1630 + return y_prob, hidden_states_new
  1631 +
  1632 +
  1633 + [y_probs_classify, hidden_states ], _ = theano.scan(
  1634 + fn=one_step_classify,
  1635 + sequences = [words, children_positions,words_indexes],
  1636 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  1637 +
  1638 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  1639 + sequences = [words_indexes])
  1640 +
  1641 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  1642 + outputs=predictions,
  1643 + allow_input_downcast=True,
  1644 + mode='FAST_RUN'
  1645 + )
  1646 +
  1647 +
  1648 +
  1649 +class model55_pf13(object):
  1650 + def __init__(self, neh, neh2, ne, nshh, nshh2, nchd, nc, w2v_model_path, max_phrase_length):
  1651 + '''
  1652 + neh, neh2 :: sizes of the hidden layers on the embedding path; ne :: size of the word part of a node's hidden state
  1653 + nshh, nshh2 :: sizes of the hidden layers on the children path; nchd :: size of the children part of a node's hidden state
  1654 + nc :: number of classes
  1655 + w2v_model_path :: path to the pickled word embeddings (de, the embedding dimension, is read from this file)
  1656 + max_phrase_length :: maximum number of tokens in a phrase
  1657 + '''
  1658 + self.max_phrase_length = max_phrase_length
  1659 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  1660 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  1661 + self.words2ids = w2vecs["words2ids"]
  1662 +
  1663 + #ne = len(w2vecs["words2ids"])
  1664 + de = w2vecs["vectors"].shape[1]
  1665 + del w2vecs
  1666 +
  1667 + r = 0.05
  1668 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  1669 + self.W_eh_eh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, neh2)).astype(theano.config.floatX))
  1670 + self.W_eh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh2, ne)).astype(theano.config.floatX))
  1671 +
  1672 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  1673 + self.W_shh_shh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nshh2)).astype(theano.config.floatX))
  1674 + self.W_shh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh2, nchd)).astype(theano.config.floatX))
  1675 +
  1676 + self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nc)).astype(theano.config.floatX))
  1677 +
  1678 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  1679 +
  1680 +
  1681 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  1682 +
  1683 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  1684 +
  1685 + tmp = word_children_positions>=0.0
  1686 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  1687 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1688 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1689 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1690 + schh = schh/number_of_children
  1691 +
  1692 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1693 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  1694 +
  1695 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1696 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  1697 +
  1698 +
  1699 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  1700 +
  1701 +
  1702 + current_hidden_state = hidden_states[i]
  1703 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1704 +
  1705 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  1706 +
  1707 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  1708 +
  1709 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  1710 +
  1711 + return cross_entropy, hidden_states_new
  1712 +
  1713 +
  1714 + y = T.vector('y',dtype=dataType)
  1715 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  1716 + words = T.vector(dtype=dataType)
  1717 + children_positions = T.matrix(dtype=dataType)
  1718 + words_indexes = T.vector(dtype=dataType)
  1719 +
  1720 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  1721 + sequences = [words, children_positions,y,words_indexes],
  1722 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  1723 + non_sequences = learning_rate,
  1724 + n_steps = words.shape[0])
  1725 +
  1726 + cost = T.sum(cross_entropy_vector[0])
  1727 +
  1728 + updates = OrderedDict([
  1729 + (self.W_h_y, self.W_h_y-learning_rate*T.grad(cost, self.W_h_y)),
  1730 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  1731 + (self.W_eh_eh2, self.W_eh_eh2-learning_rate*T.grad(cost, self.W_eh_eh2)),
  1732 + (self.W_eh2_h, self.W_eh2_h-learning_rate*T.grad(cost, self.W_eh2_h)),
  1733 + (self.W_shh2_h, self.W_shh2_h-learning_rate*T.grad(cost, self.W_shh2_h)),
  1734 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  1735 + (self.W_shh_shh2, self.W_shh_shh2-learning_rate*T.grad(cost, self.W_shh_shh2)),
  1736 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  1737 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  1738 + ])
  1739 +
  1740 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  1741 + outputs = [],
  1742 + updates = updates,
  1743 + allow_input_downcast=True,
  1744 + mode='FAST_RUN'
  1745 + )
  1746 +
  1747 +
  1748 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  1749 +
  1750 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  1751 +
  1752 + tmp = word_children_positions>=0.0
  1753 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  1754 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1755 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1756 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1757 + schh = schh/number_of_children
  1758 +
  1759 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1760 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  1761 +
  1762 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1763 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  1764 +
  1765 +
  1766 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  1767 +
  1768 +
  1769 + current_hidden_state = hidden_states[i]
  1770 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1771 +
  1772 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  1773 +
  1774 +
  1775 + return y_prob, hidden_states_new
  1776 +
  1777 +
  1778 + [y_probs_classify, hidden_states ], _ = theano.scan(
  1779 + fn=one_step_classify,
  1780 + sequences = [words, children_positions,words_indexes],
  1781 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  1782 +
  1783 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  1784 + sequences = [words_indexes])
  1785 +
  1786 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  1787 + outputs=predictions,
  1788 + allow_input_downcast=True,
  1789 + mode='FAST_RUN'
  1790 + )
  1791 +
  1792 +
  1793 +
  1794 +class model55_pf14(object):
  1795 + def __init__(self, neh, neh2, ne, nshh, nshh2, nchd, nh2, nc, w2v_model_path, max_phrase_length):
  1796 + '''
  1797 + neh, neh2 :: sizes of the hidden layers on the embedding path; ne :: size of the word part of a node's hidden state
  1798 + nshh, nshh2 :: sizes of the hidden layers on the children path; nchd :: size of the children part of a node's hidden state
  1799 + nh2 :: size of the additional hidden layer before the output; nc :: number of classes
  1800 + w2v_model_path :: path to the pickled word embeddings (de, the embedding dimension, is read from this file)
  1801 + max_phrase_length :: maximum number of tokens in a phrase
  1802 + '''
  1803 + self.max_phrase_length = max_phrase_length
  1804 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  1805 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  1806 + self.words2ids = w2vecs["words2ids"]
  1807 +
  1808 + #ne = len(w2vecs["words2ids"])
  1809 + de = w2vecs["vectors"].shape[1]
  1810 + del w2vecs
  1811 +
  1812 + r = 0.05
  1813 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  1814 + self.W_eh_eh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, neh2)).astype(theano.config.floatX))
  1815 + self.W_eh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh2, ne)).astype(theano.config.floatX))
  1816 +
  1817 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  1818 + self.W_shh_shh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nshh2)).astype(theano.config.floatX))
  1819 + self.W_shh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh2, nchd)).astype(theano.config.floatX))
  1820 +
  1821 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  1822 + self.W_h2_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nc)).astype(theano.config.floatX))
  1823 +
  1824 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  1825 +
  1826 +
  1827 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  1828 +
  1829 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  1830 +
  1831 + tmp = word_children_positions>=0.0
  1832 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  1833 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1834 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1835 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1836 + schh = schh/number_of_children
  1837 +
  1838 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1839 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  1840 +
  1841 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1842 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  1843 +
  1844 +
  1845 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  1846 +
  1847 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1848 +
  1849 + current_hidden_state = hidden_states[i]
  1850 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1851 +
  1852 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  1853 +
  1854 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  1855 +
  1856 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  1857 +
  1858 + return cross_entropy, hidden_states_new
  1859 +
  1860 +
  1861 + y = T.vector('y',dtype=dataType)
  1862 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  1863 + words = T.vector(dtype=dataType)
  1864 + children_positions = T.matrix(dtype=dataType)
  1865 + words_indexes = T.vector(dtype=dataType)
  1866 +
  1867 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  1868 + sequences = [words, children_positions,y,words_indexes],
  1869 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  1870 + non_sequences = learning_rate,
  1871 + n_steps = words.shape[0])
  1872 +
  1873 + cost = T.sum(cross_entropy_vector[0])
  1874 +
  1875 + updates = OrderedDict([
  1876 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  1877 + (self.W_h2_y, self.W_h2_y-learning_rate*T.grad(cost, self.W_h2_y)),
  1878 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  1879 + (self.W_eh_eh2, self.W_eh_eh2-learning_rate*T.grad(cost, self.W_eh_eh2)),
  1880 + (self.W_eh2_h, self.W_eh2_h-learning_rate*T.grad(cost, self.W_eh2_h)),
  1881 + (self.W_shh2_h, self.W_shh2_h-learning_rate*T.grad(cost, self.W_shh2_h)),
  1882 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  1883 + (self.W_shh_shh2, self.W_shh_shh2-learning_rate*T.grad(cost, self.W_shh_shh2)),
  1884 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  1885 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  1886 + ])
  1887 +
  1888 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  1889 + outputs = [],
  1890 + updates = updates,
  1891 + allow_input_downcast=True,
  1892 + mode='FAST_RUN'
  1893 + )
  1894 +
  1895 +
  1896 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  1897 +
  1898 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  1899 +
  1900 + tmp = word_children_positions>=0.0
  1901 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  1902 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1903 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1904 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1905 + schh = schh/number_of_children
  1906 +
  1907 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1908 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  1909 +
  1910 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1911 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  1912 +
  1913 +
  1914 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  1915 +
  1916 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1917 +
  1918 + current_hidden_state = hidden_states[i]
  1919 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  1920 +
  1921 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  1922 +
  1923 +
  1924 + return y_prob, hidden_states_new
  1925 +
  1926 +
  1927 + [y_probs_classify, hidden_states ], _ = theano.scan(
  1928 + fn=one_step_classify,
  1929 + sequences = [words, children_positions,words_indexes],
  1930 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  1931 +
  1932 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  1933 + sequences = [words_indexes])
  1934 +
  1935 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  1936 + outputs=predictions,
  1937 + allow_input_downcast=True,
  1938 + mode='FAST_RUN'
  1939 + )
  1940 +
  1941 +
  1942 +
  1943 +
  1944 +class model55_pf15(object):
  1945 + def __init__(self, neh, neh2, ne, nshh, nshh2, nchd, nh2, nh3, nc, w2v_model_path, max_phrase_length):
  1946 + '''
  1947 + neh, neh2 :: sizes of the hidden layers on the embedding path; ne :: size of the word part of a node's hidden state
  1948 + nshh, nshh2 :: sizes of the hidden layers on the children path; nchd :: size of the children part of a node's hidden state
  1949 + nh2, nh3 :: sizes of the additional hidden layers before the output; nc :: number of classes
  1950 + w2v_model_path :: path to the pickled word embeddings (de, the embedding dimension, is read from this file)
  1951 + max_phrase_length :: maximum number of tokens in a phrase
  1952 + '''
  1953 + self.max_phrase_length = max_phrase_length
  1954 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  1955 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  1956 + self.words2ids = w2vecs["words2ids"]
  1957 +
  1958 + #ne = len(w2vecs["words2ids"])
  1959 + de = w2vecs["vectors"].shape[1]
  1960 + del w2vecs
  1961 +
  1962 + r = 0.05
  1963 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  1964 + self.W_eh_eh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, neh2)).astype(theano.config.floatX))
  1965 + self.W_eh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh2, ne)).astype(theano.config.floatX))
  1966 +
  1967 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  1968 + self.W_shh_shh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nshh2)).astype(theano.config.floatX))
  1969 + self.W_shh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh2, nchd)).astype(theano.config.floatX))
  1970 +
  1971 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  1972 + self.W_h2_h3 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nh3)).astype(theano.config.floatX))
  1973 + self.W_h3_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh3, nc)).astype(theano.config.floatX))
  1974 +
  1975 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  1976 +
  1977 +
  1978 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  1979 +
  1980 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  1981 +
  1982 + tmp = word_children_positions>=0.0
  1983 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  1984 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  1985 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  1986 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  1987 + schh = schh/number_of_children
  1988 +
  1989 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  1990 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  1991 +
  1992 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  1993 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  1994 +
  1995 +
  1996 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  1997 +
  1998 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  1999 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  2000 +
  2001 + current_hidden_state = hidden_states[i]
  2002 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  2003 +
  2004 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  2005 +
  2006 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  2007 +
  2008 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  2009 +
  2010 + return cross_entropy, hidden_states_new
  2011 +
  2012 +
  2013 + y = T.vector('y',dtype=dataType)
  2014 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  2015 + words = T.vector(dtype=dataType)
  2016 + children_positions = T.matrix(dtype=dataType)
  2017 + words_indexes = T.vector(dtype=dataType)
  2018 +
  2019 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  2020 + sequences = [words, children_positions,y,words_indexes],
  2021 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  2022 + non_sequences = learning_rate,
  2023 + n_steps = words.shape[0])
  2024 +
  2025 + cost = T.sum(cross_entropy_vector[0])
  2026 +
  2027 + updates = OrderedDict([
  2028 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  2029 + (self.W_h2_h3, self.W_h2_h3-learning_rate*T.grad(cost, self.W_h2_h3)),
  2030 + (self.W_h3_y, self.W_h3_y-learning_rate*T.grad(cost, self.W_h3_y)),
  2031 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  2032 + (self.W_eh_eh2, self.W_eh_eh2-learning_rate*T.grad(cost, self.W_eh_eh2)),
  2033 + (self.W_eh2_h, self.W_eh2_h-learning_rate*T.grad(cost, self.W_eh2_h)),
  2034 + (self.W_shh2_h, self.W_shh2_h-learning_rate*T.grad(cost, self.W_shh2_h)),
  2035 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  2036 + (self.W_shh_shh2, self.W_shh_shh2-learning_rate*T.grad(cost, self.W_shh_shh2)),
  2037 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  2038 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  2039 + ])
  2040 +
  2041 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  2042 + outputs = [],
  2043 + updates = updates,
  2044 + allow_input_downcast=True,
  2045 + mode='FAST_RUN'
  2046 + )
  2047 +
  2048 +
  2049 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  2050 +
  2051 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  2052 +
  2053 + tmp = word_children_positions>=0.0
  2054 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  2055 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  2056 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  2057 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  2058 + schh = schh/number_of_children
  2059 +
  2060 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  2061 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  2062 +
  2063 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  2064 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  2065 +
  2066 +
  2067 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  2068 +
  2069 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  2070 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  2071 +
  2072 + current_hidden_state = hidden_states[i]
  2073 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  2074 +
  2075 + y_prob = T.nnet.softmax(T.dot(h3,self.W_h3_y) + self.b_y)[0]
  2076 +
  2077 +
  2078 + return y_prob, hidden_states_new
  2079 +
  2080 +
  2081 + [y_probs_classify, hidden_states ], _ = theano.scan(
  2082 + fn=one_step_classify,
  2083 + sequences = [words, children_positions,words_indexes],
  2084 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  2085 +
  2086 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  2087 + sequences = [words_indexes])
  2088 +
  2089 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  2090 + outputs=predictions,
  2091 + allow_input_downcast=True,
  2092 + mode='FAST_RUN'
  2093 + )
  2094 +
  2095 +
  2096 +
  2097 +
  2098 +class model55_pf16(object):
  2099 + def __init__(self, nchd, nc, w2v_model_path, max_phrase_length):
  2100 + '''
  2101 + nchd :: size of the children part of a node's hidden state (the word part is the raw embedding of size de)
  2102 + nc :: number of classes
  2103 + de :: dimension of the word embeddings (read from the embedding file)
  2104 + w2v_model_path :: path to the pickled word embeddings
  2105 + max_phrase_length :: maximum number of tokens in a phrase
  2106 + '''
  2107 + self.max_phrase_length = max_phrase_length
  2108 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  2109 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  2110 + self.words2ids = w2vecs["words2ids"]
  2111 +
  2112 + #ne = len(w2vecs["words2ids"])
  2113 + de = w2vecs["vectors"].shape[1]
  2114 + del w2vecs
  2115 +
  2116 + r = 0.05
  2117 +
  2118 + self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (de+nchd, nc)).astype(theano.config.floatX))
  2119 +
  2120 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de+nchd, nchd)).astype(theano.config.floatX))
  2121 +
  2122 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  2123 +
  2124 +
  2125 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  2126 +
  2127 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  2128 +
  2129 + tmp = word_children_positions>=0.0
  2130 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  2131 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  2132 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  2133 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  2134 + schh = schh/number_of_children
  2135 +
  2136 +
  2137 + h = T.tanh(T.concatenate([self.emb[word_id], T.dot(schh,self.W_sh_h)]))
  2138 +
  2139 +
  2140 +
  2141 + current_hidden_state = hidden_states[i]
  2142 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  2143 +
  2144 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  2145 +
  2146 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  2147 +
  2148 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  2149 +
  2150 + return cross_entropy, hidden_states_new
  2151 +
  2152 +
  2153 + y = T.vector('y',dtype=dataType)
  2154 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  2155 + words = T.vector(dtype=dataType)
  2156 + children_positions = T.matrix(dtype=dataType)
  2157 + words_indexes = T.vector(dtype=dataType)
  2158 +
  2159 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  2160 + sequences = [words, children_positions,y,words_indexes],
  2161 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,de+nchd), dtype = theano.config.floatX))],
  2162 + non_sequences = learning_rate,
  2163 + n_steps = words.shape[0])
  2164 +
  2165 + cost = T.sum(cross_entropy_vector[0])
  2166 +
  2167 + updates = OrderedDict([
  2168 + (self.W_h_y, self.W_h_y-learning_rate*T.grad(cost, self.W_h_y)),
  2169 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  2170 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  2171 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  2172 + ])
  2173 +
  2174 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  2175 + outputs = [],
  2176 + updates = updates,
  2177 + allow_input_downcast=True,
  2178 + mode='FAST_RUN'
  2179 + )
  2180 +
  2181 +
  2182 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  2183 +
  2184 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  2185 +
  2186 + tmp = word_children_positions>=0.0
  2187 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  2188 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  2189 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  2190 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  2191 + schh = schh/number_of_children
  2192 +
  2193 + h = T.tanh(T.concatenate([self.emb[word_id], T.dot(schh,self.W_sh_h)]))
  2194 +
  2195 + current_hidden_state = hidden_states[i]
  2196 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  2197 +
  2198 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  2199 +
  2200 + return y_prob, hidden_states_new
  2201 +
  2202 +
  2203 + [y_probs_classify, hidden_states ], _ = theano.scan(
  2204 + fn=one_step_classify,
  2205 + sequences = [words, children_positions,words_indexes],
  2206 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,de+nchd), dtype = theano.config.floatX))])
  2207 +
  2208 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  2209 + sequences = [words_indexes])
  2210 +
  2211 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  2212 + outputs=predictions,
  2213 + allow_input_downcast=True,
  2214 + mode='FAST_RUN'
  2215 + )
  2216 +
  2217 +
  2218 +
  2219 +class model55_pf17(object):
  2220 + def __init__(self, neh, neh2, ne, nshh, nshh2, nchd, nh2, nh3, nh4, nc, w2v_model_path, max_phrase_length):
  2221 + '''
  2222 + neh, neh2 :: sizes of the hidden layers on the embedding path; ne :: size of the word part of a node's hidden state
  2223 + nshh, nshh2 :: sizes of the hidden layers on the children path; nchd :: size of the children part of a node's hidden state
  2224 + nh2, nh3, nh4 :: sizes of the additional hidden layers before the output; nc :: number of classes
  2225 + w2v_model_path :: path to the pickled word embeddings (de, the embedding dimension, is read from this file)
  2226 + max_phrase_length :: maximum number of tokens in a phrase
  2227 + '''
  2228 + self.max_phrase_length = max_phrase_length
  2229 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  2230 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  2231 + self.words2ids = w2vecs["words2ids"]
  2232 +
  2233 + #ne = len(w2vecs["words2ids"])
  2234 + de = w2vecs["vectors"].shape[1]
  2235 + del w2vecs
  2236 +
  2237 + r = 0.05
  2238 + self.W_e_eh = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, neh)).astype(theano.config.floatX))
  2239 + self.W_eh_eh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh, neh2)).astype(theano.config.floatX))
  2240 + self.W_eh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (neh2, ne)).astype(theano.config.floatX))
  2241 +
  2242 + self.W_sh_shh = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nshh)).astype(theano.config.floatX))
  2243 + self.W_shh_shh2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh, nshh2)).astype(theano.config.floatX))
  2244 + self.W_shh2_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (nshh2, nchd)).astype(theano.config.floatX))
  2245 +
  2246 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  2247 + self.W_h2_h3 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nh3)).astype(theano.config.floatX))
  2248 + self.W_h3_h4 = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh3, nh4)).astype(theano.config.floatX))
  2249 + self.W_h4_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh4, nc)).astype(theano.config.floatX))
  2250 +
  2251 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  2252 +
  2253 +
  2254 + def one_step(word_id, word_children_positions, y_true, i, hidden_states, learning_rate):
  2255 +
  2256 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  2257 +
  2258 + tmp = word_children_positions>=0.0
  2259 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  2260 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  2261 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  2262 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  2263 + schh = schh/number_of_children
  2264 +
  2265 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  2266 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  2267 +
  2268 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  2269 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  2270 +
  2271 +
  2272 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  2273 +
  2274 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  2275 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  2276 + h4 = T.tanh(T.dot(h3, self.W_h3_h4))
  2277 +
  2278 + current_hidden_state = hidden_states[i]
  2279 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  2280 +
  2281 + y_prob = T.nnet.softmax(T.dot(h4,self.W_h4_y) + self.b_y)[0]
  2282 +
  2283 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  2284 +
  2285 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  2286 +
  2287 + return cross_entropy, hidden_states_new
  2288 +
  2289 +
  2290 + y = T.vector('y',dtype=dataType)
  2291 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  2292 + words = T.vector(dtype=dataType)
  2293 + children_positions = T.matrix(dtype=dataType)
  2294 + words_indexes = T.vector(dtype=dataType)
  2295 +
  2296 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  2297 + sequences = [words, children_positions,y,words_indexes],
  2298 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  2299 + non_sequences = learning_rate,
  2300 + n_steps = words.shape[0])
  2301 +
  2302 + cost = T.sum(cross_entropy_vector[0])
  2303 +
  2304 + updates = OrderedDict([
  2305 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  2306 + (self.W_h2_h3, self.W_h2_h3-learning_rate*T.grad(cost, self.W_h2_h3)),
  2307 + (self.W_h3_h4, self.W_h3_h4-learning_rate*T.grad(cost, self.W_h3_h4)),
  2308 + (self.W_h4_y, self.W_h4_y-learning_rate*T.grad(cost, self.W_h4_y)),
  2309 + (self.W_e_eh, self.W_e_eh-learning_rate*T.grad(cost, self.W_e_eh)),
  2310 + (self.W_eh_eh2, self.W_eh_eh2-learning_rate*T.grad(cost, self.W_eh_eh2)),
  2311 + (self.W_eh2_h, self.W_eh2_h-learning_rate*T.grad(cost, self.W_eh2_h)),
  2312 + (self.W_shh2_h, self.W_shh2_h-learning_rate*T.grad(cost, self.W_shh2_h)),
  2313 + (self.W_sh_shh, self.W_sh_shh-learning_rate*T.grad(cost, self.W_sh_shh)),
  2314 + (self.W_shh_shh2, self.W_shh_shh2-learning_rate*T.grad(cost, self.W_shh_shh2)),
  2315 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  2316 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  2317 + ])
  2318 +
  2319 + self.train = theano.function( inputs = [words, children_positions, y, words_indexes, learning_rate],
  2320 + outputs = [],
  2321 + updates = updates,
  2322 + allow_input_downcast=True,
  2323 + mode='FAST_RUN'
  2324 + )
  2325 +
  2326 +
  2327 + def one_step_classify(word_id, word_children_positions, i, hidden_states):
  2328 +
  2329 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  2330 +
  2331 + tmp = word_children_positions>=0.0
  2332 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  2333 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  2334 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  2335 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  2336 + schh = schh/number_of_children
  2337 +
  2338 + eh = T.tanh(T.dot(self.emb[word_id],self.W_e_eh))
  2339 + eh2 = T.tanh(T.dot(eh,self.W_eh_eh2))
  2340 +
  2341 + shh = T.tanh(T.dot(schh,self.W_sh_shh))
  2342 + shh2 = T.tanh(T.dot(shh,self.W_shh_shh2))
  2343 +
  2344 +
  2345 + h = T.tanh(T.concatenate([T.dot(eh2, self.W_eh2_h), T.dot(shh2,self.W_shh2_h)]))
  2346 +
  2347 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  2348 + h3 = T.tanh(T.dot(h2, self.W_h2_h3))
  2349 + h4 = T.tanh(T.dot(h3, self.W_h3_h4))
  2350 +
  2351 + current_hidden_state = hidden_states[i]
  2352 + hidden_states_new = T.set_subtensor(current_hidden_state, h)
  2353 +
  2354 + y_prob = T.nnet.softmax(T.dot(h4,self.W_h4_y) + self.b_y)[0]
  2355 +
  2356 +
  2357 + return y_prob, hidden_states_new
  2358 +
  2359 +
  2360 + [y_probs_classify, hidden_states ], _ = theano.scan(
  2361 + fn=one_step_classify,
  2362 + sequences = [words, children_positions,words_indexes],
  2363 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  2364 +
  2365 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  2366 + sequences = [words_indexes])
  2367 +
  2368 + self.classify = theano.function(inputs=[words,children_positions,words_indexes],
  2369 + outputs=predictions,
  2370 + allow_input_downcast=True,
  2371 + mode='FAST_RUN'
  2372 + )
  2373 +
  2374 +
... ...
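
Every one_step / one_step_classify above computes the averaged hidden state of a node's children in the same way. The following minimal NumPy sketch (not part of the committed files) restates that step; it assumes, as in the Theano code, that hidden_states has max_phrase_length+1 rows whose extra last row stays zero, so that child position -1 (the padding for "no child") contributes the zero vector and leaves simply divide the zero vector by 1.

import numpy as np

def average_children_state(hidden_states, word_children_positions):
    # hidden_states: (max_phrase_length + 1, state_dim); the last row stays zero,
    # so padding position -1 ("no child") contributes the zero vector.
    schh = hidden_states[-1].copy()
    real = word_children_positions >= 0                        # mask of real children
    schh = schh + hidden_states[word_children_positions[real]].sum(axis=0)
    return schh / max(real.sum(), 1.0)                         # divide by 1 for leaves

# toy check: the current node has children stored at positions 0 and 2
hidden_states = np.zeros((5, 4))
hidden_states[0] = 1.0
hidden_states[2] = 3.0
print(average_children_state(hidden_states, np.array([0, 2, -1, -1])))   # [2. 2. 2. 2.]
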
modules/rnn/models.pyc 0 → 100644
No preview for this file type
modules/rnn/models_with_relations.py 0 → 100644
  1 +
  2 +import numpy as np
  3 +import time
  4 +import sys
  5 +import subprocess
  6 +import os
  7 +import random
  8 +
  9 +#from modules.data import load
  10 +#from modules.rnn.many_models import *
  11 +#from modules.metrics.accuracy import conlleval
  12 +from modules.utils.tools import load_stanford_data4
  13 +
  14 +from theano import pp
  15 +
  16 +import theano.tensor as T
  17 +import theano
  18 +from theano.sandbox.rng_mrg import MRG_RandomStreams #as MRG_RandomStreams
  19 +
  20 +import itertools
  21 +
  22 +import os.path
  23 +import pickle
  24 +
  25 +from collections import Counter
  26 +
  27 +
  28 +
  29 +from theano import tensor as T, printing
  30 +from collections import OrderedDict
  31 +from theano.ifelse import ifelse
  32 +
  33 +from keras.preprocessing import sequence as seq
  34 +
  35 +dataType = 'int64'
  36 +
  37 +
  38 +
  39 +
  40 +class MLP_2_1(object):
  41 +
  42 + # starting point for this model: model55_pf1
  43 +
  44 + def __init__(self, ne, nchd, nc, w2v_model_path, max_phrase_length, number_of_relations):
  45 + '''
  46 + ne :: size of the word part of a node's hidden state; nchd :: size of the children part
  47 + nc :: number of classes; number_of_relations :: number of distinct dependency relations (edge labels)
  48 + de :: dimension of the word embeddings (read from the embedding file)
  49 + w2v_model_path :: path to the pickled word embeddings; max_phrase_length :: maximum number of tokens in a phrase
  50 + '''
  51 + self.max_phrase_length = max_phrase_length
  52 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  53 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  54 + self.words2ids = w2vecs["words2ids"]
  55 +
  56 + #ne = len(w2vecs["words2ids"])
  57 + de = w2vecs["vectors"].shape[1]
  58 + del w2vecs
  59 +
  60 + r = 0.05
  61 + self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, ne)).astype(theano.config.floatX))
  62 + self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nc)).astype(theano.config.floatX))
  63 +
  64 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nchd)).astype(theano.config.floatX))
  65 +
  66 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  67 +
  68 + self.relations_weights = theano.shared(r * np.random.uniform(-1.0, 1.0, (number_of_relations+1, ne+nchd, ne+nchd)).astype(theano.config.floatX))
  69 +
  70 + def one_step(word_id, word_children_positions, y_true, relation, i, hidden_states, learning_rate):
  71 +
  72 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  73 +
  74 + tmp = word_children_positions>=0.0
  75 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  76 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  77 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  78 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  79 + schh = schh/number_of_children
  80 +
  81 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  82 +
  83 + current_hidden_state = hidden_states[i]
  84 + hidden_states_new = T.set_subtensor(current_hidden_state, T.dot(h, self.relations_weights[relation,:,:]))
  85 +
  86 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  87 +
  88 + #l2_norm = T.sum(self.W_h_h2**2) + T.sum(self.W_h2_y**2) + T.sum(self.W_e_h**2) + T.sum(self.W_sh_h**2) + T.sum(self.emb**2) + T.sum(self.b_h**2) + T.sum(self.b_y**2)
  89 +
  90 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  91 +
  92 + return cross_entropy, hidden_states_new
  93 +
  94 +
  95 + y = T.vector('y',dtype=dataType)
  96 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  97 + words = T.vector(dtype=dataType)
  98 + children_positions = T.matrix(dtype=dataType)
  99 +
  100 + relations = T.vector(dtype=dataType)
  101 +
  102 + words_indexes = T.vector(dtype=dataType)
  103 +
  104 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  105 + sequences = [words, children_positions,y, relations, words_indexes],
  106 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  107 + non_sequences = learning_rate,
  108 + n_steps = words.shape[0])
  109 + cost = T.sum(cross_entropy_vector[0])
  110 +
  111 + updates = OrderedDict([
  112 + (self.W_h_y, self.W_h_y-learning_rate*T.grad(cost, self.W_h_y)),
  113 + (self.W_e_h, self.W_e_h-learning_rate*T.grad(cost, self.W_e_h)),
  114 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  115 + (self.relations_weights, self.relations_weights - learning_rate*T.grad(cost,self.relations_weights)),
  116 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  117 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  118 + ])
  119 +
  120 + self.train = theano.function( inputs = [words, children_positions, y, relations, words_indexes, learning_rate],
  121 + outputs = [],
  122 + updates = updates,
  123 + allow_input_downcast=True,
  124 + mode='FAST_RUN'
  125 + )
  126 +
  127 +
  128 + def one_step_classify(word_id, word_children_positions, relation, i, hidden_states):
  129 +
  130 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  131 +
  132 + tmp = word_children_positions>=0.0
  133 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  134 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  135 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  136 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  137 + schh = schh/number_of_children
  138 +
  139 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]) )
  140 +
  141 + current_hidden_state = hidden_states[i]
  142 + hidden_states_new = T.set_subtensor(current_hidden_state, T.dot(h, self.relations_weights[relation,:,:]))
  143 +
  144 + y_prob = T.nnet.softmax(T.dot(h,self.W_h_y) + self.b_y)[0]
  145 +
  146 + return y_prob, hidden_states_new
  147 +
  148 +
  149 + [y_probs_classify, hidden_states ], _ = theano.scan(
  150 + fn=one_step_classify,
  151 + sequences = [words, children_positions, relations, words_indexes],
  152 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  153 +
  154 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  155 + sequences = [words_indexes])
  156 +
  157 + self.classify = theano.function(inputs=[words,children_positions,relations, words_indexes],
  158 + outputs=predictions,
  159 + allow_input_downcast=True,
  160 + mode='FAST_RUN'
  161 + )
  162 +
  163 +
  164 +
  165 +class MLP_2_2(object):
  166 +
  167 + # starting point for this model: model55_pf2
  168 +
  169 + def __init__(self, ne, nchd, nh2, nc, w2v_model_path, max_phrase_length, number_of_relations):
  170 + '''
  171 + ne :: size of the word part of a node's hidden state; nchd :: size of the children part
  172 + nh2 :: size of the additional hidden layer before the output; nc :: number of classes
  173 + number_of_relations :: number of distinct dependency relations (edge labels); de :: dimension of the word embeddings
  174 + w2v_model_path :: path to the pickled word embeddings; max_phrase_length :: maximum number of tokens in a phrase
  175 + '''
  176 + self.max_phrase_length = max_phrase_length
  177 + w2vecs = pickle.load(open(w2v_model_path,"r"))
  178 + self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
  179 + self.words2ids = w2vecs["words2ids"]
  180 +
  181 + de = w2vecs["vectors"].shape[1]
  182 + del w2vecs
  183 +
  184 + r = 0.05
  185 +
  186 + self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (de, ne)).astype(theano.config.floatX))
  187 +
  188 + self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nchd)).astype(theano.config.floatX))
  189 +
  190 + self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0, (ne+nchd, nh2)).astype(theano.config.floatX))
  191 + self.W_h2_y = theano.shared(r * np.random.uniform(-1.0, 1.0, (nh2, nc)).astype(theano.config.floatX))
  192 +
  193 + self.b_y = theano.shared(r * np.random.uniform(-1.0, 1.0, nc).astype(theano.config.floatX))
  194 +
  195 + self.relations_weights = theano.shared(r * np.random.uniform(-1.0, 1.0, (number_of_relations+1, ne+nchd, ne+nchd)).astype(theano.config.floatX))
  196 +
  197 + def one_step(word_id, word_children_positions, y_true, relation, i, hidden_states, learning_rate):
  198 +
  199 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  200 +
  201 + tmp = word_children_positions>=0.0
  202 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  203 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  204 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  205 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  206 + schh = schh/number_of_children
  207 +
  208 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  209 + #h = T.nnet.sigmoid(T.dot(self.emb[word_id],self.W_eh) + T.dot(schh,self.W_shsh) + self.bh)
  210 +
  211 + current_hidden_state = hidden_states[i]
  212 + hidden_states_new = T.set_subtensor(current_hidden_state, T.dot(h, self.relations_weights[relation,:,:]))
  213 +
  214 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  215 +
  216 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  217 +
  218 + cross_entropy = -T.log(y_prob[y_true]) # + norm_coefficient * l2_norm
  219 +
  220 + return cross_entropy, hidden_states_new
  221 +
  222 +
  223 + y = T.vector('y',dtype=dataType)
  224 + learning_rate = T.scalar('lr',dtype=theano.config.floatX)
  225 + words = T.vector(dtype=dataType)
  226 + children_positions = T.matrix(dtype=dataType)
  227 + relations = T.vector(dtype=dataType)
  228 + words_indexes = T.vector(dtype=dataType)
  229 +
  230 + cross_entropy_vector, _ = theano.scan(fn=one_step, \
  231 + sequences = [words, children_positions,y,relations,words_indexes],
  232 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))],
  233 + non_sequences = learning_rate,
  234 + n_steps = words.shape[0])
  235 + cost = T.sum(cross_entropy_vector[0])
  236 +
  237 + updates = OrderedDict([
  238 + (self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cost, self.W_h_h2)),
  239 + (self.W_h2_y, self.W_h2_y-learning_rate*T.grad(cost, self.W_h2_y)),
  240 + (self.W_e_h, self.W_e_h-learning_rate*T.grad(cost, self.W_e_h)),
  241 + (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cost, self.W_sh_h)),
  242 + (self.relations_weights, self.relations_weights - learning_rate*T.grad(cost,self.relations_weights)),
  243 + (self.emb, self.emb-learning_rate*T.grad(cost, self.emb)), #updated_current_emb), #
  244 + (self.b_y, self.b_y-learning_rate*T.grad(cost,self.b_y))
  245 + ])
  246 +
  247 + self.train = theano.function( inputs = [words, children_positions, y, relations, words_indexes, learning_rate],
  248 + outputs = [],
  249 + updates = updates,
  250 + allow_input_downcast=True,
  251 + mode='FAST_RUN'
  252 + )
  253 +
  254 +
  255 + def one_step_classify(word_id, word_children_positions, relation, i, hidden_states):
  256 +
  257 + schh = hidden_states[-1] #+ 0.5 # i.e. the zero vector
  258 +
  259 + tmp = word_children_positions>=0.0
  260 + idx_tmp = tmp.nonzero() # indices of the real children, i.e. positions that are not -1
  261 + schh = schh + hidden_states[word_children_positions[idx_tmp]].sum(axis=0) # sum of the children's hidden states
  262 + number_of_children = tmp.sum(dtype = theano.config.floatX)
  263 + number_of_children = ifelse( T.gt(number_of_children, 1.0),number_of_children, 1.0)
  264 + schh = schh/number_of_children
  265 +
  266 + h = T.tanh(T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]))
  267 +
  268 + current_hidden_state = hidden_states[i]
  269 + hidden_states_new = T.set_subtensor(current_hidden_state, T.dot(h, self.relations_weights[relation,:,:]))
  270 +
  271 + h2 = T.tanh(T.dot(h, self.W_h_h2))
  272 +
  273 + y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y) + self.b_y)[0]
  274 +
  275 + return y_prob, hidden_states_new
  276 +
  277 +
  278 + [y_probs_classify, hidden_states ], _ = theano.scan(
  279 + fn=one_step_classify,
  280 + sequences = [words, children_positions, relations, words_indexes],
  281 + outputs_info = [None, theano.shared(np.zeros((self.max_phrase_length+1,ne+nchd), dtype = theano.config.floatX))])
  282 +
  283 + predictions, _ = theano.scan(lambda i: T.argmax(y_probs_classify[i]),
  284 + sequences = [words_indexes])
  285 +
  286 + self.classify = theano.function(inputs=[words,children_positions,relations,words_indexes],
  287 + outputs=predictions,
  288 + allow_input_downcast=True,
  289 + mode='FAST_RUN'
  290 + )
  291 +
  292 +
  293 +
  294 +
... ...
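
A hypothetical usage sketch (not part of the committed files) for the relation-parameterized classes defined above. The call signatures follow the theano.function definitions of MLP_2_1.train and MLP_2_1.classify; the dimensions, the embedding path, the fallback word id and the toy arrays are invented placeholders.

import numpy as np
from modules.rnn.models_with_relations import MLP_2_1

# placeholder dimensions and embedding file; relations carry dependency-edge label ids
rnn = MLP_2_1(ne=100, nchd=100, nc=3,
              w2v_model_path="embeddings/filtered_embeddings.pkl",   # placeholder path
              max_phrase_length=60, number_of_relations=30)

# one toy tree of three tokens; the root is last, with children at positions 0 and 1
words = np.array([rnn.words2ids.get(w, 0) for w in ["to", "dobry", "film"]])
children_positions = np.full((3, 5), -1)       # padded with -1 for "no child"
children_positions[2, :2] = [0, 1]
relations = np.array([3, 7, 0])                # edge-label id between each token and its parent
labels = np.array([1, 2, 2])                   # per-node sentiment classes (0..nc-1)
words_indexes = np.arange(3)

rnn.train(words, children_positions, labels, relations, words_indexes, 0.002)
predictions = rnn.classify(words, children_positions, relations, words_indexes)
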
modules/rnn/models_with_relations.pyc 0 → 100644
No preview for this file type
modules/rnn/tmp.py deleted
1   -import theano
2   -import numpy as np
3   -import os
4   -import pickle
5   -
6   -from theano import tensor as T, printing
7   -from collections import OrderedDict
8   -from theano.ifelse import ifelse
9   -
10   -theano.config.floatX = 'float64'
11   -dataType = 'int64'
12   -
13   -class model(object):
14   -
15   - def __init__(self, nh, nc, ds, w2v_model_path, max_phrase_length):
16   - '''
17   - nh :: dimension of hidden state
18   - nc :: number of classes
19   - ne :: number of word embeddings in the vocabulary
20   - de :: dimension of the word embeddings
21   -
22   - ds :: dimension of the sentiment state
23   - '''
24   -
25   -
26   -
27   - self.max_phrase_length = max_phrase_length
28   -
29   - ###ne = len(model.index2word)
30   - ###de = model.vector_size
31   -
32   - ###vectors = np.zeros((ne,de))
33   - ###self.words2ids = {}
34   - ###for i in range(len(model.index2word)):
35   - ### self.words2ids[model.index2word[i]] = i
36   - ### vectors[i] = model[model.index2word[i]]
37   -
38   - w2vecs = pickle.load(open(w2v_model_path,"r"))
39   - #self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
40   - self.emb = theano.shared(np.load("saved_models4/embeddings.npy").astype(theano.config.floatX))
41   - self.words2ids = w2vecs["words2ids"]
42   -
43   - ne = len(w2vecs["words2ids"])
44   - de = w2vecs["vectors"].shape[1]
45   -
46   - del w2vecs
47   -
48   - #self.words2ids = {}
49   - #vectors = []
50   - #i = 0
51   - #for line in open(w2v_model_path,"r"):
52   - # toks = line.strip("\n").split(" ")
53   - # word = toks[0]
54   - # v = map(float, toks[1:])
55   - # vectors.append(v)
56   - # self.words2ids[word] = i
57   - # i = i + 1
58   - #vectors.append(np.zeros((len(vectors[0]))))
59   - #vectors = np.array(vectors)
60   - #print(vectors.shape)
61   - #self.emb = theano.shared(vectors.astype(theano.config.floatX))
62   -
63   - #ne = i
64   - #de = len(vectors[0])
65   -
66   - # we will need to handle the case where a word in the data has no embedding in the model
67   -
68   - ###del model
69   - #del vectors
70   -
71   - #self.sent_states = theano.shared(0.2 * np.concatenate((
72   - # np.random.uniform(-1.0, 1.0,(ne, ds)),np.zeros((1,ds))),axis=0).astype(theano.config.floatX))
73   - # we add one zero vector, needed to compute the sum of children for leaves
74   - # (i.e. a leaf symbolically has a null child, and that child has zero sentiment)
75   - # should it also be used to represent rare words in the training set?
76   - # compare: 1) using the zero vector for words unseen in the training set; 2) taking the values of the training-set word whose embedding is most similar
77   - # this will have to be taken into account when applying the network
78   -
79   - r = 0.05
80   -
81   -
82   - #self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0,\
83   - # (de, nh)).astype(theano.config.floatX))
84   - self.W_e_h = theano.shared(np.load("saved_models4/W_eh25.npy").astype(theano.config.floatX))
85   -
86   - self.W_sh = theano.shared(r * np.random.uniform(-1.0, 1.0,\
87   - (ds, nh)).astype(theano.config.floatX))
88   -
89   - #self.W_h2_y = theano.shared(r * np.random.uniform(-1.0, 1.0,\
90   - # (2*nh, nc)).astype(theano.config.floatX))
91   - self.W_h2_y = theano.shared(np.load("saved_models4/W_hh225.npy").astype(theano.config.floatX))
92   -
93   - #self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0,\
94   - # (2*nh, 2*nh)).astype(theano.config.floatX))
95   - self.W_h_h2 = theano.shared(np.load("saved_models4/W_h2y25.npy").astype(theano.config.floatX))
96   -
97   - self.W_ssy = theano.shared(r * np.random.uniform(-1.0, 1.0,\
98   - (ds, nc)).astype(theano.config.floatX))
99   -
100   - #self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0,\
101   - # (2*nh, nh)).astype(theano.config.floatX))
102   - self.W_sh_h = theano.shared(np.load("saved_models4/W_shsh25.npy").astype(theano.config.floatX))
103   -
104   - self.bh = theano.shared(np.zeros(nh, dtype=theano.config.floatX))
105   - self.b = theano.shared(np.zeros(nc, dtype=theano.config.floatX))
106   -
107   -
108   - # bundle
109   - self.params = [ self.W_h2_y, self.W_h_h2, self.W_e_h, self.W_sh_h,self.emb]#, self.bh, self.b ]
110   - self.names = [ "W_hh2", 'W_h2y', 'W_eh', 'W_shsh', "embeddings"]#, 'bh', 'b']#, 'h0']
111   -
112   -
113   - # computes the sentiment of the current word / the prediction
114   - # word_id = the current word
115   - # i = index of word_id within the sentence
116   - # word_children_ids = ids of the children of the current word
117   - # word_children_positions = positions of word_children_ids
118   - def one_step(word_id, word_children_ids, word_children_positions, i, hidden_states):
119   -
120   -
121   -
122   - idx_tmp = (word_children_positions>=0).nonzero()
123   - tmp = T.zeros_like(word_children_positions)
124   - tmp2 = T.set_subtensor(tmp[idx_tmp], 1)
125   - number_of_children = tmp2.sum()
126   -
127   - #pnoc = theano.printing.Print('Number of children: ')
128   - #printed_number_of_children = pnoc(number_of_children)
129   -
130   -
131   - # try putting a vector of 0.5 values instead of zeros
132   - schh = hidden_states[word_children_positions].sum(axis=0) /( number_of_children + 0.000001) # small constant added to avoid ifs when there are no children (the sum is zero anyway, so the division does not matter)
133   - h = T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]) # bez biasa i sigmoida
134   -
135   - #h = T.nnet.sigmoid(T.dot(self.emb[word_id],self.W_eh) + T.dot(schh,self.W_shsh) + self.bh)
136   -
137   - #h_s = T.zeros_like(hidden_states)
138   - #zeros_subtensor = h_s[i]
139   - #new_h_s = T.set_subtensor(zeros_subtensor, h)
140   -
141   - zeros_subtensor = hidden_states[i]
142   - hidden_states_new = T.set_subtensor(zeros_subtensor, h)
143   -
144   - h2 = T.dot(h, self.W_h_h2)
145   -
146   - y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y))# + self.b)
147   -
148   -
149   - # the above is not done very sensibly: if there are several "-1" entries among the children, that vector is added that many times
150   - # could this be turned into a loop so that the -1 vector is not added several times?
151   - # for now it does not matter, because that vector is constantly zero and does not change during training
152   -
153   - return i+1, hidden_states_new, y_prob
154   -
155   -
156   -
157   - words = T.vector(dtype=dataType)
158   - children_ids = T.matrix(dtype=dataType)
159   - children_positions = T.matrix(dtype=dataType)
160   -
161   - y_probs, _ = theano.scan(fn=one_step, \
162   - sequences = [words, children_ids, children_positions],
163   - outputs_info = [theano.shared(0),
164   - theano.shared(np.zeros((self.max_phrase_length+1,2*nh), dtype = theano.config.floatX)),
165   - None],
166   - n_steps = words.shape[0])
167   -
168   -
169   - estimated_probs = y_probs[-1][-1][0]
170   -
171   - y_pred = T.argmax(estimated_probs) # y_probs[-1][-1][0] returns the vector [P(y=0), P(y=1)] -> argmax gives the predicted class
172   - # we take the prediction for the last word; classifying the last word classifies the whole phrase,
173   - # because the words are ordered so that the root is the last word
174   -
175   -
176   - y = T.scalar('y',dtype=dataType)
177   -
178   - # cost and gradients and learning rate
179   - lr = T.scalar('lr',dtype=theano.config.floatX)
180   -
181   - nll = -T.log(estimated_probs)[y] # same as (verified):
182   - #nll = T.nnet.nnet.categorical_crossentropy(estimated_probs,T.extra_ops.to_one_hot(y.dimshuffle('x'), 5)[0])
183   -
184   - gradients = T.grad( nll, self.params )
185   - updates = OrderedDict(( p, p-lr*g ) for p, g in zip( self.params , gradients))
186   -
187   - # note: the last row of the sent_states matrix (the vector for a child that does not exist) is constantly equal to zero
188   -
189   -
190   - # theano functions
191   - self.classify = theano.function(inputs=[words,children_ids,children_positions], outputs=y_pred,
192   - allow_input_downcast=True,
193   - mode='FAST_RUN' )
194   -
195   - self.train = theano.function( inputs = [words,children_ids, children_positions, y, lr],
196   - outputs = nll,
197   - updates = updates,
198   - allow_input_downcast=True,
199   - mode='FAST_RUN' )
200   -
201   -
202   - #self.normalize = theano.function( inputs = [], #watch out for division by 0 - the last row of sent_states is zero
203   - # updates = {self.sent_states:\
204   - # self.sent_states/T.sqrt((self.sent_states**2).sum(axis=1))})#.dimshuffle(0,'x')})
205   -
206   - def save(self, folder, e):
207   - for param, name in zip(self.params, self.names):
208   - np.save(os.path.join(folder, name + str(e) + '.npy'), param.get_value())
209   -
210   -
211   -
212   -
213   -
214   -
215   -class model2(object):
216   -
217   - '''
218   -
219   - '''
220   -
221   -
222   - def __init__(self, nh, nc, ds, w2v_model_path, max_phrase_length):
223   - '''
224   - nh :: dimension of hidden state
225   - nc :: number of classes
226   - ne :: number of word embeddings in the vocabulary
227   - de :: dimension of the word embeddings
228   -
229   - ds :: dimension of the sentiment state
230   - '''
231   -
232   - self.max_phrase_length = max_phrase_length
233   -
234   - w2vecs = pickle.load(open(w2v_model_path,"r"))
235   -
236   - self.emb = theano.shared(w2vecs["vectors"].astype(theano.config.floatX))
237   - #self.emb = theano.shared(np.load("saved_models_final1/embeddings"+str(e)+"_200.npy").astype(theano.config.floatX))
238   -
239   - self.words2ids = w2vecs["words2ids"]
240   -
241   - ne = len(w2vecs["words2ids"])
242   - de = w2vecs["vectors"].shape[1]
243   -
244   - del w2vecs
245   -
246   - #self.sent_states = theano.shared(0.2 * np.concatenate((
247   - # np.random.uniform(-1.0, 1.0,(ne, ds)),np.zeros((1,ds))),axis=0).astype(theano.config.floatX))
248   - # we add one zero vector, needed to compute the sum of
249   - # children for leaves (i.e. a leaf symbolically has a null child - and that child has zero sentiment)
250   - # also use it to represent rare words from the training set?
251   - # compare two options: 1) using the zero vector for words unseen in the training set; 2) taking the values of the most embedding-similar word that does occur in the training set
252   - # this will have to be taken into account when applying the network
253   -
254   - r = 0.05
255   -
256   -
257   - self.W_e_h = theano.shared(r * np.random.uniform(-1.0, 1.0,\
258   - (de, nh)).astype(theano.config.floatX))
259   - #self.W_e_h = theano.shared(np.load("saved_models_final1/W_eh"+str(e)+"_200.npy").astype(theano.config.floatX))
260   -
261   - self.W_sh = theano.shared(r * np.random.uniform(-1.0, 1.0,\
262   - (ds, nh)).astype(theano.config.floatX))
263   -
264   - self.W_h2_y = theano.shared(r * np.random.uniform(-1.0, 1.0,\
265   - (2*nh, nc)).astype(theano.config.floatX))
266   - #self.W_h2_y = theano.shared(np.load("saved_models_final1/W_h2y"+str(e)+"_200.npy").astype(theano.config.floatX))
267   -
268   - self.W_h_h2 = theano.shared(r * np.random.uniform(-1.0, 1.0,\
269   - (2*nh, 2*nh)).astype(theano.config.floatX))
270   - #self.W_h_h2 = theano.shared(np.load("saved_models_final1/W_hh2"+str(e)+"_200.npy").astype(theano.config.floatX))
271   -
272   - self.W_ssy = theano.shared(r * np.random.uniform(-1.0, 1.0,\
273   - (ds, nc)).astype(theano.config.floatX))
274   -
275   - self.W_sh_h = theano.shared(r * np.random.uniform(-1.0, 1.0,\
276   - (2*nh, nh)).astype(theano.config.floatX))
277   - #self.W_sh_h = theano.shared(np.load("saved_models_final1/W_shsh"+str(e)+"_200.npy").astype(theano.config.floatX))
278   -
279   -
280   - self.W_h_y = theano.shared(r * np.random.uniform(-1.0, 1.0,\
281   - (2*nh, nc)).astype(theano.config.floatX))
282   -
283   - self.bh = theano.shared(np.zeros(nh, dtype=theano.config.floatX))
284   - self.b = theano.shared(np.zeros(nc, dtype=theano.config.floatX))
285   -
286   -
287   - # bundle
288   - self.params = [ self.W_h_y, self.W_e_h, self.W_sh_h, self.emb]# self.W_h2_y, self.W_h_h2,
289   - self.names = [ "W_h_y", 'W_eh', 'W_shsh', "embeddings"]# 'W_h2y', "W_hh2",
290   -
291   -
292   - shared_zero = theano.shared(0)
293   - shared_one = theano.shared(1)
294   -
295   - # computes the sentiment of the current word / the prediction
296   - # word_id = the current word
297   - # i = index of the word word_id within the sentence
298   - # word_children_ids = ids of the current word's children
299   - # word_children_positions = positions of word_children_ids
300   - def one_step(word_id, word_children_ids, word_children_positions, y_true, i, hidden_states, learning_rate):
301   -
302   - p = printing.Print('word_children_positions: ')
303   - word_children_positions = p(word_children_positions)
304   -
305   -
306   - idx_tmp = (word_children_positions>=0).nonzero()
307   - tmp = T.zeros_like(word_children_positions)
308   - tmp2 = T.set_subtensor(tmp[idx_tmp], 1)
309   - number_of_children = tmp2.sum(dtype = dataType)
310   -
311   - number_of_children = ifelse(T.eq(number_of_children, shared_zero), shared_one, number_of_children)
312   - # try using a vector of 0.5 values instead of zeros
313   -
314   - hello_world_op = printing.Print('number_of_children: ')
315   - number_of_children = hello_world_op(number_of_children)
316   -
317   -
318   - schh = hidden_states[word_children_positions].sum(axis=0) / number_of_children#( number_of_children + 0.000001)
319   -#0.000001 added to avoid ifs when there are no children (the sum is zero anyway, so the division does not matter)
320   - h = T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]) # no bias and no sigmoid
321   -
322   - #h = T.nnet.sigmoid(T.dot(self.emb[word_id],self.W_eh) + T.dot(schh,self.W_shsh) + self.bh)
323   -
324   - #h_s = T.zeros_like(hidden_states)
325   - #zeros_subtensor = h_s[i]
326   - #new_h_s = T.set_subtensor(zeros_subtensor, h)
327   -
328   - zeros_subtensor = hidden_states[i]
329   - hidden_states_new = T.set_subtensor(zeros_subtensor, h)
330   -
331   - #h2 = T.dot(h, self.W_h_h2)
332   -
333   - #y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y))# + self.b)
334   -
335   - y_prob = T.nnet.softmax(T.dot(h,self.W_h_y))# + self.b)
336   -
337   - cce = -T.log(y_prob[0][y_true])
338   -
339   - #learning_rate = 0.01
340   -
341   - updates = OrderedDict([#(self.W_h2_y, self.W_h2_y-learning_rate*T.grad(cce, self.W_h2_y)),
342   - (self.W_h_y, self.W_h_y-learning_rate*T.grad(cce, self.W_h_y)),
343   - #(self.W_h_h2, self.W_h_h2-learning_rate*T.grad(cce, self.W_h_h2)),
344   - (self.W_e_h, self.W_e_h-learning_rate*T.grad(cce, self.W_e_h)),
345   - (self.W_sh_h, self.W_sh_h-learning_rate*T.grad(cce, self.W_sh_h)),
346   - (self.emb, self.emb-learning_rate*T.grad(cce, self.emb))
347   - ])
348   -
349   -
350   - return (i+1,hidden_states_new, y_prob), updates
351   -
352   -
353   -
354   -
355   - y = T.vector('y',dtype=dataType)
356   -
357   - lr = T.scalar('lr',dtype=theano.config.floatX)
358   -
359   - words = T.vector(dtype=dataType)
360   - children_ids = T.matrix(dtype=dataType)
361   - children_positions = T.matrix(dtype=dataType)
362   - #words_indexes = T.vector(dtype=dataType)
363   -
364   - y_probs, upd = theano.scan(fn=one_step, \
365   - sequences = [words, children_ids, children_positions,y],#,words_indexes],
366   - outputs_info = [theano.shared(0),
367   - theano.shared(np.zeros((self.max_phrase_length+1,2*nh), dtype = theano.config.floatX)),
368   - None],
369   - non_sequences = lr,
370   - n_steps = words.shape[0])
371   -
372   -
373   - def one_step_classify(word_id, word_children_ids, word_children_positions, i, hidden_states):
374   -
375   -
376   - idx_tmp = (word_children_positions>=0).nonzero()
377   - tmp = T.zeros_like(word_children_positions)
378   - tmp2 = T.set_subtensor(tmp[idx_tmp], 1)
379   - number_of_children = tmp2.sum()
380   -
381   - schh = hidden_states[word_children_positions].sum(axis=0) / ifelse(T.eq(number_of_children, shared_zero), shared_one, number_of_children)
382   - h = T.concatenate([T.dot(self.emb[word_id],self.W_e_h), T.dot(schh,self.W_sh_h)]) # bez biasa i sigmoida
383   -
384   - zeros_subtensor = hidden_states[i]
385   - hidden_states_new = T.set_subtensor(zeros_subtensor, h)
386   -
387   - #h2 = T.dot(h, self.W_h_h2)
388   - #y_prob = T.nnet.softmax(T.dot(h2,self.W_h2_y))# + self.b)
389   - y_prob = T.nnet.softmax(T.dot(h,self.W_h_y))
390   -
391   - return i+1, hidden_states_new, y_prob
392   -
393   -
394   -
395   - y_probs_classify, _ = theano.scan(fn=one_step_classify, \
396   - sequences = [words, children_ids, children_positions],
397   - outputs_info = [theano.shared(0),
398   - theano.shared(np.zeros((self.max_phrase_length+1,2*nh), dtype = theano.config.floatX)),
399   - None],
400   - n_steps = words.shape[0])
401   -
402   -
403   -
404   -
405   - predictions, _ = theano.scan(lambda i: (i+1, T.argmax(y_probs_classify[2][i][0])), outputs_info = [theano.shared(0), None], n_steps = y_probs_classify[2].shape[0])
406   -
407   - #res2 , _ = theano.scan(lambda x,i : (i+1, T.argmax(x)),
408   - # sequences = [estimated_probs[1]],
409   - # outputs_info = [theano.shared(0), None]
410   - # )
411   -
412   -
413   -# minus_log_true_class_prob = res[1]
414   - #prediction_class = res2[1]
415   -
416   -
417   -# nll = minus_log_true_class_prob.sum()
418   -
419   - #y_pred = T.argmax(estimated_probs) # y_probs[-1][-1][0] returns the vector [P(y=0), P(y=1), ...] -> argmax returns the predicted class
420   - # we take the prediction for the last word; classifying the last word amounts to classifying the phrase,
421   - # because the words are ordered so that the root is the last word
422   -
423   -
424   -
425   -
426   - # cost and gradients and learning rate
427   - #nll = -T.log(estimated_probs[1])[y] #the same as (verified):
428   - #nll = T.nnet.nnet.categorical_crossentropy(estimated_probs,T.extra_ops.to_one_hot(y.dimshuffle('x'), 5)[0])
429   -
430   -# gradients = T.grad( nll, self.params )
431   -# updates = OrderedDict(( p, p-lr*g ) for p, g in zip( self.params , gradients))
432   -
433   - # note: the last row of the sent_states matrix - the vector corresponding to a missing child - is constantly equal to zero
434   -
435   -
436   - # theano functions
437   - self.classify = theano.function(inputs=[words,children_ids,children_positions], outputs=predictions[1],
438   - allow_input_downcast=True,
439   - mode='FAST_RUN' )
440   -
441   - self.train = theano.function( inputs = [words,children_ids, children_positions, y, lr],#, words_indexes
442   - outputs = [],#nll,
443   - updates = upd,#updates,
444   - allow_input_downcast=True,
445   - mode='FAST_RUN' )
446   -
447   -
448   - #self.normalize = theano.function( inputs = [], #watch out for division by 0 - the last row of sent_states is zero
449   - # updates = {self.sent_states:\
450   - # self.sent_states/T.sqrt((self.sent_states**2).sum(axis=1))})#.dimshuffle(0,'x')})
451   -
452   - def save(self, folder, e, i):
453   - for param, name in zip(self.params, self.names):
454   - np.save(os.path.join(folder, name + str(e) + "_" + str(i) + '.npy'), param.get_value())
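
For reference, the per-word step that both deleted classes implement can be summarised outside Theano: average the already-computed hidden states of the word's children, project the word embedding with W_e_h and the averaged child state with W_sh_h, concatenate the two projections, and score the classes with a softmax over W_h_y. Below is a minimal NumPy sketch under those assumptions; one_step_np and its argument layout are illustrative, not part of the deleted module.

import numpy as np

def one_step_np(emb, W_e_h, W_sh_h, W_h_y, word_id, word_children_positions, i, hidden_states):
    # -1 positions are padding; in the Theano code they index the last, never-written
    # (all-zero) row of hidden_states, so dropping them here gives the same sum
    children = word_children_positions[word_children_positions >= 0]
    number_of_children = max(len(children), 1)               # guard against division by zero
    schh = hidden_states[children].sum(axis=0) / number_of_children
    h = np.concatenate([emb[word_id].dot(W_e_h), schh.dot(W_sh_h)])  # no bias, no nonlinearity
    hidden_states[i] = h                                      # stored so the parent can average over it
    scores = h.dot(W_h_y)
    y_prob = np.exp(scores - scores.max())
    return h, y_prob / y_prob.sum()                           # softmax over the nc classes

The phrase-level prediction then falls out of the final step of the scan, since the words are fed in an order in which the root comes last.
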
modules/rnn/tmp.pyc deleted
No preview for this file type
modules/utils/tools.py
... ... @@ -3,8 +3,12 @@ import numpy
3 3 from keras.preprocessing import sequence as seq
4 4 import theano
5 5  
  6 +from collections import Counter
  7 +
6 8 import pickle
7 9  
  10 +
  11 +
8 12 def shuffle(lol, seed):
9 13 '''
10 14 lol :: list of list as input
... ... @@ -70,7 +74,6 @@ def filter_embeddings(datasets, embedding_path, destination):
70 74  
71 75  
72 76  
73   -
74 77 def words_in_from_down_to_top_order(sentence_tree):
75 78 #print sentence_tree
76 79 levels = numpy.setdiff1d(range(len(sentence_tree)),numpy.unique(sentence_tree)) # - returns the word(s) that are not anybody's child, i.e. they should be the root(s) of the phrase(s)
... ... @@ -81,7 +84,8 @@ def words_in_from_down_to_top_order(sentence_tree):
81 84 for i in range(len(sentence_tree)):
82 85 #print i
83 86 #print levels[i]
84   - levels.extend(numpy.setdiff1d(sentence_tree[levels[i]],-1))
  87 + x = numpy.setdiff1d(sentence_tree[levels[i]],-1)
  88 + levels.extend(x[x<len(sentence_tree)])
85 89  
86 90 ordered_words = numpy.array(levels)[levels != numpy.array(-1)][::-1] #we reverse the order: the deepest-lying words come first
87 91  
... ... @@ -94,7 +98,6 @@ def words_in_from_down_to_top_order(sentence_tree):
94 98  
95 99  
96 100  
97   -
98 101 def load_conll_data(conll_format_data, words2ids):
99 102  
100 103  
... ... @@ -633,12 +636,7 @@ def load_stanford_data3(labels, parents, tokens, words2ids, use_batch, batch_siz
633 636  
634 637 def load_stanford_data4(labels, parents, tokens, words2ids, use_batch, batch_size, nb_classes):
635 638  
636   -
637   -
638   -
639 639 def transform_labels(x, nb_classes):
640   -
641   -
642 640 if nb_classes == 3:
643 641 if x =='#' or int(x) == 0:
644 642 return 1
... ... @@ -685,19 +683,12 @@ def load_stanford_data4(labels, parents, tokens, words2ids, use_batch, batch_siz
685 683  
686 684 for labels_i,parents_i,tokens_i in zip(labels,parents,tokens):
687 685  
688   -
689   -
690 686 k = k + 1
691   -
692   -
  687 +
693 688 s = []
694 689 for i in range(len(tokens_i)):
695 690 s.append([i,int(parents_i[i]),labels_i[i],tokens_i[i]])
696 691  
697   -
698   -
699   -
700   -
701 692 if len(s) == 1 and use_batch == False: #the case where the phrase consists of a single token
702 693  
703 694 #if nb_classes == 2:
... ... @@ -743,7 +734,6 @@ def load_stanford_data4(labels, parents, tokens, words2ids, use_batch, batch_siz
743 734 # if current_sentence[3][-1] <0:
744 735 # continue
745 736  
746   -
747 737 if use_batch == True:
748 738  
749 739 # at the moment len(current_sentence[0]) is not used anywhere
... ... @@ -770,8 +760,7 @@ def load_stanford_data4(labels, parents, tokens, words2ids, use_batch, batch_siz
770 760 #we drop the children-id matrix: batch_children_ids.append(current_batch[sent][0][1][tok])
771 761 batch_labels.append(current_batch[sent][0][2][tok])
772 762 batch_words.append(current_batch[sent][0][3][tok])
773   -
774   -
  763 +
775 764 #wyrzucamy macierz id dzieci batch_children_ids = seq.pad_sequences(batch_children_ids, padding='post', value = -1)
776 765 batch_children_positions = seq.pad_sequences(batch_children_positions, padding='post', value = -1)
777 766  
... ... @@ -785,8 +774,7 @@ def load_stanford_data4(labels, parents, tokens, words2ids, use_batch, batch_siz
785 774  
786 775 current_batch, batch_tokens, batch_children_positions, batch_labels = [], [], [], []
787 776 batch_words = []
788   -
789   -
  777 +
790 778 else:
791 779  
792 780 sentences.append(current_sentence)
... ... @@ -826,15 +814,13 @@ def load_stanford_data4(labels, parents, tokens, words2ids, use_batch, batch_siz
826 814 numpy.array(batch_labels)
827 815 ,numpy.array(batch_words)
828 816 ])
829   -
830   -
831   -
832   -
  817 +
833 818 return sentences
834 819  
835 820  
836 821  
837 822  
  823 +
838 824 def load_stanford_data5(labels, parents, tokens, words2ids, use_batch, batch_size, nb_classes):
839 825  
840 826  
... ... @@ -1033,5 +1019,200 @@ def load_stanford_data5(labels, parents, tokens, words2ids, use_batch, batch_siz
1033 1019  
1034 1020  
1035 1021  
  1022 +def load_stanford_data6(labels, parents, tokens, relations, words2ids, use_batch, batch_size, nb_classes, k_most_common_relations):
  1023 +
  1024 + def transform_labels(x, nb_classes):
  1025 + if nb_classes == 3:
  1026 + if x =='#' or int(x) == 0:
  1027 + return 1
  1028 + elif int(x) < 0:
  1029 + return 0
  1030 + else:
  1031 + return 2
  1032 + elif nb_classes == 5:
  1033 + if x =='#':
  1034 + return 2
  1035 + else:
  1036 + return int(x)+2
  1037 + # elif nb_classes == 2: #if we want two classes, neutral examples are dropped from the dataset
  1038 + # if x =='#' or int(x) == 0:
  1039 + # return -1
  1040 + # elif int(x) < 0:
  1041 + # return 0
  1042 + # else:
  1043 + # return 1
  1044 +
  1045 + sentences = []
  1046 +
  1047 + l = open(labels, "r")
  1048 + # 5 classes: labels = [[2 if y=='#' else int(y)+2 for y in x.split()] for x in l.readlines()]
  1049 +
  1050 + # For now we substitute the value "2" for "#"
  1051 +
  1052 + labels = [[transform_labels(y,nb_classes) for y in x.split()] for x in l.readlines()]
  1053 + l.close()
  1054 +
  1055 + p = open(parents,"r")
  1056 + parents = [[int(y) for y in x.split()] for x in p.readlines()]
  1057 + p.close()
  1058 +
  1059 + t = open(tokens,"r")
  1060 + tokens = [x.split() for x in t.readlines()]
  1061 + t.close()
  1062 +
  1063 +
  1064 + rels = open(relations,"r")
  1065 + relations = [[y for y in x.split()] for x in rels.readlines()]
  1066 + rels.close()
  1067 + most_common_rels = [x[0] for x in Counter(numpy.concatenate(relations)).most_common(k_most_common_relations)]
  1068 + transform_rels = dict(zip(most_common_rels,range(len(most_common_rels))))
  1069 + relations = [[transform_rels.get(x, k_most_common_relations) for x in sent] for sent in relations]
  1070 +
  1071 +
  1072 + k = 0
  1073 + sentence_length = 0
  1074 + current_batch, batch_tokens, batch_children_ids, batch_children_positions, batch_labels, batch_relations = [], [], [], [], [], []
  1075 + batch_words = []
  1076 +
  1077 + for labels_i, parents_i, tokens_i, relations_i in zip(labels,parents,tokens,relations):
  1078 +
  1079 + k = k + 1
  1080 +
  1081 + s = []
  1082 + for i in range(len(tokens_i)):
  1083 + s.append([i,int(parents_i[i]),labels_i[i],tokens_i[i],relations_i[i]])
  1084 +
  1085 + if len(s) == 1 and use_batch == False: #the case where the phrase consists of a single token
  1086 +
  1087 + #if nb_classes == 2:
  1088 + # if s[0][-1] < 0:
  1089 + # continue
  1090 +
  1091 + sentences.append([\
  1092 + numpy.array([words2ids.get(tokens[0], -1)]),\
  1093 + #we drop the children-id matrix: numpy.array([-1], ndmin=2),\
  1094 + numpy.array([-1], ndmin=2), \
  1095 + numpy.array(labels_i[0]), \
  1096 + numpy.array(relations_i[0])
  1097 + ])
  1098 +
  1099 + else:
  1100 +
  1101 + for i in range(len(s)): # not sure whether this breaks for a phrase of length 1
  1102 + children = []
  1103 + for j in range(len(s)):
  1104 + if s[j][1] == i+1:
  1105 + children.append(s[j][0])
  1106 + s[i].append(children)
  1107 +
  1108 + words = [x[0] for x in s]
  1109 + children = seq.pad_sequences([x[-1] for x in s], padding='post', value = -1)
  1110 + tokens = [x[3] for x in s]
  1111 + labels_in_batch = [x[2] for x in s]
  1112 + relations = [x[4] for x in s]
  1113 +
  1114 + ordered_words, order = words_in_from_down_to_top_order(children)
  1115 +
  1116 + if ordered_words is None:
  1117 + continue
  1118 +
  1119 + current_sentence = [
  1120 + numpy.array([words2ids.get(x,-1) for x in tokens])[ordered_words],
  1121 + #we drop the children-id matrix: numpy.array([[words2ids.get(tokens[w],-1) if w>=0 else -1 for w in x]
  1122 + # for x in children[ordered_words]]),
  1123 + numpy.array([[order[w] if w>= 0 else -1 for w in x] for x in children[ordered_words]]),
  1124 + numpy.array(labels_in_batch)[ordered_words],
  1125 + numpy.array(relations)[ordered_words] ,
  1126 + numpy.array(words)
  1127 + ]
  1128 + #if nb_classes == 2:
  1129 + # if current_sentence[3][-1] <0:
  1130 + # continue
  1131 +
  1132 + if use_batch == True:
  1133 +
  1134 + # at the moment len(current_sentence[0]) is not used anywhere
  1135 + current_batch.append((current_sentence, len(current_sentence[0])))
  1136 +
  1137 + if len(current_batch) % batch_size == 0:
  1138 +
  1139 + shift = 0
  1140 +
  1141 + for sent in range(batch_size):
  1142 +
  1143 + ##if sent > 0:
  1144 + ## shift = shift + current_batch[sent-1][1]
  1145 +
  1146 + for tok in range(len(current_batch[sent][0][0])):
  1147 +
  1148 + if sent == 0:
  1149 + batch_children_positions.append(current_batch[sent][0][1][tok])
  1150 + else:
  1151 + batch_children_positions.append([chd+shift if chd>=0 else -1 for chd in current_batch[sent][0][1][tok]])
  1152 + #batch_children_positions.append(current_batch[sent][0][2][tok])
1036 1153  
  1154 + batch_tokens.append(current_batch[sent][0][0][tok])
  1155 + #we drop the children-id matrix: batch_children_ids.append(current_batch[sent][0][1][tok])
  1156 + batch_labels.append(current_batch[sent][0][2][tok])
  1157 + batch_relations.append(current_batch[sent][0][3][tok])
  1158 + batch_words.append(current_batch[sent][0][4][tok])
  1159 +
  1160 + #we drop the children-id matrix: batch_children_ids = seq.pad_sequences(batch_children_ids, padding='post', value = -1)
  1161 + batch_children_positions = seq.pad_sequences(batch_children_positions, padding='post', value = -1)
  1162 +
  1163 + sentences.append([
  1164 + numpy.array(batch_tokens),
  1165 + #we drop the children-id matrix: numpy.array(batch_children_ids),
  1166 + numpy.array(batch_children_positions),
  1167 + numpy.array(batch_labels),
  1168 + numpy.array(batch_relations)
  1169 + ,numpy.array(batch_words)
  1170 + ])
  1171 +
  1172 + current_batch, batch_tokens, batch_children_positions, batch_labels, batch_relations = [], [], [], [], []
  1173 + batch_words = []
  1174 +
  1175 + else:
  1176 +
  1177 + sentences.append(current_sentence)
  1178 +
  1179 +
  1180 + # when the number of sentences is not a multiple of the batch size, the remaining sentences have to be appended at the end:
  1181 + if use_batch == True and len(current_batch) > 0:
  1182 +
  1183 + shift = 0
  1184 +
  1185 + for sent in range(len(current_batch)):
  1186 +
  1187 + #if sent > 0:
  1188 + # shift = shift + current_batch[sent-1][1]
  1189 +
  1190 + for tok in range(len(current_batch[sent][0][0])):
  1191 +
  1192 + if sent == 0:
  1193 + batch_children_positions.append(current_batch[sent][0][1][tok])
  1194 + else:
  1195 + batch_children_positions.append([chd+shift if chd>=0 else -1 for chd in current_batch[sent][0][1][tok]])
  1196 + #batch_children_positions.append(current_batch[sent][0][2][tok])
  1197 +
  1198 + batch_tokens.append(current_batch[sent][0][0][tok])
  1199 + #we drop the children-id matrix: batch_children_ids.append(current_batch[sent][0][1][tok])
  1200 + batch_labels.append(current_batch[sent][0][2][tok])
  1201 + batch_relations.append(current_batch[sent][0][3][tok])
  1202 + batch_words.append(current_batch[sent][0][4][tok])
  1203 +
  1204 +
  1205 + #we drop the children-id matrix: batch_children_ids = seq.pad_sequences(batch_children_ids, padding='post', value = -1)
  1206 + batch_children_positions = seq.pad_sequences(batch_children_positions, padding='post', value = -1)
  1207 +
  1208 + sentences.append([
  1209 + numpy.array(batch_tokens),
  1210 + #we drop the children-id matrix: numpy.array(batch_children_ids),
  1211 + numpy.array(batch_children_positions),
  1212 + numpy.array(batch_labels),
  1213 + numpy.array(batch_relations)
  1214 + ,numpy.array(batch_words)
  1215 + ])
  1216 +
  1217 + return sentences
1037 1218  
... ...
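
The relation handling newly added in load_stanford_data6 boils down to a frequency-based encoding: the k_most_common_relations most frequent dependency-relation labels are given ids 0..k-1, and every rarer (or unseen) label shares the extra id k. A small self-contained sketch of that mapping follows; build_relation_ids and the relation labels 'subj', 'obj', 'punct', 'adjunct' are made up purely for illustration.

from collections import Counter
import numpy

def build_relation_ids(relations, k_most_common_relations):
    # mirror of the mapping built inside load_stanford_data6
    most_common_rels = [x[0] for x in
                        Counter(numpy.concatenate(relations)).most_common(k_most_common_relations)]
    transform_rels = dict(zip(most_common_rels, range(len(most_common_rels))))
    # all rare or unseen labels share the extra id k_most_common_relations
    return [[transform_rels.get(x, k_most_common_relations) for x in sent] for sent in relations]

# hypothetical labels: with k=2, 'subj' -> 0, 'obj' -> 1, everything rarer -> 2
print(build_relation_ids([['subj', 'obj', 'punct'], ['subj', 'subj', 'obj']], 2))
# [[0, 1, 2], [0, 0, 1]]

This keeps the relation vocabulary at a fixed size of k_most_common_relations + 1, so any per-relation (edge) parameters in the model stay bounded.
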
modules/utils/tools.pyc
No preview for this file type
modules/rnn/many_models.py renamed to nieaktualne/many_models.py
modules/rnn/many_models.pyc renamed to nieaktualne/many_models.pyc
No preview for this file type
modules/rnn/nnet_for_dependency_trees.py renamed to nieaktualne/nnet_for_dependency_trees.py
modules/rnn/nnet_for_dependency_trees.pyc renamed to nieaktualne/nnet_for_dependency_trees.pyc
No preview for this file type