Commit db32de75dc92a99c150c294da15ac46f9b0f850b
Parent: 0d8e647d

Slightly different neural network model.

Showing 3 changed files with 12 additions and 10 deletions.
conf.py
@@ -6,7 +6,6 @@ from gensim.models.word2vec import Word2Vec
 
 
 CONTEXT = 5
-# THRESHOLD = 0.001
 RANDOM_WORD_VECTORS = True
 W2V_SIZE = 50
 W2V_MODEL_NAME = 'w2v_allwiki_nkjpfull_50.model'
@@ -14,7 +13,7 @@ W2V_MODEL_NAME = 'w2v_allwiki_nkjpfull_50.model'
 # simple or siamese
 NEURAL_MODEL_ARCHITECTURE = 'simple'
 NUMBER_OF_FEATURES = 1190
-NEURAL_MODEL_NAME = 'model_1190_features.h5'
+NEURAL_MODEL_NAME = 'model_with_singletons.h5'
 
 FREQ_LIST_NAME = 'base.lst'
 LEMMA2SYNONYMS_NAME = 'lemma2synonyms.map'
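For context, these constants are what the resolver reads when it loads its scoring model. A minimal usage sketch, assuming conf is importable as a module and that the .h5 file sits next to it; only the constant names and the initialize_neural_model signature (shown in corneferencer/utils.py below) come from this diff, the path handling is an assumption:

```python
# Hypothetical wiring of the conf.py constants into model loading.
import os

import conf
from corneferencer.utils import initialize_neural_model

model = initialize_neural_model(
    conf.NEURAL_MODEL_ARCHITECTURE,  # 'simple' or 'siamese'
    conf.NUMBER_OF_FEATURES,         # 1190 pair features per example
    # assumed location of the weights file, next to conf.py
    os.path.join(os.path.dirname(conf.__file__), conf.NEURAL_MODEL_NAME),
)
```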
corneferencer/resolvers/features.py
@@ -174,14 +174,14 @@ def distances_vec(ante, ana):
     words_dist = [0] * 11
     words_bucket = 0
     if mnts_intersect != 1:
-        words_bucket = get_distance_bucket(ana.start_in_words - ante.end_in_words - 1)
+        words_bucket = get_distance_bucket(ana.start_in_words - ante.end_in_words)
     words_dist[words_bucket] = 1
     vec.extend(words_dist)
 
     mentions_dist = [0] * 11
     mentions_bucket = 0
     if mnts_intersect != 1:
-        mentions_bucket = get_distance_bucket(ana.position_in_mentions - ante.position_in_mentions - 1)
+        mentions_bucket = get_distance_bucket(ana.position_in_mentions - ante.position_in_mentions)
     if words_bucket == 10:
         mentions_bucket = 10
     mentions_dist[mentions_bucket] = 1
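Both hunks drop the "- 1" adjustment, so the raw gap between the antecedent and the anaphor is bucketed directly. A minimal sketch of the resulting one-hot distance feature; the body of get_distance_bucket and the standalone helper name are assumptions (only the function name and the 11-slot vector appear in the diff):

```python
# Assumed bucketing: distances 0-9 keep their own bucket,
# anything larger (or negative) is clamped into the edge buckets.
def get_distance_bucket(distance):
    return min(max(distance, 0), 10)


# Hypothetical standalone version of the word-distance feature;
# in the repository this logic lives inline in distances_vec.
def words_distance_feature(ante, ana):
    words_dist = [0] * 11  # one-hot over 11 distance buckets
    # raw gap in words between antecedent end and anaphor start,
    # no longer reduced by one after this commit
    bucket = get_distance_bucket(ana.start_in_words - ante.end_in_words)
    words_dist[bucket] = 1
    return words_dist
```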
corneferencer/utils.py
@@ -27,17 +27,20 @@ def initialize_neural_model(architecture, number_of_features, path_to_model):
 def initialize_simple_model(number_of_features, path_to_model):
     inputs = Input(shape=(number_of_features,))
 
-    output_from_1st_layer = Dense(1000, activation='relu')(inputs)
-    output_from_1st_layer = Dropout(0.2)(output_from_1st_layer)
+    output_from_1st_layer = Dense(500)(inputs)
     output_from_1st_layer = BatchNormalization()(output_from_1st_layer)
+    output_from_1st_layer = Activation('relu')(output_from_1st_layer)
+    output_from_1st_layer = Dropout(0.2)(output_from_1st_layer)
 
-    output_from_2nd_layer = Dense(500, activation='relu')(output_from_1st_layer)
-    output_from_2nd_layer = Dropout(0.2)(output_from_2nd_layer)
+    output_from_2nd_layer = Dense(200)(output_from_1st_layer)
     output_from_2nd_layer = BatchNormalization()(output_from_2nd_layer)
+    output_from_2nd_layer = Activation('relu')(output_from_2nd_layer)
+    output_from_2nd_layer = Dropout(0.2)(output_from_2nd_layer)
 
-    output_from_3rd_layer = Dense(300, activation='relu')(output_from_2nd_layer)
-    output_from_3rd_layer = Dropout(0.2)(output_from_3rd_layer)
+    output_from_3rd_layer = Dense(100)(output_from_2nd_layer)
     output_from_3rd_layer = BatchNormalization()(output_from_3rd_layer)
+    output_from_3rd_layer = Activation('relu')(output_from_3rd_layer)
+    output_from_3rd_layer = Dropout(0.2)(output_from_3rd_layer)
 
     output = Dense(1, activation='sigmoid')(output_from_3rd_layer)
 
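Read together, the new initialize_simple_model builds a narrower 500-200-100 stack and reorders each block to Dense, then BatchNormalization, then ReLU, then Dropout, instead of a fused Dense+ReLU followed directly by Dropout. A sketch of the full function as it reads after this patch; the Keras imports and the final Model/load_weights tail are assumptions, since the diff is truncated before them:

```python
# Assumed imports; the diff only shows the layer calls themselves.
from keras.layers import Activation, BatchNormalization, Dense, Dropout, Input
from keras.models import Model


def initialize_simple_model(number_of_features, path_to_model):
    inputs = Input(shape=(number_of_features,))

    # Each block: linear projection -> batch norm -> ReLU -> dropout.
    output_from_1st_layer = Dense(500)(inputs)
    output_from_1st_layer = BatchNormalization()(output_from_1st_layer)
    output_from_1st_layer = Activation('relu')(output_from_1st_layer)
    output_from_1st_layer = Dropout(0.2)(output_from_1st_layer)

    output_from_2nd_layer = Dense(200)(output_from_1st_layer)
    output_from_2nd_layer = BatchNormalization()(output_from_2nd_layer)
    output_from_2nd_layer = Activation('relu')(output_from_2nd_layer)
    output_from_2nd_layer = Dropout(0.2)(output_from_2nd_layer)

    output_from_3rd_layer = Dense(100)(output_from_2nd_layer)
    output_from_3rd_layer = BatchNormalization()(output_from_3rd_layer)
    output_from_3rd_layer = Activation('relu')(output_from_3rd_layer)
    output_from_3rd_layer = Dropout(0.2)(output_from_3rd_layer)

    # Single sigmoid output scoring the mention pair as coreferent or not.
    output = Dense(1, activation='sigmoid')(output_from_3rd_layer)

    # Assumed tail: the diff cuts off before the model is assembled
    # and the trained weights are loaded from path_to_model.
    model = Model(inputs=inputs, outputs=output)
    model.load_weights(path_to_model)
    return model
```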