Commit ba27568ff1602427739fbebaf45ce90e08b504ff
1 parent 4a097c4f
Minor fixes.
Showing 5 changed files with 24 additions and 26 deletions
conf.py
... | ... | @@ -20,4 +20,4 @@ W2V_MODEL_PATH = os.path.join(os.path.dirname(__file__), 'models', W2V_MODEL_NAME)
 20 | 20 | W2V_MODEL = Word2Vec.load(W2V_MODEL_PATH)
 21 | 21 |
 22 | 22 | NEURAL_MODEL_PATH = os.path.join(os.path.dirname(__file__), 'models', NEURAL_MODEL_NAME)
 23 |    | -NEURAL_MODEL = initialize_neural_model(NUMBER_OF_FEATURES)
    | 23 | +NEURAL_MODEL = initialize_neural_model(NUMBER_OF_FEATURES, NEURAL_MODEL_PATH)
... | ...
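The effect of the conf.py change: the module-level NEURAL_MODEL is no longer a freshly built, untrained network but one whose trained weights are restored from NEURAL_MODEL_PATH. A minimal sketch of how the relevant part of conf.py reads after this commit; the imports and the NUMBER_OF_FEATURES / W2V_MODEL_NAME / NEURAL_MODEL_NAME constants are assumed from context rather than shown in this diff:

import os

from gensim.models import Word2Vec

from corneferencer.utils import initialize_neural_model

# Assumed to be defined earlier in conf.py (not visible in this diff):
# NUMBER_OF_FEATURES, W2V_MODEL_NAME, NEURAL_MODEL_NAME

W2V_MODEL_PATH = os.path.join(os.path.dirname(__file__), 'models', W2V_MODEL_NAME)
W2V_MODEL = Word2Vec.load(W2V_MODEL_PATH)

NEURAL_MODEL_PATH = os.path.join(os.path.dirname(__file__), 'models', NEURAL_MODEL_NAME)
# Builds the network and loads its trained weights (see utils.py below).
NEURAL_MODEL = initialize_neural_model(NUMBER_OF_FEATURES, NEURAL_MODEL_PATH)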
corneferencer/entities.py
corneferencer/resolvers/resolve.py
    |  1 | +import numpy
    |  2 | +
  1 |  3 | from conf import NEURAL_MODEL, THRESHOLD
  2 |    | -from corneferencer.resolvers.vectors import create_pair_vector
    |  4 | +from corneferencer.resolvers.vectors import get_pair_vector
  3 |  5 |
  4 |  6 |
  5 |  7 | # incremental resolve algorithm
  6 |  8 | def incremental(text):
  7 |    | -    last_set_id = 1
    |  9 | +    last_set_id = 0
  8 | 10 |     for i, ana in enumerate(text.mentions):
  9 | 11 |         if i > 0:
 10 | 12 |             best_prediction = 0.0
 11 | 13 |             best_ante = None
 12 |    | -            for ante in text.mentions[:i:-1]:
 13 |    | -                pair_vec = create_pair_vector(ante, ana)
 14 |    | -                prediction = NEURAL_MODEL.predict(pair_vec)
 15 |    | -                accuracy = prediction[0]
 16 |    | -                if accuracy > THRESHOLD and accuracy > best_prediction:
 17 |    | -                    best_prediction = accuracy
    | 14 | +            for ante in text.mentions[:i]:
    | 15 | +                pair_vec = get_pair_vector(ante, ana)
    | 16 | +                sample = numpy.asarray([pair_vec], dtype=numpy.float32)
    | 17 | +                prediction = NEURAL_MODEL.predict(sample)[0]
    | 18 | +                if prediction > THRESHOLD and prediction >= best_prediction:
    | 19 | +                    best_prediction = prediction
 18 | 20 |                     best_ante = ante
 19 | 21 |             if best_ante is not None:
 20 | 22 |                 if best_ante.set:
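Two fixes land in incremental() here. First, candidate antecedents are now taken from text.mentions[:i], i.e. the mentions preceding the anaphor; the old slice [:i:-1] actually walked the mentions *after* position i, in reverse. Second, the pair vector is wrapped into a 2-D numpy batch before calling predict(), since a Keras model built with Input(shape=(number_of_features,)) expects input of shape (n_samples, n_features). A small self-contained illustration of both points; the 1126-feature width is taken from the comment removed in vectors.py below:

import numpy

# Keras expects a batch of shape (n_samples, n_features), not a bare list.
vec = [0.0] * 1126                                  # stand-in for get_pair_vector(ante, ana)
sample = numpy.asarray([vec], dtype=numpy.float32)
print(sample.shape)                                 # (1, 1126)

# [:i] keeps the mentions before the anaphor; the old [:i:-1] walked the
# mentions after it, in reverse.
mentions = ['m0', 'm1', 'm2', 'm3', 'm4']
i = 2
print(mentions[:i])      # ['m0', 'm1']  -> preceding mentions (new behaviour)
print(mentions[:i:-1])   # ['m4', 'm3']  -> following mentions, reversed (old behaviour)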
... | ... | @@ -29,7 +31,7 @@ def incremental(text):
 29 | 31 | # entity based resolve algorithm
 30 | 32 | def entity_based(text):
 31 | 33 |     sets = []
 32 |    | -    last_set_id = 1
    | 34 | +    last_set_id = 0
 33 | 35 |     for i, ana in enumerate(text.mentions):
 34 | 36 |         if i > 0:
 35 | 37 |             best_fit = get_best_set(sets, ana)
... | ... | @@ -64,13 +66,13 @@ def get_best_set(sets, ana):
 64 | 66 |
 65 | 67 |
 66 | 68 | def predict_set(mentions, ana):
 67 |    | -    accuracy_sum = 0.0
    | 69 | +    prediction_sum = 0.0
 68 | 70 |     for mnt in mentions:
 69 |    | -        pair_vec = create_pair_vector(mnt, ana)
 70 |    | -        prediction = NEURAL_MODEL.predict(pair_vec)
 71 |    | -        accuracy = prediction[0]
 72 |    | -        accuracy_sum += accuracy
 73 |    | -    return accuracy_sum / float(len(mentions))
    | 71 | +        pair_vec = get_pair_vector(mnt, ana)
    | 72 | +        sample = numpy.asarray([pair_vec], dtype=numpy.float32)
    | 73 | +        prediction = NEURAL_MODEL.predict(sample)[0]
    | 74 | +        prediction_sum += prediction
    | 75 | +    return prediction_sum / float(len(mentions))
 74 | 76 |
 75 | 77 |
 76 | 78 | def remove_singletons(sets):
... | ...
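The accuracy_* names in predict_set() are renamed to prediction_*: the value coming out of the network is a per-pair coreference score, not an accuracy. The function averages that score over every mention already in a candidate set, which is how the entity-based algorithm (via get_best_set) scores an anaphor against a whole set. A tiny worked example of the averaging step, with made-up scores standing in for the NEURAL_MODEL outputs:

# Hypothetical per-pair scores for the three mentions already in a set:
pair_scores = [0.91, 0.40, 0.73]
set_score = sum(pair_scores) / float(len(pair_scores))
print(set_score)   # 0.68 -- the value predict_set() would return for this set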
corneferencer/resolvers/vectors.py
  1 |    | -import numpy
  2 |    | -
  3 |  1 | from corneferencer.resolvers import features
  4 |  2 |
  5 |    | -# input_1 to have shape (None, 1126) but got array with shape (1126, 1)
  6 |    | -def create_pair_vector(ante, ana):
    |  3 | +
    |  4 | +def get_pair_vector(ante, ana):
  7 |  5 |     vec = []
  8 |    | -    # ante_features = get_mention_features(ante)
  9 |    | -    # vec.extend(ante_features)
 10 |    | -    # ana_features = get_mention_features(ana)
 11 |    | -    # vec.extend(ana_features)
 12 |  6 |     vec.extend(ante.features)
 13 |  7 |     vec.extend(ana.features)
 14 |  8 |     pair_features = get_pair_features(ante, ana)
 15 |  9 |     vec.extend(pair_features)
 16 |    | -    return numpy.asarray([vec], dtype=numpy.float32)
    | 10 | +    return vec
 17 | 11 |
 18 | 12 |
 19 | 13 | def get_mention_features(mention):
... | ...
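create_pair_vector becomes get_pair_vector and now returns a plain Python list rather than a numpy array; the deleted comment was a note about the original Keras shape error (expected (None, 1126), got (1126, 1)), which the asarray([vec], ...) wrapping in resolve.py now addresses. The vector is simply the antecedent's features, the anaphor's features, and the pair features concatenated. A small stand-alone illustration of that concatenation, with made-up feature values:

ante_features = [0.1, 0.2]   # stand-in for ante.features
ana_features = [0.3, 0.4]    # stand-in for ana.features
pair_features = [1.0]        # stand-in for get_pair_features(ante, ana)

vec = []
vec.extend(ante_features)
vec.extend(ana_features)
vec.extend(pair_features)
print(vec)                   # [0.1, 0.2, 0.3, 0.4, 1.0]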
corneferencer/utils.py
... | ... | @@ -10,7 +10,7 @@ def eprint(*args, **kwargs):
 10 | 10 |     print(*args, file=sys.stderr, **kwargs)
 11 | 11 |
 12 | 12 |
 13 |    | -def initialize_neural_model(number_of_features):
    | 13 | +def initialize_neural_model(number_of_features, path_to_model):
 14 | 14 |     inputs = Input(shape=(number_of_features,))
 15 | 15 |     output_from_1st_layer = Dense(1000, activation='relu')(inputs)
 16 | 16 |     output_from_1st_layer = Dropout(0.5)(output_from_1st_layer)
... | ... | @@ -22,4 +22,5 @@ def initialize_neural_model(number_of_features):
 22 | 22 |
 23 | 23 |     model = Model(inputs, output)
 24 | 24 |     model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])
    | 25 | +    model.load_weights(path_to_model)
 25 | 26 |     return model
... | ...
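initialize_neural_model() now takes the path to the saved weights and restores them after compiling, so conf.py gets a ready-to-use model instead of a randomly initialized one. A sketch of the helper assembled from the lines visible in this diff; the imports, the hidden layers elided between the two hunks (old lines 17-21), and the single sigmoid output are assumptions, the last being consistent with the binary_crossentropy loss and with thresholding a single score in resolve.py:

from keras.layers import Dense, Dropout, Input
from keras.models import Model


def initialize_neural_model(number_of_features, path_to_model):
    inputs = Input(shape=(number_of_features,))
    output_from_1st_layer = Dense(1000, activation='relu')(inputs)
    output_from_1st_layer = Dropout(0.5)(output_from_1st_layer)

    # ... further hidden layers elided in the diff (old lines 17-21) ...
    # Assumed single sigmoid unit producing the coreference score:
    output = Dense(1, activation='sigmoid')(output_from_1st_layer)

    model = Model(inputs, output)
    model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.load_weights(path_to_model)  # new: restore the trained weights
    return model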