Skip to content

Commit 11cadbf

Browse files
committed
added cleaned pre-trained weights
1 parent 261f88f commit 11cadbf

26 files changed

+137451
-29
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,5 @@ __pycache__/
1515
/lib/
1616
/pip-selfcheck.json
1717
neuralcoref/data/*
18-
neuralcoref/weights/*
1918
neuralcoref/train/*
2019
.cache

neuralcoref/algorithm.py

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,27 +27,20 @@ class Model:
2727
Coreference neural model
2828
'''
2929
def __init__(self, model_path):
30-
self.antecedent_matrix = np.load(model_path + "antecedent_matrix.npy")
31-
self.anaphor_matrix = np.load(model_path + "anaphor_matrix.npy")
32-
self.pair_features_matrix = np.load(model_path + "pair_features_matrix.npy")
33-
self.pairwise_first_layer_bias = np.load(model_path + "pairwise_first_layer_bias.npy")
34-
self.anaphoricity_model = []
35-
weights = []
36-
biases = []
30+
weights, biases = [], []
3731
for file in sorted(os.listdir(model_path)):
38-
if file.startswith("anaphoricity_model_weights"):
32+
if file.startswith("single_mention_weights"):
3933
weights.append(np.load(os.path.join(model_path, file)))
40-
if file.startswith("anaphoricity_model_bias"):
34+
if file.startswith("single_mention_bias"):
4135
biases.append(np.load(os.path.join(model_path, file)))
42-
self.anaphoricity_model = list(zip(weights, biases))
43-
weights = []
44-
biases = []
36+
self.single_mention_model = list(zip(weights, biases))
37+
weights, biases = [], []
4538
for file in sorted(os.listdir(model_path)):
46-
if file.startswith("pairwise_model_weights"):
39+
if file.startswith("pair_mentions_weights"):
4740
weights.append(np.load(os.path.join(model_path, file)))
48-
if file.startswith("pairwise_model_bias"):
41+
if file.startswith("pair_mentions_bias"):
4942
biases.append(np.load(os.path.join(model_path, file)))
50-
self.pairwise_model = list(zip(weights, biases))
43+
self.pair_mentions_model = list(zip(weights, biases))
5144

5245
def _score(self, features, layers):
5346
for weights, bias in layers:
@@ -56,18 +49,16 @@ def _score(self, features, layers):
5649
features = np.maximum(features, 0) # ReLU
5750
return np.sum(features)
5851

59-
def get_anaphoricity_score(self, mention_embedding, anaphoricity_features):
60-
''' Anaphoricity score for an anaphor '''
61-
first_layer_output = np.concatenate([mention_embedding, anaphoricity_features], axis=0)[:, np.newaxis]
62-
return self._score(first_layer_output, self.anaphoricity_model)
52+
def get_single_mention_score(self, mention_embedding, anaphoricity_features):
53+
first_layer_input = np.concatenate([mention_embedding,
54+
anaphoricity_features], axis=0)[:, np.newaxis]
55+
return self._score(first_layer_input, self.single_mention_model)
6356

64-
def get_pairwise_score(self, antecedent, mention, pair_features):
65-
antecedent_embedding = np.matmul(self.antecedent_matrix, antecedent.embedding)
66-
anaphor_embedding = np.matmul(self.anaphor_matrix, mention.embedding)
67-
first_layer_output = antecedent_embedding + anaphor_embedding \
68-
+ np.matmul(self.pair_features_matrix, pair_features) + self.pairwise_first_layer_bias
69-
first_layer_output = np.maximum(first_layer_output, 0)[:, np.newaxis] # ReLU
70-
return self._score(first_layer_output, self.pairwise_model)
57+
def get_pair_mentions_score(self, antecedent, mention, pair_features):
58+
first_layer_input = np.concatenate([antecedent.embedding,
59+
mention.embedding,
60+
pair_features], axis=0)[:, np.newaxis]
61+
return self._score(first_layer_input, self.pair_mentions_model)
7162

7263

7364
class Algorithm:
@@ -159,15 +150,15 @@ def run_coref_on_mentions(self, mentions):
159150
for mention_idx, ant_list in self.data.get_candidate_pairs(mentions, self.max_dist, self.max_dist_match):
160151
mention = self.data[mention_idx]
161152
feats_, ana_feats = self.data.get_anaphoricity_features(mention)
162-
anaphoricity_score = self.coref_model.get_anaphoricity_score(mention.embedding, ana_feats)
153+
anaphoricity_score = self.coref_model.get_single_mention_score(mention.embedding, ana_feats)
163154
self.mentions_single_scores[mention_idx] = anaphoricity_score
164155
self.mentions_single_features[mention_idx] = {"spansEmbeddings": mention.spans_embeddings_, "wordsEmbeddings": mention.words_embeddings_, "features": feats_}
165156

166157
best_score = anaphoricity_score - 50 * (self.greedyness - 0.5)
167158
for ant_idx in ant_list:
168159
antecedent = self.data[ant_idx]
169160
feats_, pwf = self.data.get_pair_features(antecedent, mention)
170-
score = self.coref_model.get_pairwise_score(antecedent, mention, pwf)
161+
score = self.coref_model.get_pair_mentions_score(antecedent, mention, pwf)
171162
self.mentions_pairs_scores[mention_idx][ant_idx] = score
172163
self.mentions_pairs_features[mention_idx][ant_idx] = {"pairFeatures": feats_, "antecedentSpansEmbeddings": antecedent.spans_embeddings_,
173164
"antecedentWordsEmbeddings": antecedent.words_embeddings_,
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
39.3 MB
Binary file not shown.

0 commit comments

Comments
 (0)