Skip to content

Commit adc447d

Browse files
committed
Merge branch 'release-1.0.1'
2 parents fb3f303 + 4f0e2ae commit adc447d

25 files changed

+29
-10
lines changed

gensim/models/keyedvectors.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -455,14 +455,21 @@ def most_similar_cosmul(self, positive=[], negative=[], topn=10):
455455
# allow calls like most_similar_cosmul('dog'), as a shorthand for most_similar_cosmul(['dog'])
456456
positive = [positive]
457457

458+
all_words = set([self.vocab[word].index for word in positive+negative
459+
if not isinstance(word, ndarray) and word in self.vocab])
460+
461+
positive = [
462+
self.word_vec(word, use_norm=True) if isinstance(word, string_types) else word
463+
for word in positive
464+
]
465+
negative = [
466+
self.word_vec(word, use_norm=True) if isinstance(word, string_types) else word
467+
for word in negative
468+
]
458469

459-
positive = [self.word_vec(word, use_norm=True) for word in positive]
460-
negative = [self.word_vec(word, use_norm=True) for word in negative]
461470
if not positive:
462471
raise ValueError("cannot compute similarity with no input")
463472

464-
all_words = set([self.vocab[word].index for word in positive+negative if word in self.vocab])
465-
466473
# equation (4) of Levy & Goldberg "Linguistic Regularities...",
467474
# with distances shifted to [0,1] per footnote (7)
468475
pos_dists = [((1 + dot(self.syn0norm, term)) / 2) for term in positive]

gensim/models/word2vec.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1291,14 +1291,15 @@ def load(cls, *args, **kwargs):
12911291
return model
12921292

12931293
def _load_specials(self, *args, **kwargs):
1294+
super(Word2Vec, self)._load_specials(*args, **kwargs)
12941295
# loading from a pre-KeyedVectors word2vec model
12951296
if not hasattr(self, 'wv'):
12961297
wv = KeyedVectors()
12971298
wv.syn0 = self.__dict__.get('syn0', [])
1299+
wv.syn0norm = self.__dict__.get('syn0norm', None)
12981300
wv.vocab = self.__dict__.get('vocab', {})
12991301
wv.index2word = self.__dict__.get('index2word', [])
13001302
self.wv = wv
1301-
super(Word2Vec, self)._load_specials(*args, **kwargs)
13021303

13031304
@classmethod
13041305
def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict',
-139 KB
Binary file not shown.
-145 KB
Binary file not shown.
-13.4 KB
Binary file not shown.
-105 KB
Binary file not shown.
128 Bytes
Binary file not shown.
152 Bytes
Binary file not shown.
-6.8 KB
Binary file not shown.
-13.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)