
Commit 33ef202

loss: always tally; split to epoch_loss/minibatch_loss; use wider float
1 parent 817cac9 commit 33ef202
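This commit makes the loss tally unconditional: the `compute_loss` flag and `get_latest_training_loss()` are removed, each Cython epoch/batch call accumulates its own float64 `minibatch_loss`, and those partial sums are folded into the model-level `epoch_loss`. A minimal sketch of reading the new attribute from an epoch callback (`CallbackAny2Vec` is existing gensim API; the toy corpus and log format are illustrative, and it assumes the in-memory iterable path tallies into the same attribute, as the .pxd changes below suggest):

from gensim.models import Word2Vec
from gensim.models.callbacks import CallbackAny2Vec

class EpochLossLogger(CallbackAny2Vec):
    """Print the loss tallied so far after each training epoch."""
    def __init__(self):
        self.epoch = 0

    def on_epoch_end(self, model):
        # epoch_loss replaces running_training_loss / get_latest_training_loss()
        print("epoch %d: loss tallied so far = %f" % (self.epoch, model.epoch_loss))
        self.epoch += 1

sentences = [["first", "toy", "sentence"], ["second", "toy", "sentence"]]
model = Word2Vec(sentences, vector_size=32, min_count=1, epochs=5, callbacks=[EpochLossLogger()])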

File tree: 5 files changed, +81 -117 lines changed

gensim/models/word2vec.py

Lines changed: 11 additions & 30 deletions
@@ -170,19 +170,19 @@
 CORPUSFILE_VERSION = -1
 
 def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words,
-                   _work, _neu1, compute_loss):
+                   _work, _neu1):
     raise RuntimeError("Training with corpus_file argument is not supported")
 
 def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words,
-                     _work, _neu1, compute_loss):
+                     _work, _neu1):
     raise RuntimeError("Training with corpus_file argument is not supported")
 
 
 class Word2Vec(utils.SaveLoad):
     def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
                  max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
                  sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
-                 trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
+                 trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, callbacks=(),
                  comment=None, max_final_vocab=None):
         """Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
 
@@ -282,9 +282,6 @@ def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.02
             Target size (in words) for batches of examples passed to worker threads (and
             thus cython routines).(Larger batches will be passed if individual
             texts are longer than 10000 words, but the standard cython code truncates to that maximum.)
-        compute_loss: bool, optional
-            If True, computes and stores loss value which can be retrieved using
-            :meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`.
         callbacks : iterable of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional
             Sequence of callbacks to be executed at specific stages during training.
 
@@ -325,8 +322,7 @@ def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.02
         self.negative = int(negative)
         self.ns_exponent = ns_exponent
         self.cbow_mean = int(cbow_mean)
-        self.compute_loss = bool(compute_loss)
-        self.running_training_loss = 0
+        self.epoch_loss = 0.0
         self.min_alpha_yet_reached = float(alpha)
         self.corpus_count = 0
         self.corpus_total_words = 0
@@ -380,7 +376,7 @@ def build_vocab_and_train(self, corpus_iterable=None, corpus_file=None, trim_rul
         self.train(
             corpus_iterable=corpus_iterable, corpus_file=corpus_file, total_examples=self.corpus_count,
             total_words=self.corpus_total_words, epochs=self.epochs, start_alpha=self.alpha,
-            end_alpha=self.min_alpha, compute_loss=self.compute_loss, callbacks=callbacks)
+            end_alpha=self.min_alpha, callbacks=callbacks)
 
     def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, progress_per=10000,
                     keep_raw_vocab=False, trim_rule=None, **kwargs):
@@ -838,10 +834,10 @@ def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_p
 
         if self.sg:
             examples, tally, raw_tally = train_epoch_sg(self, corpus_file, offset, cython_vocab, cur_epoch,
-                                                        total_examples, total_words, work, neu1, self.compute_loss)
+                                                        total_examples, total_words, work, neu1)
         else:
             examples, tally, raw_tally = train_epoch_cbow(self, corpus_file, offset, cython_vocab, cur_epoch,
-                                                          total_examples, total_words, work, neu1, self.compute_loss)
+                                                          total_examples, total_words, work, neu1)
 
         return examples, tally, raw_tally
 
@@ -866,9 +862,9 @@ def _do_train_job(self, sentences, alpha, inits):
         work, neu1 = inits
         tally = 0
         if self.sg:
-            tally += train_batch_sg(self, sentences, alpha, work, self.compute_loss)
+            tally += train_batch_sg(self, sentences, alpha, work)
         else:
-            tally += train_batch_cbow(self, sentences, alpha, work, neu1, self.compute_loss)
+            tally += train_batch_cbow(self, sentences, alpha, work, neu1)
         return tally, self._raw_word_count(sentences)
 
     def _clear_post_train(self):
@@ -877,7 +873,7 @@ def _clear_post_train(self):
 
     def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None,
               epochs=None, start_alpha=None, end_alpha=None, word_count=0,
-              queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(),
+              queue_factor=2, report_delay=1.0, callbacks=(),
               **kwargs):
         """Update the model's neural weights from a sequence of sentences.
 
@@ -931,9 +927,6 @@ def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, tot
             Multiplier for size of queue (number of workers * queue_factor).
         report_delay : float, optional
             Seconds to wait before reporting progress.
-        compute_loss: bool, optional
-            If True, computes and stores loss value which can be retrieved using
-            :meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`.
         callbacks : iterable of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional
             Sequence of callbacks to be executed at specific stages during training.
 
@@ -959,8 +952,7 @@ def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, tot
             total_examples=total_examples,
             total_words=total_words)
 
-        self.compute_loss = compute_loss
-        self.running_training_loss = 0.0
+        self.epoch_loss = 0.0
 
         for callback in callbacks:
            callback.on_train_begin(self)
@@ -1820,17 +1812,6 @@ def save(self, *args, **kwargs):
         kwargs['ignore'] = kwargs.get('ignore', []) + ['cum_table', ]
         super(Word2Vec, self).save(*args, **kwargs)
 
-    def get_latest_training_loss(self):
-        """Get current value of the training loss.
-
-        Returns
-        -------
-        float
-            Current training loss.
-
-        """
-        return self.running_training_loss
-
     @classmethod
     def load(cls, *args, rethrow=False, **kwargs):
         """Load a previously saved :class:`~gensim.models.word2vec.Word2Vec` model.

gensim/models/word2vec_corpusfile.pyx

Lines changed: 13 additions & 17 deletions
@@ -251,7 +251,7 @@ cdef REAL_t get_next_alpha(
 
 
 def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words, _work,
-                   _neu1, compute_loss):
+                   _neu1):
     """Train Skipgram model for one epoch by training on an input stream. This function is used only in multistream mode.
 
     Called internally from :meth:`~gensim.models.word2vec.Word2Vec.train`.
@@ -268,8 +268,6 @@ def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expec
         Private working memory for each worker.
     _neu1 : np.ndarray
         Private working memory for each worker.
-    compute_loss : bool
-        Whether or not the training loss should be computed in this batch.
 
     Returns
     -------
@@ -297,7 +295,7 @@ def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expec
     cdef long long total_effective_words = 0, total_words = 0
     cdef int sent_idx, idx_start, idx_end
 
-    init_w2v_config(&c, model, _alpha, compute_loss, _work)
+    init_w2v_config(&c, model, _alpha, _work)
 
     cdef vector[vector[string]] sentences
 
@@ -330,14 +328,14 @@ def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expec
                     if c.hs:
                         w2v_fast_sentence_sg_hs(
                             c.points[i], c.codes[i], c.codelens[i], c.syn0, c.syn1, c.size, c.indexes[j],
-                            c.alpha, c.work, c.words_lockf, c.words_lockf_len, c.compute_loss,
-                            &c.running_training_loss)
+                            c.alpha, c.work, c.words_lockf, c.words_lockf_len,
+                            &c.minibatch_loss)
                     if c.negative:
                         c.next_random = w2v_fast_sentence_sg_neg(
                             c.negative, c.cum_table, c.cum_table_len, c.syn0, c.syn1neg, c.size,
                             c.indexes[i], c.indexes[j], c.alpha, c.work, c.next_random,
                             c.words_lockf, c.words_lockf_len,
-                            c.compute_loss, &c.running_training_loss)
+                            &c.minibatch_loss)
 
         total_sentences += sentences.size()
         total_effective_words += effective_words
@@ -346,12 +344,12 @@ def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expec
             start_alpha, end_alpha, total_sentences, total_words,
             expected_examples, expected_words, cur_epoch, num_epochs)
 
-    model.running_training_loss = c.running_training_loss
+    model.epoch_loss += c.minibatch_loss
     return total_sentences, total_effective_words, total_words
 
 
 def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words, _work,
-                     _neu1, compute_loss):
+                     _neu1):
     """Train CBOW model for one epoch by training on an input stream. This function is used only in multistream mode.
 
     Called internally from :meth:`~gensim.models.word2vec.Word2Vec.train`.
@@ -368,8 +366,6 @@ def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _exp
         Private working memory for each worker.
     _neu1 : np.ndarray
         Private working memory for each worker.
-    compute_loss : bool
-        Whether or not the training loss should be computed in this batch.
 
     Returns
     -------
@@ -397,7 +393,7 @@ def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _exp
     cdef long long total_effective_words = 0, total_words = 0
     cdef int sent_idx, idx_start, idx_end
 
-    init_w2v_config(&c, model, _alpha, compute_loss, _work, _neu1)
+    init_w2v_config(&c, model, _alpha, _work, _neu1)
 
     cdef vector[vector[string]] sentences
 
@@ -427,15 +423,15 @@ def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _exp
                 if c.hs:
                     w2v_fast_sentence_cbow_hs(
                         c.points[i], c.codes[i], c.codelens, c.neu1, c.syn0, c.syn1, c.size, c.indexes, c.alpha,
-                        c.work, i, j, k, c.cbow_mean, c.words_lockf, c.words_lockf_len, c.compute_loss,
-                        &c.running_training_loss)
+                        c.work, i, j, k, c.cbow_mean, c.words_lockf, c.words_lockf_len,
+                        &c.minibatch_loss)
 
                 if c.negative:
                     c.next_random = w2v_fast_sentence_cbow_neg(
                         c.negative, c.cum_table, c.cum_table_len, c.codelens, c.neu1, c.syn0,
                         c.syn1neg, c.size, c.indexes, c.alpha, c.work, i, j, k, c.cbow_mean,
-                        c.next_random, c.words_lockf, c.words_lockf_len, c.compute_loss,
-                        &c.running_training_loss)
+                        c.next_random, c.words_lockf, c.words_lockf_len,
+                        &c.minibatch_loss)
 
         total_sentences += sentences.size()
         total_effective_words += effective_words
@@ -444,7 +440,7 @@ def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _exp
             start_alpha, end_alpha, total_sentences, total_words,
             expected_examples, expected_words, cur_epoch, num_epochs)
 
-    model.running_training_loss = c.running_training_loss
+    model.epoch_loss += c.minibatch_loss
     return total_sentences, total_effective_words, total_words
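Both epoch functions follow the same accumulation pattern: every pair's loss contribution is added to a function-local double (`c.minibatch_loss`) inside the nogil loop, and only the final sum touches the Python-level `model.epoch_loss`, keeping the hot loop free of Python attribute access. A schematic pure-Python rendering of that pattern (`iter_pairs` and `train_pair` are hypothetical stand-ins for the fast-sentence routines above, not gensim API):

def iter_pairs(sentence, window=5):
    # hypothetical stand-in: (center, context) pairs within the window
    for i, center in enumerate(sentence):
        for context in sentence[max(0, i - window):i] + sentence[i + 1:i + 1 + window]:
            yield center, context

def train_pair(model, center, context):
    # hypothetical stand-in for the w2v_fast_sentence_* routines; returns this pair's loss
    return 0.0

def train_epoch(model, sentences):
    minibatch_loss = 0.0                      # local double, mirroring c.minibatch_loss
    for sentence in sentences:
        for center, context in iter_pairs(sentence):
            minibatch_loss += train_pair(model, center, context)
    model.epoch_loss += minibatch_loss        # folded into the shared tally once, as in the hunks above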

gensim/models/word2vec_inner.pxd

Lines changed: 8 additions & 7 deletions
@@ -49,8 +49,9 @@ cdef our_saxpy_ptr our_saxpy
 
 
 cdef struct Word2VecConfig:
-    int hs, negative, sample, compute_loss, size, window, cbow_mean, workers
-    REAL_t running_training_loss, alpha
+    int hs, negative, sample, size, window, cbow_mean, workers
+    REAL_t alpha
+    np.float64_t minibatch_loss
 
     REAL_t *syn0
     REAL_t *words_lockf
@@ -96,31 +97,31 @@ cdef void w2v_fast_sentence_sg_hs(
        const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
        REAL_t *syn0, REAL_t *syn1, const int size,
        const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, REAL_t *words_lockf,
-       const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+       const np.uint32_t lockf_len, np.float64_t *minibatch_loss_ptr) nogil
 
 
 cdef unsigned long long w2v_fast_sentence_sg_neg(
        const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len,
        REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
        const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work,
        unsigned long long next_random, REAL_t *words_lockf,
-       const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+       const np.uint32_t lockf_len, np.float64_t *minibatch_loss_ptr) nogil
 
 
 cdef void w2v_fast_sentence_cbow_hs(
        const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN],
        REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size,
        const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work,
        int i, int j, int k, int cbow_mean, REAL_t *words_lockf,
-       const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+       const np.uint32_t lockf_len, np.float64_t *minibatch_loss_ptr) nogil
 
 
 cdef unsigned long long w2v_fast_sentence_cbow_neg(
        const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, int codelens[MAX_SENTENCE_LEN],
        REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size,
        const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work,
        int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *words_lockf,
-       const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+       const np.uint32_t lockf_len, np.float64_t *minibatch_loss_ptr) nogil
 
 
-cdef init_w2v_config(Word2VecConfig *c, model, alpha, compute_loss, _work, _neu1=*)
+cdef init_w2v_config(Word2VecConfig *c, model, alpha, _work, _neu1=*)
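The "wider float" in the commit title is this switch of the loss accumulator from REAL_t (float32) to np.float64_t. A float32 running total silently drops a small contribution once the total is large relative to it, because the update falls below half a ulp; a quick NumPy illustration (magnitudes chosen only to expose the rounding, not taken from real training):

import numpy as np

# A 1e-4 loss contribution is already lost once a float32 running total reaches 2048:
total32 = np.float32(2048.0)
print(total32 + np.float32(1e-4) == total32)   # True: the increment is below half a ulp

# A float64 accumulator still registers the same update:
total64 = np.float64(2048.0)
print(total64 + 1e-4 == total64)               # False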
