@@ -305,13 +305,13 @@ def forward(self, encoder_out, inputs=None,
             w = self.key_position_rate
             # TODO: may be useful to have projection per attention layer
             if self.speaker_proj1 is not None:
-                w = w * F.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
             text_pos_embed = self.embed_keys_positions(text_positions, w)
             keys = keys + text_pos_embed
         if frame_positions is not None:
             w = self.query_position_rate
             if self.speaker_proj2 is not None:
-                w = w * F.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
             frame_pos_embed = self.embed_query_positions(frame_positions, w)

         # transpose only once to speed up attention layers
@@ -357,10 +357,10 @@ def forward(self, encoder_out, inputs=None,
         x = x.transpose(1, 2)

         # project to mel-spectrogram
-        outputs = F.sigmoid(x)
+        outputs = torch.sigmoid(x)

         # Done flag
-        done = F.sigmoid(self.fc(x))
+        done = torch.sigmoid(self.fc(x))

         return outputs, torch.stack(alignments), done, decoder_states
@@ -373,7 +373,7 @@ def incremental_forward(self, encoder_out, text_positions, speaker_embed=None,
             w = self.key_position_rate
             # TODO: may be useful to have projection per attention layer
             if self.speaker_proj1 is not None:
-                w = w * F.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
             text_pos_embed = self.embed_keys_positions(text_positions, w)
             keys = keys + text_pos_embed
@@ -399,7 +399,7 @@ def incremental_forward(self, encoder_out, text_positions, speaker_embed=None,
             frame_pos = keys.data.new(B, 1).fill_(t + 1).long()
             w = self.query_position_rate
             if self.speaker_proj2 is not None:
-                w = w * F.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
             frame_pos_embed = self.embed_query_positions(frame_pos, w)

             if test_inputs is not None:
@@ -457,8 +457,8 @@ def incremental_forward(self, encoder_out, text_positions, speaker_embed=None,
             ave_alignment = ave_alignment.div_(num_attention_layers)

             # Output & done flag predictions
-            output = F.sigmoid(x)
-            done = F.sigmoid(self.fc(x))
+            output = torch.sigmoid(x)
+            done = torch.sigmoid(self.fc(x))

             decoder_states += [decoder_state]
             outputs += [output]
@@ -601,4 +601,4 @@ def forward(self, x, speaker_embed=None):
         # Back to B x T x C
         x = x.transpose(1, 2)

-        return F.sigmoid(x)
+        return torch.sigmoid(x)
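
Note on the change: `torch.nn.functional.sigmoid` is deprecated in recent PyTorch releases in favour of `torch.sigmoid`, and both compute the same element-wise logistic function, so the patch is purely mechanical. A minimal sketch (not part of the patch) checking the equivalence on a random tensor:

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 3)
# F.sigmoid still works but emits a deprecation warning;
# torch.sigmoid is the recommended replacement and returns identical values.
assert torch.allclose(F.sigmoid(x), torch.sigmoid(x))
```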