Skip to content

Commit 2834647

Browse files
committed
upload files
1 parent ab3edd8 commit 2834647

File tree

3 files changed

+8
-27
lines changed

3 files changed

+8
-27
lines changed

VC_inference.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,11 @@
33
import torch
44
from torch import no_grad, LongTensor
55
import argparse
6-
from models_infer import spectrogram_torch
6+
from mel_processing import spectrogram_torch
77
import utils
88
from models_infer import SynthesizerTrn
99
import gradio as gr
10-
import torchaudio
10+
import librosa
1111
import webbrowser
1212
device = "cuda:0" if torch.cuda.is_available() else "cpu"
1313

@@ -20,15 +20,16 @@ def vc_fn(original_speaker, target_speaker, record_audio, upload_audio):
2020
original_speaker_id = speaker_ids[original_speaker]
2121
target_speaker_id = speaker_ids[target_speaker]
2222

23-
audio = torch.tensor(audio).type(torch.float32)
24-
audio = audio.squeeze().unsqueeze(0)
25-
audio = audio / max(-audio.min(), audio.max()) / 0.99
23+
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
24+
if len(audio.shape) > 1:
25+
audio = librosa.to_mono(audio.transpose(1, 0))
2626
if sampling_rate != hps.data.sampling_rate:
27-
audio = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=22050)(audio)
27+
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=hps.data.sampling_rate)
2828
with no_grad():
2929
y = torch.FloatTensor(audio)
3030
y = y / max(-y.min(), y.max()) / 0.99
3131
y = y.to(device)
32+
y = y.unsqueeze(0)
3233
spec = spectrogram_torch(y, hps.data.filter_length,
3334
hps.data.sampling_rate, hps.data.hop_length, hps.data.win_length,
3435
center=False).to(device)

models_infer.py

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -400,24 +400,3 @@ def voice_conversion(self, y, y_lengths, sid_src, sid_tgt):
400400
o_hat = self.dec(z_hat * y_mask, g=g_tgt)
401401
return o_hat, y_mask, (z, z_p, z_hat)
402402

403-
def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
404-
if torch.min(y) < -1.:
405-
print('min value is ', torch.min(y))
406-
if torch.max(y) > 1.:
407-
print('max value is ', torch.max(y))
408-
409-
global hann_window
410-
dtype_device = str(y.dtype) + '_' + str(y.device)
411-
wnsize_dtype_device = str(win_size) + '_' + dtype_device
412-
if wnsize_dtype_device not in hann_window:
413-
hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
414-
415-
y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)),
416-
mode='reflect')
417-
y = y.squeeze(1)
418-
419-
spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
420-
center=center, pad_mode='reflect', normalized=False, onesided=True)
421-
422-
spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
423-
return spec

requirements_infer.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1 1
Cython
2+
librosa
2 3
numpy
3 4
scipy
4 5
torch

0 commit comments

Comments
 (0)