-
Notifications
You must be signed in to change notification settings - Fork 134
Open
Description
Hi,
Is there a reason why the synthesis window is not applied to the reconstructed frames?
See the attached sketch below, which is based on the lowLatencySpeechEnhancement.ipynb
example.
With fix = False:
- your version
- clearly visible modulation in the overlap-add (OLA) output
- output gain != 1
With fix = True:
- adequate hop size
- unity gain
- synthesis window applied after irfft
from matplotlib.pyplot import *
from numpy import *
from numpy.fft import rfft, irfft
# Switch between the original behaviour (False) and the proposed fix (True).
fix = False

# Preprocessing params
fftSize = 1024

# Asymmetric windowing params
analysisWindowSize = fftSize
synthesisWindowSize = 128
if fix:
    # hop of a quarter of the synthesis window
    asymmetricHopSize = synthesisWindowSize // 4
else:
    # hop of three quarters of the synthesis window
    asymmetricHopSize = (3 * synthesisWindowSize) // 4
# k, m, d are the arguments handed to the asymmetric window builders.
k = analysisWindowSize
m = synthesisWindowSize // 2
d = 0

# Symmetric windowing params
symmetricWindowSize = fftSize
# Same hop as the asymmetric setup, to better compare results.
symmetricHopSize = asymmetricHopSize

# Generate test signal: an all-ones array with one channel row and
# ten FFT lengths of samples.
stereoSamples = ones((1, 10 * fftSize))
numChannels, numSamples = stereoSamples.shape
def getAsymmetricAnalysisWindow(k, m, d):
    """Build the asymmetric analysis window of length ``k``.

    Layout of the returned array:

    * ``window[:d]``      -- zeros,
    * ``window[d:k-m]``   -- rising half of a square-root Hann window,
      stretched over ``k - m - d`` samples,
    * ``window[k-m:]``    -- falling half of a square-root Hann window,
      ``m`` samples long.

    Parameters
    ----------
    k : int
        Total window length.
    m : int
        Length of the falling flank at the end of the window.
    d : int
        Number of leading zero samples.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``k``.

    Raises
    ------
    ValueError
        If ``m`` or ``d`` is negative, or if ``m + d`` exceeds ``k``
        (the flanks would not fit into the window).
    """
    if m < 0 or d < 0 or m + d > k:
        raise ValueError(
            "need m >= 0, d >= 0 and m + d <= k, got k=%r, m=%r, d=%r" % (k, m, d)
        )

    riseLength = k - m - d

    # hanning(2*n + 1) is symmetric with its peak at index n, so the first n
    # samples form the rising flank and samples n .. 2n-1 the falling flank.
    risingSqrtHann = sqrt(hanning(2 * riseLength + 1)[:riseLength])
    # Slicing [m:2*m] instead of [-m:] keeps the m == 0 case well defined.
    fallingSqrtHann = sqrt(hanning(2 * m + 1)[m:2 * m])

    window = zeros(k)  # the leading d samples stay zero
    window[d:k - m] = risingSqrtHann
    window[k - m:] = fallingSqrtHann
    return window
def getAsymmetricSynthesisWindow(k, m, d):
    """Build the asymmetric synthesis window of length ``k``.

    Only the last ``2*m`` samples are non-zero:

    * ``window[k-2*m:k-m]`` -- rising Hann flank divided by the rising
      square-root Hann flank that ``getAsymmetricAnalysisWindow(k, m, d)``
      places on the same samples,
    * ``window[k-m:]``      -- falling half of a square-root Hann window.

    Multiplied with that analysis window, the last ``2*m`` samples therefore
    form a plain Hann window.

    Parameters
    ----------
    k : int
        Total window length (same as the analysis window).
    m : int
        Half the length of the non-zero part of the window.
    d : int
        Number of leading zero samples of the matching analysis window.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``k``. For ``m == 0`` it is all zeros.

    Raises
    ------
    ValueError
        If ``m`` or ``d`` is negative, or if ``2*m + d`` exceeds ``k``
        (the non-zero part would overlap the analysis window's zero region).
    """
    if m < 0 or d < 0 or 2 * m + d > k:
        raise ValueError(
            "need m >= 0, d >= 0 and 2*m + d <= k, got k=%r, m=%r, d=%r" % (k, m, d)
        )

    riseLength = k - m - d

    # Rising flank of the analysis window and the part of it that overlaps
    # the synthesis window's rising flank (its last m samples).
    risingSqrtHannAnalysis = sqrt(hanning(2 * riseLength + 1)[:riseLength])
    analysisOverlap = risingSqrtHannAnalysis[riseLength - m:]

    # When k == 2*m + d the overlap starts at the very first analysis sample,
    # which is exactly 0, and so is the Hann numerator there.  A plain
    # division would yield 0/0 = NaN; the masked divide leaves that sample 0.
    risingHann = hanning(2 * m + 1)[:m]
    risingNormalizedHann = divide(
        risingHann,
        analysisOverlap,
        out=zeros_like(risingHann),
        where=analysisOverlap > 0,
    )

    # Slicing [m:2*m] instead of [-m:] keeps the m == 0 case well defined.
    fallingSqrtHann = sqrt(hanning(2 * m + 1)[m:2 * m])

    window = zeros(k)  # everything before the last 2*m samples stays zero
    window[k - 2 * m:k - m] = risingNormalizedHann
    window[k - m:] = fallingSqrtHann
    return window
def performOnlineSpeechEnhancement(analysisWindow, synthesisWindow, hopSize):
    """Run a pass-through STFT analysis / overlap-add resynthesis of the test signal.

    No enhancement is applied: every analysed spectrum is stored and then
    transformed straight back, so the output only shows the effect of the
    windowing, the hop size and the gain factor.

    The function reads the module-level names ``stereoSamples`` (input signal,
    indexed as ``[channel, sample]``), ``numSamples`` and ``fix``.

    Parameters
    ----------
    analysisWindow : 1-D array
        Applied to every frame before ``rfft``.  Its length is the frame length.
    synthesisWindow : 1-D array
        With ``fix`` true it is multiplied onto every reconstructed frame (so
        it must have the analysis window's length) and enters the gain factor.
        With ``fix`` false only its length is used, for the gain factor.
    hopSize : int
        Number of samples between the starts of consecutive frames.

    Returns
    -------
    tuple
        ``(inputSpectrogram, outputSpectrogram, targetEstimateSamplesOLA)``:
        two ``complex64`` arrays of shape ``(2, numFrequencies, numFrames)``
        holding the same spectra, and the gain-corrected overlap-add output
        with the shape of ``stereoSamples``.
    """
    # The frame length comes from the window actually applied, not from a
    # module-level constant, so the function works for any window length.
    frameLength = len(analysisWindow)
    numFrequencies = frameLength // 2 + 1  # number of rfft bins
    # Only frames that fit completely into the signal are processed.
    numFrames = (numSamples - frameLength) // hopSize

    if fix:
        gainFactor = hopSize / sum(analysisWindow * synthesisWindow)
    else:
        gainFactor = hopSize / float(len(synthesisWindow)) * 2

    targetEstimateSamplesOLA = zeros_like(stereoSamples)
    # NOTE(review): the channel axis is hard-coded to 2; a single-channel
    # frame is broadcast into both rows by the assignments in the loop.
    inputSpectrogram = zeros((2, numFrequencies, numFrames), 'complex64')
    outputSpectrogram = zeros((2, numFrequencies, numFrames), 'complex64')

    for frameIndex in range(numFrames):
        # compute FFT
        frameStart = frameIndex * hopSize
        frameEnd = frameStart + frameLength
        stereoSTFTFrame = rfft(stereoSamples[:, frameStart:frameEnd] * analysisWindow)
        inputSpectrogram[..., frameIndex] = stereoSTFTFrame
        outputSpectrogram[..., frameIndex] = stereoSTFTFrame

        # reconstruct time domain samples; n is passed explicitly because
        # irfft would otherwise return one sample too few for odd lengths
        recStereoSTFTFrame = irfft(stereoSTFTFrame, n=frameLength)
        if fix:
            # apply synthesis window as well
            recStereoSTFTFrame *= synthesisWindow

        # overlap-add to output samples
        targetEstimateSamplesOLA[:, frameStart:frameEnd] += recStereoSTFTFrame

    targetEstimateSamplesOLA *= gainFactor
    return inputSpectrogram, outputSpectrogram, targetEstimateSamplesOLA
# Build the asymmetric analysis/synthesis pair and the symmetric
# square-root Hann reference window.
analysisWindow = getAsymmetricAnalysisWindow(k, m, d)
synthesisWindow = getAsymmetricSynthesisWindow(k, m, d)
symmetricWindow = sqrt(hanning(symmetricWindowSize))

# Process the test signal once per windowing scheme.
symmetricResults = performOnlineSpeechEnhancement(
    symmetricWindow, symmetricWindow, symmetricHopSize
)
asymmetricResults = performOnlineSpeechEnhancement(
    analysisWindow, synthesisWindow, asymmetricHopSize
)

if fix:
    title('fixed')
else:
    title('orig')

# The last tuple element is the overlap-add output; [-1] picks its last
# channel row for plotting.
symmetricOutput = symmetricResults[-1][-1]
asymmetricOutput = asymmetricResults[-1][-1]
plot(symmetricOutput, label='symmetric', color='b', alpha=0.5)
plot(asymmetricOutput, label='asymmetric', color='r', alpha=0.5)
legend()
show()
Metadata
Metadata
Assignees
Labels
No labels