-
Notifications
You must be signed in to change notification settings - Fork 248
Open
Description
audio.txt
Use the following script to convert `audio.txt` to `audio.bin`, as I can't upload a binary file:
import base64
# Read the base64 text attachment; it stands in for the binary audio file,
# which could not be uploaded directly.
with open('audio.txt', 'r') as f:
    encoded_data = f.read()

# Decode the base64 payload back into the original audio bytes.
binary_data = base64.b64decode(encoded_data)

# Write the decoded bytes to disk in binary mode so they are not re-encoded.
with open('audio.bin', 'wb') as f:
    f.write(binary_data)
After converting a recording to a cut, `cut.load_audio()` failed with `lhotse==1.30.3`.
from lhotse import Recording
from lhotse.audio.backend import audio_backend, TorchaudioDefaultBackend
# Load the raw audio payload once at import time; both success() and fail()
# build their in-memory Recording from these same bytes.
with open("audio.bin", "rb") as f:
    audio_bytes = f.read()
def success() -> None:
    """Build a cut from the in-memory audio and load it under TorchaudioDefaultBackend.

    The recording is created, converted to a cut, and its audio loaded while
    the ``audio_backend(TorchaudioDefaultBackend)`` context is active. This is
    the variant the issue reports as working.
    """
    with audio_backend(TorchaudioDefaultBackend):
        recording = Recording.from_bytes(audio_bytes, "1")
        cut = recording.to_cut()
        cut.load_audio()
def fail() -> None:
    """Build a cut from the in-memory audio and load it with no backend override.

    Same steps as ``success()`` but without the ``audio_backend`` context.
    With lhotse==1.30.3 the ``cut.load_audio()`` call raises
    ``AudioLoadingError`` (declared num_samples=103909 vs. a loaded shape of
    (1, 102528)), which is the bug being reported.
    """
    recording = Recording.from_bytes(audio_bytes, "1")
    cut = recording.to_cut()
    cut.load_audio()
if __name__ == "__main__":
    # Run the working variant first, then the failing one, so the traceback
    # comes from fail().
    success()
    fail()
Error msg:
Traceback (most recent call last):
File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 848, in wrapper
return fn(*args, **kwargs)
File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/audio/recording.py", line 485, in load_audio
audio = assert_and_maybe_fix_num_samples(
File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/audio/recording.py", line 969, in assert_and_maybe_fix_num_samples
raise AudioLoadingError(
lhotse.audio.utils.AudioLoadingError: The number of declared samples in the recording diverged from the one obtained when loading audio (offset=0.0, duration=3.24715625). This could be internal Lhotse's error or a faulty transform implementation. Please report this issue in Lhotse and show the following: diff=1381, audio.shape=(1, 102528), recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 848, in wrapper
return fn(*args, **kwargs)
File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/cut/mono.py", line 77, in load_audio
return self.recording.load_audio(
File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 850, in wrapper
raise type(e)(
lhotse.audio.utils.AudioLoadingError: The number of declared samples in the recording diverged from the one obtained when loading audio (offset=0.0, duration=3.24715625). This could be internal Lhotse's error or a faulty transform implementation. Please report this issue in Lhotse and show the following: diff=1381, audio.shape=(1, 102528), recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None)
[extra info] When calling: Recording.load_audio(args=(Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None),) kwargs={'channels': 0, 'offset': 0.0, 'duration': 3.24715625})
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "./bug.py", line 23, in <module>
fail()
File "./bug.py", line 17, in fail
cut.load_audio()
File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 850, in wrapper
raise type(e)(
lhotse.audio.utils.AudioLoadingError: The number of declared samples in the recording diverged from the one obtained when loading audio (offset=0.0, duration=3.24715625). This could be internal Lhotse's error or a faulty transform implementation. Please report this issue in Lhotse and show the following: diff=1381, audio.shape=(1, 102528), recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None)
[extra info] When calling: Recording.load_audio(args=(Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None),) kwargs={'channels': 0, 'offset': 0.0, 'duration': 3.24715625})
[extra info] When calling: MonoCut.load_audio(args=(MonoCut(id='1', start=0.0, duration=3.24715625, channel=0, supervisions=[], features=None, recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None), custom=None),) kwargs={})
Metadata
Metadata
Assignees
Labels
No labels