Skip to content

cut.load_audio() fail bug. #1500

@kobenaxie

Description

@kobenaxie

audio.txt
Use the following script to convert the attached audio.txt (base64-encoded) back into audio.bin, since I can't upload a binary file.

import base64

# Read the base64 text attachment in full.
with open('audio.txt') as src:
    b64_text = src.read()

# Decode before opening the output, so a bad input never touches audio.bin.
raw_audio = base64.b64decode(b64_text)

# Write the decoded bytes out as the binary audio file.
with open('audio.bin', 'wb') as dst:
    dst.write(raw_audio)

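After converting a recording to a cut, cut.load_audio() fails with lhotse==1.30.3 unless the recording was created inside an audio_backend(TorchaudioDefaultBackend) context, as the script below shows.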

from lhotse import Recording
from lhotse.audio.backend import audio_backend, TorchaudioDefaultBackend

# Read the whole audio.bin file into memory; success() and fail() both build
# their Recording from these same bytes.
with open("audio.bin", "rb") as f:
    audio_bytes = f.read()
 
def success():
    """Load audio from a Recording created under TorchaudioDefaultBackend.

    Reported as the working case. Only ``Recording.from_bytes`` runs inside
    the ``audio_backend(TorchaudioDefaultBackend)`` context; the cut is
    created and ``load_audio()`` is called after the context has exited.
    """
    with audio_backend(TorchaudioDefaultBackend):
        recording = Recording.from_bytes(audio_bytes, "1")
    # Note: these two calls run outside the `with` block, exactly as in fail().
    cut = recording.to_cut()
    cut.load_audio()

def fail():
    """Load audio from a Recording created without selecting a backend.

    Reported as the failing case: ``cut.load_audio()`` raises
    ``AudioLoadingError`` because the loaded audio has 102528 samples while
    the Recording declares ``num_samples=103909`` (diff=1381).
    """
    recording = Recording.from_bytes(audio_bytes, "1")
    cut = recording.to_cut()
    cut.load_audio()


if __name__ == "__main__":
    # success() runs first and completes; the reported traceback originates
    # from the fail() call.
    success()
    fail()

Error message:

Traceback (most recent call last):
  File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 848, in wrapper
    return fn(*args, **kwargs)
  File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/audio/recording.py", line 485, in load_audio
    audio = assert_and_maybe_fix_num_samples(
  File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/audio/recording.py", line 969, in assert_and_maybe_fix_num_samples
    raise AudioLoadingError(
lhotse.audio.utils.AudioLoadingError: The number of declared samples in the recording diverged from the one obtained when loading audio (offset=0.0, duration=3.24715625). This could be internal Lhotse's error or a faulty transform implementation. Please report this issue in Lhotse and show the following: diff=1381, audio.shape=(1, 102528), recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 848, in wrapper
    return fn(*args, **kwargs)
  File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/cut/mono.py", line 77, in load_audio
    return self.recording.load_audio(
  File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 850, in wrapper
    raise type(e)(
lhotse.audio.utils.AudioLoadingError: The number of declared samples in the recording diverged from the one obtained when loading audio (offset=0.0, duration=3.24715625). This could be internal Lhotse's error or a faulty transform implementation. Please report this issue in Lhotse and show the following: diff=1381, audio.shape=(1, 102528), recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None)
[extra info] When calling: Recording.load_audio(args=(Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None),) kwargs={'channels': 0, 'offset': 0.0, 'duration': 3.24715625})

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "./bug.py", line 23, in <module>
    fail()
  File "./bug.py", line 17, in fail
    cut.load_audio()
  File "/conda_envs/torch/lib/python3.10/site-packages/lhotse/utils.py", line 850, in wrapper
    raise type(e)(
lhotse.audio.utils.AudioLoadingError: The number of declared samples in the recording diverged from the one obtained when loading audio (offset=0.0, duration=3.24715625). This could be internal Lhotse's error or a faulty transform implementation. Please report this issue in Lhotse and show the following: diff=1381, audio.shape=(1, 102528), recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None)
[extra info] When calling: Recording.load_audio(args=(Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None),) kwargs={'channels': 0, 'offset': 0.0, 'duration': 3.24715625})
[extra info] When calling: MonoCut.load_audio(args=(MonoCut(id='1', start=0.0, duration=3.24715625, channel=0, supervisions=[], features=None, recording=Recording(id='1', sources=[AudioSource(type='memory', channels=[0], source='<binary-data>')], sampling_rate=32000, num_samples=103909, duration=3.24715625, channel_ids=[0], transforms=None), custom=None),) kwargs={})

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions