From fe624e7da58fc33296cd144af637bf4c4643f355 Mon Sep 17 00:00:00 2001 From: Phil Schatzmann Date: Sun, 1 Jun 2025 20:04:33 +0200 Subject: [PATCH 1/7] Update test-codec-alac.ino --- examples/tests/codecs/test-codec-alac/test-codec-alac.ino | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino index b52f69e03..1eec5c5e3 100644 --- a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino +++ b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino @@ -2,6 +2,7 @@ * @file test-codec-alac.ino * @author Phil Schatzmann * @brief generate sine wave -> encoder -> decoder -> audiokit (i2s) + * @note Activate PSRAM or decrease the frame size e.g. by adding 1024 to the constructor of the enc_alac and dec_alac * @version 0.1 * * @copyright Copyright (c) 2025 @@ -56,4 +57,4 @@ void setup() { void loop() { copier.copy(); -} \ No newline at end of file +} From 5d657ca4dce51c679dae3c1b758e2b588ff499dd Mon Sep 17 00:00:00 2001 From: Phil Schatzmann Date: Sun, 1 Jun 2025 20:05:05 +0200 Subject: [PATCH 2/7] Update test-codec-alac.ino --- examples/tests/codecs/test-codec-alac/test-codec-alac.ino | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino index 1eec5c5e3..4902d6632 100644 --- a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino +++ b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino @@ -12,7 +12,7 @@ #include "AudioTools/AudioCodecs/CodecALAC.h" #include "AudioTools/AudioLibs/AudioBoardStream.h" -SET_LOOP_TASK_STACK_SIZE(16*1024); // 16KB +// SET_LOOP_TASK_STACK_SIZE(16*1024); // 16KB - not needed AudioInfo info(44100, 2, 16); SineWaveGenerator sineWave( 32000); // subclass of SoundGenerator with max amplitude of 32000 From df97da4a819eab2024f1c1f64a1706d73d8ac617 Mon Sep 17 00:00:00 2001 From: 
pschatzmann Date: Wed, 4 Jun 2025 09:11:14 +0200 Subject: [PATCH 3/7] InputMixer --- src/AudioTools/CoreAudio/AudioStreams.h | 45 +++++++++++++++++++++---- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h index 715e9c25b..28f0e7873 100644 --- a/src/AudioTools/CoreAudio/AudioStreams.h +++ b/src/AudioTools/CoreAudio/AudioStreams.h @@ -1142,19 +1142,22 @@ class InputMixer : public AudioStream { public: InputMixer() = default; - /// Adds a new input stream - void add(Stream &in, int weight = 100) { + /// Adds a new input stream and returns its actual index position + int add(Stream &in, int weight = 100) { streams.push_back(&in); weights.push_back(weight); total_weights += weight; + return streams.indexOf(&in); } - /// Replaces a stream at the indicated channel - void set(int channel, Stream &in) { - if (channel < size()) { - streams[channel] = &in; + /// Replaces a stream at the indicated index + bool set(int index, Stream &in) { + if (index < size()) { + streams[index] = &in; + return true; } else { - LOGE("Invalid channel %d - max is %d", channel, size() - 1); + LOGE("Invalid index %d - max is %d", index, size() - 1); + return false; } } @@ -1224,6 +1227,34 @@ class InputMixer : public AudioStream { /// abort the read and provide empty data void setRetryCount(int retry) { retry_count = retry; } + /// Removes a stream by index position + bool remove(int idx) { + if (idx < 0 || idx >= size()) { + return false; + } + streams.erase(idx); + return true; + } + + /// Provides the actual index of the stream + int indexOf(Stream &stream) { return streams.indexOf(&stream); } + + /// Provides the stream pointer at the indicated index + Stream * operator [](int idx) { + if (idx < 0 || idx >= size()) return nullptr; + return streams[idx]; + } + + /// Provides you the index of the next empty stream. -1 when none is found. 
+ int nextEmptyIndex() { + for (int i = 0; i < streams.size(); i++) { + if (streams[i]->available() == 0) { + return i; + } + } + return -1; + } + protected: Vector streams{0}; Vector weights{0}; From fad6c2a4e0806b8da3311178b9eb4402efad4ed6 Mon Sep 17 00:00:00 2001 From: pschatzmann Date: Wed, 4 Jun 2025 09:25:35 +0200 Subject: [PATCH 4/7] InputMixer --- src/AudioTools/CoreAudio/AudioStreams.h | 41 +++++++++++++++++++------ 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h index 28f0e7873..30093bab5 100644 --- a/src/AudioTools/CoreAudio/AudioStreams.h +++ b/src/AudioTools/CoreAudio/AudioStreams.h @@ -1174,11 +1174,7 @@ class InputMixer : public AudioStream { void setWeight(int channel, int weight) { if (channel < size()) { weights[channel] = weight; - int total = 0; - for (int j = 0; j < weights.size(); j++) { - total += weights[j]; - } - total_weights = total; + recalculateWeights(); } else { LOGE("Invalid channel %d - max is %d", channel, size() - 1); } @@ -1233,14 +1229,30 @@ class InputMixer : public AudioStream { return false; } streams.erase(idx); + weights.erase(idx); + recalculateWeights(); return true; } + /// Removes all streams which have no data available + bool remove() { + bool rc = false; + int idx = nextEmptyIndex(); + while (idx >= 0) { + rc = true; + streams.erase(idx); + weights.erase(idx); + idx = nextEmptyIndex(); + } + recalculateWeights(); + return rc; + } + /// Provides the actual index of the stream int indexOf(Stream &stream) { return streams.indexOf(&stream); } /// Provides the stream pointer at the indicated index - Stream * operator [](int idx) { + Stream *operator[](int idx) { if (idx < 0 || idx >= size()) return nullptr; return streams[idx]; } @@ -1265,6 +1277,15 @@ class InputMixer : public AudioStream { Vector result_vect; Vector current_vect; + /// Recalculate the weights + void recalculateWeights() { + int total = 0; + for (int j = 0; j 
< weights.size(); j++) { + total += weights[j]; + } + total_weights = total; + } + /// mixing using a vector of samples int readBytesVector(T *p_data, int byteCount) { int samples = byteCount / sizeof(T); @@ -1475,8 +1496,8 @@ class CallbackStream : public ModifyingStream { /// defines the callback to receive the actual audio info void setAudioInfoCallback(void (*cb)(AudioInfo info)) { this->cb_audio_info = cb; - } - + } + /// Updates the audio info and calls the callback void setAudioInfo(AudioInfo info) override { ModifyingStream::setAudioInfo(info); @@ -1714,9 +1735,9 @@ class VolumeMeter : public ModifyingStream { return begin(); } - bool begin() override { + bool begin() override { setAudioInfo(audioInfo()); - return true; + return true; } void setAudioInfo(AudioInfo info) override { From 6a608c9a3433067b4f0b0b22cf66d6138bc8b99e Mon Sep 17 00:00:00 2001 From: pschatzmann Date: Wed, 4 Jun 2025 09:56:36 +0200 Subject: [PATCH 5/7] Compile errors --- src/AudioTools/CoreAudio/AudioStreams.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h index 7ede245e4..bd389cd6a 100644 --- a/src/AudioTools/CoreAudio/AudioStreams.h +++ b/src/AudioTools/CoreAudio/AudioStreams.h @@ -1171,12 +1171,12 @@ class InputMixer : public AudioStream { /// Dynamically update the new weight for the indicated channel: If you set it /// to 0 it is muted (and the stream is not read any more). 
We recommend to /// use values between 1 and 100 - void setWeight(int channel, int weight) { + void setWeight(int index, int weight) { if (channel < size()) { - weights[channel] = weight; + weights[index] = weight; recalculateWeights(); } else { - LOGE("Invalid channel %d - max is %d", channel, size() - 1); + LOGE("Invalid index %d - max is %d", index, size() - 1); } } @@ -1267,11 +1267,6 @@ class InputMixer : public AudioStream { return -1; } - /// Provides the actual index of the stream - int indexOf(Stream& stream){ - return streams.indexOf(&stream); - } - protected: Vector streams{0}; Vector weights{0}; From c447efb725b1e89b5d01556452a8ec54148abd0b Mon Sep 17 00:00:00 2001 From: pschatzmann Date: Wed, 4 Jun 2025 09:58:56 +0200 Subject: [PATCH 6/7] Compile Errors --- src/AudioTools/CoreAudio/AudioStreams.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h index bd389cd6a..f065323f1 100644 --- a/src/AudioTools/CoreAudio/AudioStreams.h +++ b/src/AudioTools/CoreAudio/AudioStreams.h @@ -1172,7 +1172,7 @@ class InputMixer : public AudioStream { /// to 0 it is muted (and the stream is not read any more). 
We recommend to /// use values between 1 and 100 void setWeight(int index, int weight) { - if (channel < size()) { + if (index < streams.size()) { weights[index] = weight; recalculateWeights(); } else { From 6a99056fad8fcc8a2001a414609cfd46b3aed1a8 Mon Sep 17 00:00:00 2001 From: Phil Schatzmann Date: Wed, 4 Jun 2025 20:44:29 +0200 Subject: [PATCH 7/7] M4a (#2063) * InputMixer * MP4: fix alac * Fix esds parsing with ESDSParser * ContainerM4A fix broken output * Deactivate stco * M4A alac support --- src/AudioTools/AudioCodecs/CodecALAC.h | 11 +- src/AudioTools/AudioCodecs/CodecCopy.h | 4 + src/AudioTools/AudioCodecs/ContainerM4A.h | 6 +- src/AudioTools/AudioCodecs/M4AAudioDemuxer.h | 220 ++++++++++++++---- src/AudioTools/AudioCodecs/MP4Parser.h | 58 +++-- .../AudioCodecs/MP4ParserIncremental.h | 8 +- src/AudioTools/AudioCodecs/MultiDecoder.h | 15 +- tests-cmake/codec/container-m4a/m4a.ino | 2 +- 8 files changed, 250 insertions(+), 74 deletions(-) diff --git a/src/AudioTools/AudioCodecs/CodecALAC.h b/src/AudioTools/AudioCodecs/CodecALAC.h index c8cc4bef1..9847459d9 100644 --- a/src/AudioTools/AudioCodecs/CodecALAC.h +++ b/src/AudioTools/AudioCodecs/CodecALAC.h @@ -42,7 +42,7 @@ class DecoderALAC : public AudioDecoder { DecoderALAC(int frameSize = kALACDefaultFrameSize) { // this is used when setCodecConfig() is not called with encoder info setFrameSize(frameSize); - setDefaultConfig(); + //setDefaultConfig(); } // define ALACSpecificConfig @@ -86,14 +86,13 @@ class DecoderALAC : public AudioDecoder { dec.mConfig.bitDepth = from.bits_per_sample; } + /// we expect the write is called for a complete frame! 
size_t write(const uint8_t* encodedFrame, size_t encodedLen) override { LOGD("DecoderALAC::write: %d", (int)encodedLen); - // Safety check - if (!is_init) { - LOGE("Decoder not initialized"); - return 0; - } + // Make sure we have a config: we can't do this in begin because the setConfig() + // might be called after begin() + if (!is_init) setDefaultConfig(); // Make sure we have the output buffer set up if (result_buffer.size() != outputBufferSize()) { diff --git a/src/AudioTools/AudioCodecs/CodecCopy.h b/src/AudioTools/AudioCodecs/CodecCopy.h index ab2ff7bcb..469566651 100644 --- a/src/AudioTools/AudioCodecs/CodecCopy.h +++ b/src/AudioTools/AudioCodecs/CodecCopy.h @@ -36,6 +36,10 @@ class CopyDecoder : public AudioDecoder { size_t write(const uint8_t *data, size_t len) { TRACED(); + if (pt_print == nullptr) { + LOGE("No output stream defined for CopyDecoder"); + return 0; + } return pt_print->write((uint8_t*)data,len); } diff --git a/src/AudioTools/AudioCodecs/ContainerM4A.h b/src/AudioTools/AudioCodecs/ContainerM4A.h index 1e1fa57d3..c5ccf2347 100644 --- a/src/AudioTools/AudioCodecs/ContainerM4A.h +++ b/src/AudioTools/AudioCodecs/ContainerM4A.h @@ -37,7 +37,7 @@ class ContainerM4A : public ContainerDecoder { * @brief Set the output stream for decoded or raw audio. * @param out_stream Output AudioStream. 
*/ - void setOutput(AudioStream& out_stream) override { + void setOutput(Print& out_stream) override { if (p_decoder != nullptr) p_decoder->setOutput(out_stream); ContainerDecoder::setOutput(out_stream); } @@ -118,7 +118,9 @@ class ContainerM4A : public ContainerDecoder { !self->is_magic_cookie_processed) { auto& magic_cookie = self->demux.getALACMagicCookie(); if (magic_cookie.size() > 0) { - dec.setCodecConfig(magic_cookie.data(), magic_cookie.size()); + if (!dec.setCodecConfig(magic_cookie.data(), magic_cookie.size())){ + LOGE("Failed to set ALAC magic cookie for decoder: %s", dec.selectedMime()); + } } self->is_magic_cookie_processed = true; } diff --git a/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h b/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h index 83831e756..56e1dc32e 100644 --- a/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h +++ b/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h @@ -4,6 +4,12 @@ namespace audio_tools { +/// The stsz sample size type should usually be uint32_t: However for audio +/// we expect that the sample size is usually around 1 - 2k, so uint16_t +/// should be more than sufficient! Microcontrollers only have a limited +/// amount of RAM, so this makes a big difference! +using stsz_sample_size_t = uint16_t; + /** * @brief A simple M4A audio data demuxer which is providing * AAC, MP3 and ALAC frames. @@ -27,6 +33,67 @@ class M4AAudioDemuxer { uint64_t timestamp; ///< Timestamp of the frame (if available). }; + /** + * @brief A parser for the ESDS segment to extract the relevant aac + * information. 
+ * + */ + struct ESDSParser { + uint8_t audioObjectType; + uint8_t samplingRateIndex; + uint8_t channelConfiguration; + bool isValid = false; ///< True if the ESDP is valid + + // Parses esds content to extract audioObjectType, frequencyIndex, and + // channelConfiguration + bool parse(const uint8_t* data, size_t size) { + const uint8_t* ptr = data; + const uint8_t* end = data + size; + + if (ptr + 4 > end) return false; + ptr += 4; // skip version + flags + + if (ptr >= end || *ptr++ != 0x03) return false; + size_t es_len = parse_descriptor_length(ptr, end); + if (ptr + es_len > end) return false; + + ptr += 2; // skip ES_ID + ptr += 1; // skip flags + + if (ptr >= end || *ptr++ != 0x04) return false; + size_t dec_len = parse_descriptor_length(ptr, end); + if (ptr + dec_len > end) return false; + + ptr += 13; // skip objectTypeIndication, streamType, bufferSizeDB, + // maxBitrate, avgBitrate + + if (ptr >= end || *ptr++ != 0x05) return false; + size_t dsi_len = parse_descriptor_length(ptr, end); + if (ptr + dsi_len > end || dsi_len < 2) return false; + + uint8_t byte1 = ptr[0]; + uint8_t byte2 = ptr[1]; + + audioObjectType = (byte1 >> 3) & 0x1F; + samplingRateIndex = ((byte1 & 0x07) << 1) | ((byte2 >> 7) & 0x01); + channelConfiguration = (byte2 >> 3) & 0x0F; + return true; + } + + protected: + // Helper to decode variable-length descriptor lengths (e.g. 0x80 80 80 05) + inline size_t parse_descriptor_length(const uint8_t*& ptr, + const uint8_t* end) { + size_t len = 0; + for (int i = 0; i < 4 && ptr < end; ++i) { + uint8_t b = *ptr++; + len = (len << 7) | (b & 0x7F); + if ((b & 0x80) == 0) break; + } + return len; + } + }; + /** * @brief Extracts audio data based on the sample sizes defined in the stsz * box. It collects the data from the mdat box and calls the callback with the @@ -75,10 +142,15 @@ class M4AAudioDemuxer { void setReference(void* r) { ref = r; } /** - * @brief Sets the maximum box size (e.g., for mdat). 
+ * @brief Sets the maximum box size (e.g., for mdat). This is called before + * the mdat data is posted. In order to be able to play a file multiple + * times we just reset the sampleIndex! * @param size Maximum size in bytes. */ - void setMaxSize(size_t size) { box_size = size; } + void setMaxSize(size_t size) { + box_size = size; + sampleIndex = 0; + } /** * @brief Writes data to the extractor, extracting frames as sample sizes @@ -92,7 +164,7 @@ class M4AAudioDemuxer { // Resize buffer to the current sample size size_t currentSize = currentSampleSize(); if (currentSize == 0) { - LOGE("No sample size defined, cannot write data"); + LOGE("No sample size defined: e.g. mdat before stsz!"); return 0; } resize(currentSize); @@ -126,7 +198,7 @@ class M4AAudioDemuxer { * @brief Returns the vector of sample sizes. * @return Reference to the vector of sample sizes. */ - Vector& getSampleSizes() { return sampleSizes; } + Vector& getSampleSizes() { return sampleSizes; } /** * @brief Returns the vector of chunk offsets. @@ -158,13 +230,13 @@ class M4AAudioDemuxer { } protected: - Vector sampleSizes; ///< Table of sample sizes. - Vector chunkOffsets; ///< Table of chunk offsets. - Codec codec = Codec::Unknown; ///< Current codec. - FrameCallback callback = nullptr; ///< Frame callback. - void* ref = nullptr; ///< Reference pointer for callback. - size_t sampleIndex = 0; ///< Current sample index. - SingleBuffer buffer; ///< Buffer for accumulating sample data. + Vector sampleSizes; ///< Table of sample sizes. + Vector chunkOffsets; ///< Table of chunk offsets. + Codec codec = Codec::Unknown; ///< Current codec. + FrameCallback callback = nullptr; ///< Frame callback. + void* ref = nullptr; ///< Reference pointer for callback. + size_t sampleIndex = 0; ///< Current sample index. + SingleBuffer buffer; ///< Buffer for accumulating sample data. int aacProfile = 2, sampleRateIdx = 4, channelCfg = 2; ///< AAC config. 
uint32_t fixed_sample_size = 0; ///< Fixed sample size (if used). uint32_t fixed_sample_count = 0; ///< Fixed sample count (if used). @@ -304,6 +376,9 @@ class M4AAudioDemuxer { alacMagicCookie.clear(); resize(default_size); + stsz_processed = false; + stco_processed = false; + // When codec/sampleSizes/callback/ref change, update the extractor: parser.begin(); sampleExtractor.begin(); @@ -353,6 +428,8 @@ class M4AAudioDemuxer { SampleExtractor sampleExtractor; ///< Extractor for audio samples. void* ref = nullptr; ///< Reference pointer for callbacks. size_t default_size = 2 * 1024; ///< Default buffer size. + bool stsz_processed = false; ///< Marks the stsz table as processed + bool stco_processed = false; ///< Marks the stco table as processed /** * @brief Reads a 32-bit big-endian unsigned integer from a buffer. @@ -458,23 +535,22 @@ class M4AAudioDemuxer { } else if (StrView(box.type) == "stsz") { onStsz(box); } else if (StrView(box.type) == "stco") { - onStco(box); + // onStco(box); // currently not supported } } - /** * @brief Handles the stsd (Sample Description) box. * @param box MP4 box. */ void onStsd(const MP4Parser::Box& box) { LOGI("onStsd: %s, size: %zu bytes", box.type, box.data_size); - const uint8_t* data = box.data; - size_t size = box.data_size; - if (size < 8) return; - uint32_t entryCount = readU32(data + 4); + // printHexDump(box); + if (box.data_size < 8) return; + uint32_t entryCount = readU32(box.data + 4); // One or more sample entry boxes (e.g. 
mp4a, .mp3, alac) - parser.parseString(data + 8, size - 8); + parser.parseString(box.data + 8, box.data_size - 8, box.file_offset + 8 + 8, + box.level + 1); } /** @@ -483,6 +559,7 @@ class M4AAudioDemuxer { */ void onMp4a(const MP4Parser::Box& box) { LOGI("onMp4a: %s, size: %zu bytes", box.type, box.data_size); + // printHexDump(box); if (box.data_size < 36) return; // Minimum size for mp4a box // use default configuration @@ -495,7 +572,7 @@ class M4AAudioDemuxer { /// for mp4a we expect to contain a esds: child boxes start at 36 int pos = 36 - 8; - parser.parseString(box.data + pos, box.data_size - pos); + parser.parseString(box.data + pos, box.data_size - pos, box.level + 1); } /** @@ -504,28 +581,54 @@ class M4AAudioDemuxer { */ void onEsds(const MP4Parser::Box& box) { LOGI("onEsds: %s, size: %zu bytes", box.type, box.data_size); - int aacProfile = 2; // Default: AAC LC - int sampleRateIdx = 4; // Default: 44100 Hz - int channelCfg = 2; // Default: Stereo + // printHexDump(box); + ESDSParser esdsParser; + if (!esdsParser.parse(box.data, box.data_size)) { + LOGE("Failed to parse esds box"); + return; + } + LOGI( + "-> esds: AAC objectType: %u, samplingRateIdx: %u, " + "channelCfg: %u", + esdsParser.audioObjectType, esdsParser.samplingRateIndex, + esdsParser.channelConfiguration); + sampleExtractor.setAACConfig(esdsParser.audioObjectType, + esdsParser.samplingRateIndex, + esdsParser.channelConfiguration); + } - for (size_t i = 2; i + 4 < box.data_size; ++i) { - if (box.data[i] == 0x05) { // 0x05 = AudioSpecificConfig tag - uint8_t asc_len = box.data[i + 1]; - if (i + 2 + asc_len > box.data_size) { - LOGW("esds box not long enough for AudioSpecificConfig"); - break; - }; - const uint8_t* asc = box.data + i + 2; - // AudioSpecificConfig is at least 2 bytes - aacProfile = (asc[0] >> 3) & 0x1F; // 5 bits - sampleRateIdx = - ((asc[0] & 0x07) << 1) | ((asc[1] >> 7) & 0x01); // 4 bits - channelCfg = (asc[1] >> 3) & 0x0F; // 4 bits - LOGI("AudioSpecificConfig: 
profile=%d, sampleRateIdx=%d, channelCfg=%d", - aacProfile, sampleRateIdx, channelCfg); - sampleExtractor.setAACConfig(aacProfile, sampleRateIdx, channelCfg); - } + void fixALACMagicCookie(uint8_t* cookie, size_t len) { + if (len < 28) { + return; } + + // Helper to read/write big-endian + auto read32 = [](uint8_t* p) -> uint32_t { + return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; + }; + auto write32 = [](uint8_t* p, uint32_t val) { + p[0] = (val >> 24) & 0xFF; + p[1] = (val >> 16) & 0xFF; + p[2] = (val >> 8) & 0xFF; + p[3] = val & 0xFF; + }; + auto read16 = [](uint8_t* p) -> uint16_t { return (p[0] << 8) | p[1]; }; + auto write16 = [](uint8_t* p, uint16_t val) { + p[0] = (val >> 8) & 0xFF; + p[1] = val & 0xFF; + }; + + // Fix values if zero or invalid + if (read32(cookie + 0) == 0) write32(cookie + 0, 4096); // frameLength + if (cookie[6] == 0) cookie[6] = 16; // bitDepth + if (cookie[7] == 0 || cookie[7] > 32) cookie[7] = 10; // pb + if (cookie[8] == 0 || cookie[8] > 32) cookie[8] = 14; // mb + if (cookie[9] == 0 || cookie[9] > 32) cookie[9] = 10; // kb + if (cookie[10] == 0 || cookie[10] > 8) cookie[10] = 2; // numChannels + if (read16(cookie + 11) == 0) write16(cookie + 11, 255); // maxRun + if (read32(cookie + 13) == 0) write32(cookie + 13, 8192); // maxFrameBytes + if (read32(cookie + 17) == 0) write32(cookie + 17, 512000); // avgBitRate + if (read32(cookie + 21) == 0) write32(cookie + 21, 44100); // sampleRate } /** @@ -537,8 +640,13 @@ class M4AAudioDemuxer { codec = Codec::ALAC; sampleExtractor.setCodec(codec); - alacMagicCookie.resize(box.data_size); - std::memcpy(alacMagicCookie.data(), box.data, box.data_size); + // only alac box in alac contains magic cookie + MP4Parser::Box alac; + if (parser.findBox("alac", box.data, box.data_size, alac)) { + // fixALACMagicCookie((uint8_t*)alac.data, alac.data_size); + alacMagicCookie.resize(alac.data_size - 4); + std::memcpy(alacMagicCookie.data(), alac.data + 4, alac.data_size - 4); + } } /** @@ -547,24 
+655,25 @@ class M4AAudioDemuxer { */ void onStsz(MP4Parser::Box& box) { LOGI("onStsz: %s, size: %zu bytes", box.type, box.data_size); + if (stsz_processed) return; // Parse stsz box and fill sampleSizes const uint8_t* data = box.data; - size_t size = box.data_size; - if (size < 12) return; uint32_t sampleSize = readU32(data + 4); uint32_t sampleCount = readU32(data + 8); sampleExtractor.begin(); - Vector& sampleSizes = sampleExtractor.getSampleSizes(); + Vector& sampleSizes = sampleExtractor.getSampleSizes(); if (sampleSize == 0) { - if (size < 12 + 4 * sampleCount) return; LOGI("-> Sample Sizes Count: %u", sampleCount); sampleSizes.resize(sampleCount); for (uint32_t i = 0; i < sampleCount; ++i) { - sampleSizes[i] = readU32(data + 12 + i * 4); + uint32_t sampleSizes32 = readU32(data + 12 + i * 4); + sampleSizes[i] = static_cast(sampleSizes32); + assert(static_cast(sampleSizes[i]) == sampleSizes32); } } else { sampleExtractor.setFixedSampleCount(sampleSize, sampleCount); } + stsz_processed = true; } /** @@ -573,6 +682,7 @@ class M4AAudioDemuxer { */ void onStco(MP4Parser::Box& box) { LOGI("onStco: %s, size: %zu bytes", box.type, box.data_size); + if (stco_processed) return; // Parse stco box and fill chunkOffsets const uint8_t* data = box.data + 4; size_t size = box.data_size; @@ -585,6 +695,24 @@ class M4AAudioDemuxer { for (uint32_t i = 0; i < entryCount; ++i) { chunkOffsets[i] = readU32(data + 4 + i * 4); } + stco_processed = true; + } + + void printHexDump(const MP4Parser::Box& box) { + const uint8_t* data = box.data; + size_t len = box.data_size; + LOGI("==========================="); + for (size_t i = 0; i < len; i += 16) { + char hex[49] = {0}; + char ascii[17] = {0}; + for (size_t j = 0; j < 16 && i + j < len; ++j) { + sprintf(hex + j * 3, "%02X ", data[i + j]); + ascii[j] = (data[i + j] >= 32 && data[i + j] < 127) ? 
data[i + j] : '.'; + } + ascii[16] = 0; + LOGI("%04zx: %-48s |%s|", i, hex, ascii); + } + LOGI("==========================="); } }; diff --git a/src/AudioTools/AudioCodecs/MP4Parser.h b/src/AudioTools/AudioCodecs/MP4Parser.h index eb266eca6..9ffc2d61a 100644 --- a/src/AudioTools/AudioCodecs/MP4Parser.h +++ b/src/AudioTools/AudioCodecs/MP4Parser.h @@ -20,8 +20,11 @@ namespace audio_tools { * Serial. * If a container box contains data, it will be processed recursively and if it * contains data itself, it might be reported in a second callback call. - * @note This parser expects that the buffer size is larger than the biggest - * box! + * @note This parser expects the mdat box to be the last box in the file. This + * can be achieved with the following ffmpeg commands: + * - ffmpeg -i ../sine.wav -c:a alac -movflags +faststart alac.m4a + * - ffmpeg -i ../sine.wav -c:a aac -movflags +faststart aac.m4a + * * @ingroup codecs * @author Phil Schatzmann */ @@ -39,9 +42,10 @@ class MP4Parser { const uint8_t* data = nullptr; ///< Pointer to box payload (not including header) size_t data_size = 0; ///< Size of payload (not including header) - size_t size = 0; ///< Size of payload including subboxes (not including header) - int level = 0; ///< Nesting depth - uint64_t offset = 0; ///< File offset where box starts + size_t size = + 0; ///< Size of payload including subboxes (not including header) + int level = 0; ///< Nesting depth + uint64_t file_offset = 0; ///< File offset where box starts bool is_complete = false; ///< True if the box data is complete bool is_container = false; ///< True if the box is a container }; @@ -110,7 +114,7 @@ class MP4Parser { box.data = nullptr; box.size = 0; box.level = 0; - box.offset = 0; + box.file_offset = 0; box.id = 0; return true; } @@ -161,7 +165,8 @@ class MP4Parser { * @param len Length of the string data. * @return Number of bytes parsed. 
*/ - int parseString(const uint8_t* str, int len) { + int parseString(const uint8_t* str, int len, int fileOffset = 0, + int level = 0) { char type[5]; int idx = 0; Box box; @@ -169,9 +174,12 @@ class MP4Parser { if (!isValidType((const char*)str + idx + 4)) { return idx; } + size_t box_size = readU32(str + idx) - 8; box.data = str + 8 + idx; - box.size = readU32(str + idx); - box.data_size = box.size - 8; + box.size = box_size; + box.level = level; + box.data_size = box.size; + box.file_offset = fileOffset + idx; strncpy(box.type, (char*)(str + idx + 4), 4); box.type[4] = '\0'; idx += box.size; @@ -181,6 +189,28 @@ class MP4Parser { return idx; } + /// find box in box + bool findBox(const char* name, const uint8_t* data, size_t len, Box& result) { + for (int j = 0; j < len - 4; j++) { + if (!isValidType((const char*)data + j + 4)) { + continue; // Skip invalid types + } + size_t box_size = readU32(data + j) - 8; + if (box_size < 8) continue; // Invalid box size + Box box; + box.data = data + j + 8; + box.size = box_size; + box.data_size = box.size; + strncpy(box.type, (char*)(data + j + 4), 4); + box.type[4] = '\0'; + if (StrView(box.type) == name) { + result = box; + return true; // Found the box + } + } + return false; + } + protected: BoxCallback callback = defaultCallback; ///< Generic callback for all boxes Vector callbacks; ///< List of type-specific callbacks @@ -218,9 +248,9 @@ class MP4Parser { memset(space, ' ', box.level * 2); space[box.level * 2] = '\0'; // Null-terminate the string snprintf(str_buffer, sizeof(str_buffer), - "%s- #%u %s, Offset: %u, Size: %u, Data Size: %u", space, (unsigned)box.id, - box.type, (unsigned)box.offset, (unsigned)box.size, - (unsigned)box.data_size); + "%s- #%u %s, Offset: %u, Size: %u, Data Size: %u", space, + (unsigned)box.id, box.type, (unsigned)box.file_offset, + (unsigned)box.size, (unsigned)box.data_size); #ifdef ARDUINO Serial.println(str_buffer); #else @@ -290,14 +320,14 @@ class MP4Parser { box.size = 
static_cast(boxSize - headerSize); box.data_size = box.size; box.level = level; - box.offset = fileOffset + parseOffset; + box.file_offset = fileOffset + parseOffset; box.is_complete = (parseOffset + boxSize <= bufferSize); box.is_container = is_container; // Special logic for container: usually no data if (box.is_container) { box.data_size = getContainerDataLength(box.type); - if (box.data_size == 0) box.data = nullptr; + if (box.data_size == 0) box.data = nullptr; box.is_complete = true; } diff --git a/src/AudioTools/AudioCodecs/MP4ParserIncremental.h b/src/AudioTools/AudioCodecs/MP4ParserIncremental.h index 7cff1283a..ee18c60bc 100644 --- a/src/AudioTools/AudioCodecs/MP4ParserIncremental.h +++ b/src/AudioTools/AudioCodecs/MP4ParserIncremental.h @@ -166,7 +166,7 @@ class MP4ParserIncremental : public MP4Parser { box.size = static_cast(boxSize - 8); box.data_size = 0; box.level = level; - box.offset = fileOffset + parseOffset; + box.file_offset = fileOffset + parseOffset; box.is_complete = true; box.is_container = true; processCallback(box); @@ -192,7 +192,7 @@ class MP4ParserIncremental : public MP4Parser { box.size = payload_size; box.data_size = payload_size; box.level = level; - box.offset = fileOffset + parseOffset; + box.file_offset = fileOffset + parseOffset; box.is_complete = true; box.is_container = false; processCallback(box); @@ -227,7 +227,7 @@ class MP4ParserIncremental : public MP4Parser { box.data = nullptr; box.data_size = available_payload; box.level = box_level; - box.offset = box_offset; + box.file_offset = box_offset; box.is_complete = false; box.is_container = false; @@ -262,7 +262,7 @@ class MP4ParserIncremental : public MP4Parser { box.size = box_bytes_expected; box.data_size = to_read; box.level = box_level; - box.offset = box_offset + box_bytes_received; + box.file_offset = box_offset + box_bytes_received; box.is_complete = (box_bytes_received + to_read == box_bytes_expected); box.is_container = false; processIncrementalDataCallback(box, 
buffer.data(), to_read, box.is_complete, ref); diff --git a/src/AudioTools/AudioCodecs/MultiDecoder.h b/src/AudioTools/AudioCodecs/MultiDecoder.h index a08d08402..706c54a5c 100644 --- a/src/AudioTools/AudioCodecs/MultiDecoder.h +++ b/src/AudioTools/AudioCodecs/MultiDecoder.h @@ -29,6 +29,10 @@ class MultiDecoder : public AudioDecoder { bool begin() override { mime_detector.begin(); is_first = true; + if (p_print==nullptr) { + LOGE("No output defined"); + return false; + } return true; } @@ -58,7 +62,7 @@ class MultiDecoder : public AudioDecoder { mime_detector.setCheck(mime, check); } - virtual void setOutput(Print& out_stream) override { + void setOutput(Print& out_stream) override { p_print = &out_stream; for (int j = 0; j < decoders.size(); j++) { decoders[j].decoder->setOutput(out_stream); @@ -141,6 +145,15 @@ class MultiDecoder : public AudioDecoder { return is_first || actual_decoder.is_open; }; + /// Sets the config to the selected decoder + bool setCodecConfig(const uint8_t* data, size_t len) override { + if (actual_decoder.decoder == nullptr) { + LOGE("No decoder defined, cannot set codec config"); + return false; + } + return actual_decoder.decoder->setCodecConfig(data, len); + } + protected: struct DecoderInfo { const char* mime = nullptr; diff --git a/tests-cmake/codec/container-m4a/m4a.ino b/tests-cmake/codec/container-m4a/m4a.ino index f49e996b5..b7e807640 100644 --- a/tests-cmake/codec/container-m4a/m4a.ino +++ b/tests-cmake/codec/container-m4a/m4a.ino @@ -32,7 +32,7 @@ void setup() { return; } - file = SD.open("/home/pschatzmann/Music/m4a/03 We'll never speak again.m4a"); + file = SD.open("/home/pschatzmann/Music/m4a/alac.m4a"); if (!file.isOpen()) { Serial.println("Failed to open file!"); return;