From fe624e7da58fc33296cd144af637bf4c4643f355 Mon Sep 17 00:00:00 2001
From: Phil Schatzmann <phil.schatzmann@gmail.com>
Date: Sun, 1 Jun 2025 20:04:33 +0200
Subject: [PATCH 1/7] Update test-codec-alac.ino

---
 examples/tests/codecs/test-codec-alac/test-codec-alac.ino | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino
index b52f69e03..1eec5c5e3 100644
--- a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino
+++ b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino
@@ -2,6 +2,7 @@
  * @file test-codec-alac.ino
  * @author Phil Schatzmann
  * @brief generate sine wave -> encoder -> decoder -> audiokit (i2s)
+ * @note Activate PSRAM or decrease the frame size, e.g. by passing 1024 to the constructors of enc_alac and dec_alac
  * @version 0.1
  * 
  * @copyright Copyright (c) 2025
@@ -56,4 +57,4 @@ void setup() {
 
 void loop() { 
   copier.copy();
-}
\ No newline at end of file
+}

From 5d657ca4dce51c679dae3c1b758e2b588ff499dd Mon Sep 17 00:00:00 2001
From: Phil Schatzmann <phil.schatzmann@gmail.com>
Date: Sun, 1 Jun 2025 20:05:05 +0200
Subject: [PATCH 2/7] Update test-codec-alac.ino

---
 examples/tests/codecs/test-codec-alac/test-codec-alac.ino | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino
index 1eec5c5e3..4902d6632 100644
--- a/examples/tests/codecs/test-codec-alac/test-codec-alac.ino
+++ b/examples/tests/codecs/test-codec-alac/test-codec-alac.ino
@@ -12,7 +12,7 @@
 #include "AudioTools/AudioCodecs/CodecALAC.h"
 #include "AudioTools/AudioLibs/AudioBoardStream.h"
 
-SET_LOOP_TASK_STACK_SIZE(16*1024); // 16KB
+// SET_LOOP_TASK_STACK_SIZE(16*1024); // 16KB - not needed
 
 AudioInfo info(44100, 2, 16);
 SineWaveGenerator<int16_t> sineWave( 32000);  // subclass of SoundGenerator with max amplitude of 32000

From df97da4a819eab2024f1c1f64a1706d73d8ac617 Mon Sep 17 00:00:00 2001
From: pschatzmann <phil.schatzmann@gmail.com>
Date: Wed, 4 Jun 2025 09:11:14 +0200
Subject: [PATCH 3/7] InputMixer

---
 src/AudioTools/CoreAudio/AudioStreams.h | 45 +++++++++++++++++++++----
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h
index 715e9c25b..28f0e7873 100644
--- a/src/AudioTools/CoreAudio/AudioStreams.h
+++ b/src/AudioTools/CoreAudio/AudioStreams.h
@@ -1142,19 +1142,22 @@ class InputMixer : public AudioStream {
  public:
   InputMixer() = default;
 
-  /// Adds a new input stream
-  void add(Stream &in, int weight = 100) {
+  /// Adds a new input stream and returns its index position (the last entry)
+  int add(Stream &in, int weight = 100) {
     streams.push_back(&in);
     weights.push_back(weight);
     total_weights += weight;
+    return streams.size() - 1;  // indexOf() would report the first duplicate
   }
 
-  /// Replaces a stream at the indicated channel
-  void set(int channel, Stream &in) {
-    if (channel < size()) {
-      streams[channel] = &in;
+  /// Replaces a stream at the indicated index
+  bool set(int index, Stream &in) {
+    if (index < size()) {
+      streams[index] = &in;
+      return true;
     } else {
-      LOGE("Invalid channel %d - max is %d", channel, size() - 1);
+      LOGE("Invalid index %d - max is %d", index, size() - 1);
+      return false;
     }
   }
 
@@ -1224,6 +1227,34 @@ class InputMixer : public AudioStream {
   /// abort the read and provide empty data
   void setRetryCount(int retry) { retry_count = retry; }
 
+  /// Removes a stream by index position
+  bool remove(int idx) {
+    if (idx < 0 || idx >= size()) {
+      return false;
+    }
+    streams.erase(idx);
+    return true;
+  }
+
+  /// Provides the actual index of the stream
+  int indexOf(Stream &stream) { return streams.indexOf(&stream); }
+
+  /// Provides the stream pointer at the indicated index
+  Stream * operator [](int idx) {
+    if (idx < 0 || idx >= size()) return nullptr;
+    return streams[idx];
+  }
+
+  /// Provides you the index of the next empty stream. -1 when none is found.
+  int nextEmptyIndex() {
+    for (int i = 0; i < streams.size(); i++) {
+      if (streams[i]->available() == 0) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
  protected:
   Vector<Stream *> streams{0};
   Vector<int> weights{0};

From fad6c2a4e0806b8da3311178b9eb4402efad4ed6 Mon Sep 17 00:00:00 2001
From: pschatzmann <phil.schatzmann@gmail.com>
Date: Wed, 4 Jun 2025 09:25:35 +0200
Subject: [PATCH 4/7] InputMixer

---
 src/AudioTools/CoreAudio/AudioStreams.h | 41 +++++++++++++++++++------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h
index 28f0e7873..30093bab5 100644
--- a/src/AudioTools/CoreAudio/AudioStreams.h
+++ b/src/AudioTools/CoreAudio/AudioStreams.h
@@ -1174,11 +1174,7 @@ class InputMixer : public AudioStream {
   void setWeight(int channel, int weight) {
     if (channel < size()) {
       weights[channel] = weight;
-      int total = 0;
-      for (int j = 0; j < weights.size(); j++) {
-        total += weights[j];
-      }
-      total_weights = total;
+      recalculateWeights();
     } else {
       LOGE("Invalid channel %d - max is %d", channel, size() - 1);
     }
@@ -1233,14 +1229,30 @@ class InputMixer : public AudioStream {
       return false;
     }
     streams.erase(idx);
+    weights.erase(idx);
+    recalculateWeights();
     return true;
   }
 
+  /// Removes all streams which have no data available
+  bool remove() {
+    bool rc = false;
+    int idx = nextEmptyIndex();
+    while (idx >= 0) {
+      rc = true;
+      streams.erase(idx);
+      weights.erase(idx);
+      idx = nextEmptyIndex();
+    }
+    recalculateWeights();
+    return rc;
+  }
+
   /// Provides the actual index of the stream
   int indexOf(Stream &stream) { return streams.indexOf(&stream); }
 
   /// Provides the stream pointer at the indicated index
-  Stream * operator [](int idx) {
+  Stream *operator[](int idx) {
     if (idx < 0 || idx >= size()) return nullptr;
     return streams[idx];
   }
@@ -1265,6 +1277,15 @@ class InputMixer : public AudioStream {
   Vector<int> result_vect;
   Vector<T> current_vect;
 
+  /// Recalculate the weights
+  void recalculateWeights() {
+      int total = 0;
+      for (int j = 0; j < weights.size(); j++) {
+        total += weights[j];
+      }
+      total_weights = total;
+  }
+
   /// mixing using a vector of samples
   int readBytesVector(T *p_data, int byteCount) {
     int samples = byteCount / sizeof(T);
@@ -1475,8 +1496,8 @@ class CallbackStream : public ModifyingStream {
   /// defines the callback to receive the actual audio info
   void setAudioInfoCallback(void (*cb)(AudioInfo info)) {
     this->cb_audio_info = cb;
-  } 
-  
+  }
+
   /// Updates the audio info and calls the callback
   void setAudioInfo(AudioInfo info) override {
     ModifyingStream::setAudioInfo(info);
@@ -1714,9 +1735,9 @@ class VolumeMeter : public ModifyingStream {
     return begin();
   }
 
-  bool begin() override { 
+  bool begin() override {
     setAudioInfo(audioInfo());
-    return true; 
+    return true;
   }
 
   void setAudioInfo(AudioInfo info) override {

From 6a608c9a3433067b4f0b0b22cf66d6138bc8b99e Mon Sep 17 00:00:00 2001
From: pschatzmann <phil.schatzmann@gmail.com>
Date: Wed, 4 Jun 2025 09:56:36 +0200
Subject: [PATCH 5/7] Compile errors

---
 src/AudioTools/CoreAudio/AudioStreams.h | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h
index 7ede245e4..bd389cd6a 100644
--- a/src/AudioTools/CoreAudio/AudioStreams.h
+++ b/src/AudioTools/CoreAudio/AudioStreams.h
@@ -1171,12 +1171,12 @@ class InputMixer : public AudioStream {
   /// Dynamically update the new weight for the indicated channel: If you set it
   /// to 0 it is muted (and the stream is not read any more). We recommend to
   /// use values between 1 and 100
-  void setWeight(int channel, int weight) {
+  void setWeight(int index, int weight) {
     if (channel < size()) {
-      weights[channel] = weight;
+      weights[index] = weight;
       recalculateWeights();
     } else {
-      LOGE("Invalid channel %d - max is %d", channel, size() - 1);
+      LOGE("Invalid index %d - max is %d", index, size() - 1);
     }
   }
 
@@ -1267,11 +1267,6 @@ class InputMixer : public AudioStream {
     return -1;
   }
 
-  /// Provides the actual index of the stream 
-  int indexOf(Stream& stream){
-    return streams.indexOf(&stream);
-  }
-
  protected:
   Vector<Stream *> streams{0};
   Vector<int> weights{0};

From c447efb725b1e89b5d01556452a8ec54148abd0b Mon Sep 17 00:00:00 2001
From: pschatzmann <phil.schatzmann@gmail.com>
Date: Wed, 4 Jun 2025 09:58:56 +0200
Subject: [PATCH 6/7] Compile Errors

---
 src/AudioTools/CoreAudio/AudioStreams.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/AudioTools/CoreAudio/AudioStreams.h b/src/AudioTools/CoreAudio/AudioStreams.h
index bd389cd6a..f065323f1 100644
--- a/src/AudioTools/CoreAudio/AudioStreams.h
+++ b/src/AudioTools/CoreAudio/AudioStreams.h
@@ -1172,7 +1172,7 @@ class InputMixer : public AudioStream {
   /// to 0 it is muted (and the stream is not read any more). We recommend to
   /// use values between 1 and 100
   void setWeight(int index, int weight) {
-    if (channel < size()) {
+    if (index < streams.size()) {
       weights[index] = weight;
       recalculateWeights();
     } else {

From 6a99056fad8fcc8a2001a414609cfd46b3aed1a8 Mon Sep 17 00:00:00 2001
From: Phil Schatzmann <phil.schatzmann@gmail.com>
Date: Wed, 4 Jun 2025 20:44:29 +0200
Subject: [PATCH 7/7] M4a (#2063)

* InputMixer

* MP4: fix alac

* Fix esds parsing with ESDSParser

* ContainerM4A fix broken output

* Deactivate stco

* M4A alac support
---
 src/AudioTools/AudioCodecs/CodecALAC.h        |  11 +-
 src/AudioTools/AudioCodecs/CodecCopy.h        |   4 +
 src/AudioTools/AudioCodecs/ContainerM4A.h     |   6 +-
 src/AudioTools/AudioCodecs/M4AAudioDemuxer.h  | 220 ++++++++++++++----
 src/AudioTools/AudioCodecs/MP4Parser.h        |  58 +++--
 .../AudioCodecs/MP4ParserIncremental.h        |   8 +-
 src/AudioTools/AudioCodecs/MultiDecoder.h     |  15 +-
 tests-cmake/codec/container-m4a/m4a.ino       |   2 +-
 8 files changed, 250 insertions(+), 74 deletions(-)

diff --git a/src/AudioTools/AudioCodecs/CodecALAC.h b/src/AudioTools/AudioCodecs/CodecALAC.h
index c8cc4bef1..9847459d9 100644
--- a/src/AudioTools/AudioCodecs/CodecALAC.h
+++ b/src/AudioTools/AudioCodecs/CodecALAC.h
@@ -42,7 +42,7 @@ class DecoderALAC : public AudioDecoder {
   DecoderALAC(int frameSize = kALACDefaultFrameSize) {
     // this is used when setCodecConfig() is not called with encoder info
     setFrameSize(frameSize);
-    setDefaultConfig();
+    //setDefaultConfig();
   }
 
   // define ALACSpecificConfig
@@ -86,14 +86,13 @@ class DecoderALAC : public AudioDecoder {
     dec.mConfig.bitDepth = from.bits_per_sample;
   }
 
+
   /// we expect the write is called for a complete frame!
   size_t write(const uint8_t* encodedFrame, size_t encodedLen) override {
     LOGD("DecoderALAC::write: %d", (int)encodedLen);
-    // Safety check
-    if (!is_init) {
-      LOGE("Decoder not initialized");
-      return 0;
-    }
+    // Make sure we have a config: we can't do this in begin because the setConfig()
+    // might be called after begin()
+    if (!is_init) setDefaultConfig();
 
     // Make sure we have the output buffer set up
     if (result_buffer.size() != outputBufferSize()) {
diff --git a/src/AudioTools/AudioCodecs/CodecCopy.h b/src/AudioTools/AudioCodecs/CodecCopy.h
index ab2ff7bcb..469566651 100644
--- a/src/AudioTools/AudioCodecs/CodecCopy.h
+++ b/src/AudioTools/AudioCodecs/CodecCopy.h
@@ -36,6 +36,10 @@ class CopyDecoder : public AudioDecoder {
 
   size_t write(const uint8_t *data, size_t len) { 
     TRACED();
+    if (pt_print == nullptr) {
+      LOGE("No output stream defined for CopyDecoder");
+      return 0;
+    }
     return pt_print->write((uint8_t*)data,len);
   }
 
diff --git a/src/AudioTools/AudioCodecs/ContainerM4A.h b/src/AudioTools/AudioCodecs/ContainerM4A.h
index 1e1fa57d3..c5ccf2347 100644
--- a/src/AudioTools/AudioCodecs/ContainerM4A.h
+++ b/src/AudioTools/AudioCodecs/ContainerM4A.h
@@ -37,7 +37,7 @@ class ContainerM4A : public ContainerDecoder {
    * @brief Set the output stream for decoded or raw audio.
    * @param out_stream Output AudioStream.
    */
-  void setOutput(AudioStream& out_stream) override {
+  void setOutput(Print& out_stream) override {
     if (p_decoder != nullptr) p_decoder->setOutput(out_stream);
     ContainerDecoder::setOutput(out_stream);
   }
@@ -118,7 +118,9 @@ class ContainerM4A : public ContainerDecoder {
         !self->is_magic_cookie_processed) {
       auto& magic_cookie = self->demux.getALACMagicCookie();
       if (magic_cookie.size() > 0) {
-        dec.setCodecConfig(magic_cookie.data(), magic_cookie.size());
+        if (!dec.setCodecConfig(magic_cookie.data(), magic_cookie.size())){
+          LOGE("Failed to set ALAC magic cookie for decoder: %s", dec.selectedMime());
+        }
       }
       self->is_magic_cookie_processed = true;
     }
diff --git a/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h b/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h
index 83831e756..56e1dc32e 100644
--- a/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h
+++ b/src/AudioTools/AudioCodecs/M4AAudioDemuxer.h
@@ -4,6 +4,12 @@
 
 namespace audio_tools {
 
+/// The stsz sample size type should usually be uint32_t: However for audio
+/// we expect that the sample size is usually around 1 - 2k, so uint16_t
+/// should be more than sufficient! Microcontrollers only have a limited
+/// amount of RAM, so this makes a big difference!
+using stsz_sample_size_t = uint16_t;
+
 /**
  * @brief A simple M4A audio data demuxer which is providing
  * AAC, MP3 and ALAC frames.
@@ -27,6 +33,67 @@ class M4AAudioDemuxer {
     uint64_t timestamp;          ///< Timestamp of the frame (if available).
   };
 
+  /**
+   * @brief A parser for the esds box payload which extracts the relevant AAC
+   * information. The fields are only meaningful if parse() returned true.
+   */
+  struct ESDSParser {
+    uint8_t audioObjectType = 0;
+    uint8_t samplingRateIndex = 0;
+    uint8_t channelConfiguration = 0;
+    bool isValid = false;  ///< True if the last parse() was successful
+
+    // Parses esds content to extract audioObjectType, samplingRateIndex, and
+    // channelConfiguration: the result is returned and recorded in isValid
+    bool parse(const uint8_t* data, size_t size) {
+      isValid = false;
+      if (data == nullptr || size < 4) return false;
+      const uint8_t* ptr = data + 4;  // skip version + flags
+      const uint8_t* end = data + size;
+
+      if (ptr >= end || *ptr++ != 0x03) return false;  // ES_Descriptor tag
+      size_t es_len = parse_descriptor_length(ptr, end);
+      if (es_len > static_cast<size_t>(end - ptr)) return false;
+
+      if (end - ptr < 3) return false;
+      ptr += 3;  // skip ES_ID (2 bytes) + flags (1 byte)
+
+      if (ptr >= end || *ptr++ != 0x04) return false;  // DecoderConfig tag
+      size_t dec_len = parse_descriptor_length(ptr, end);
+      if (dec_len > static_cast<size_t>(end - ptr)) return false;
+
+      // skip objectTypeIndication, streamType, bufferSizeDB, max/avgBitrate
+      if (end - ptr < 13) return false;
+      ptr += 13;
+
+      if (ptr >= end || *ptr++ != 0x05) return false;  // DecSpecificInfo tag
+      size_t dsi_len = parse_descriptor_length(ptr, end);
+      if (dsi_len < 2 || dsi_len > static_cast<size_t>(end - ptr)) return false;
+
+      // the first two bytes of the AudioSpecificConfig hold all three fields
+      uint8_t byte1 = ptr[0];
+      uint8_t byte2 = ptr[1];
+      audioObjectType = (byte1 >> 3) & 0x1F;
+      samplingRateIndex = ((byte1 & 0x07) << 1) | ((byte2 >> 7) & 0x01);
+      channelConfiguration = (byte2 >> 3) & 0x0F;
+      isValid = true;
+      return true;
+    }
+
+   protected:
+    // Helper to decode variable-length descriptor lengths (e.g. 0x80 80 80 05)
+    inline size_t parse_descriptor_length(const uint8_t*& ptr,
+                                          const uint8_t* end) {
+      size_t len = 0;
+      for (int i = 0; i < 4 && ptr < end; ++i) {
+        uint8_t b = *ptr++;
+        len = (len << 7) | (b & 0x7F);
+        if ((b & 0x80) == 0) break;
+      }
+      return len;
+    }
+  };
+
   /**
    * @brief Extracts audio data based on the sample sizes defined in the stsz
    * box. It collects the data from the mdat box and calls the callback with the
@@ -75,10 +142,15 @@ class M4AAudioDemuxer {
     void setReference(void* r) { ref = r; }
 
     /**
-     * @brief Sets the maximum box size (e.g., for mdat).
+     * @brief Sets the maximum box size (e.g., for mdat). This is called before
+     * the mdat data is posted. In order to be able to play a file multiple
+     * times we just reset the sampleIndex!
      * @param size Maximum size in bytes.
      */
-    void setMaxSize(size_t size) { box_size = size; }
+    void setMaxSize(size_t size) {
+      box_size = size;
+      sampleIndex = 0;
+    }
 
     /**
      * @brief Writes data to the extractor, extracting frames as sample sizes
@@ -92,7 +164,7 @@ class M4AAudioDemuxer {
       // Resize buffer to the current sample size
       size_t currentSize = currentSampleSize();
       if (currentSize == 0) {
-        LOGE("No sample size defined, cannot write data");
+        LOGE("No sample size defined: e.g. mdat before stsz!");
         return 0;
       }
       resize(currentSize);
@@ -126,7 +198,7 @@ class M4AAudioDemuxer {
      * @brief Returns the vector of sample sizes.
      * @return Reference to the vector of sample sizes.
      */
-    Vector<uint32_t>& getSampleSizes() { return sampleSizes; }
+    Vector<stsz_sample_size_t>& getSampleSizes() { return sampleSizes; }
 
     /**
      * @brief Returns the vector of chunk offsets.
@@ -158,13 +230,13 @@ class M4AAudioDemuxer {
     }
 
    protected:
-    Vector<uint32_t> sampleSizes;      ///< Table of sample sizes.
-    Vector<uint32_t> chunkOffsets;     ///< Table of chunk offsets.
-    Codec codec = Codec::Unknown;      ///< Current codec.
-    FrameCallback callback = nullptr;  ///< Frame callback.
-    void* ref = nullptr;               ///< Reference pointer for callback.
-    size_t sampleIndex = 0;            ///< Current sample index.
-    SingleBuffer<uint8_t> buffer;      ///< Buffer for accumulating sample data.
+    Vector<stsz_sample_size_t> sampleSizes;  ///< Table of sample sizes.
+    Vector<uint32_t> chunkOffsets;           ///< Table of chunk offsets.
+    Codec codec = Codec::Unknown;            ///< Current codec.
+    FrameCallback callback = nullptr;        ///< Frame callback.
+    void* ref = nullptr;           ///< Reference pointer for callback.
+    size_t sampleIndex = 0;        ///< Current sample index.
+    SingleBuffer<uint8_t> buffer;  ///< Buffer for accumulating sample data.
     int aacProfile = 2, sampleRateIdx = 4, channelCfg = 2;  ///< AAC config.
     uint32_t fixed_sample_size = 0;   ///< Fixed sample size (if used).
     uint32_t fixed_sample_count = 0;  ///< Fixed sample count (if used).
@@ -304,6 +376,9 @@ class M4AAudioDemuxer {
     alacMagicCookie.clear();
     resize(default_size);
 
+    stsz_processed = false;
+    stco_processed = false;
+
     // When codec/sampleSizes/callback/ref change, update the extractor:
     parser.begin();
     sampleExtractor.begin();
@@ -353,6 +428,8 @@ class M4AAudioDemuxer {
   SampleExtractor sampleExtractor;  ///< Extractor for audio samples.
   void* ref = nullptr;              ///< Reference pointer for callbacks.
   size_t default_size = 2 * 1024;   ///< Default buffer size.
+  bool stsz_processed = false;      ///< Marks the stsz table as processed
+  bool stco_processed = false;      ///< Marks the stco table as processed
 
   /**
    * @brief Reads a 32-bit big-endian unsigned integer from a buffer.
@@ -458,23 +535,22 @@ class M4AAudioDemuxer {
     } else if (StrView(box.type) == "stsz") {
       onStsz(box);
     } else if (StrView(box.type) == "stco") {
-      onStco(box);
+      // onStco(box); // currently not supported
     }
   }
 
-
   /**
    * @brief Handles the stsd (Sample Description) box.
    * @param box MP4 box.
    */
   void onStsd(const MP4Parser::Box& box) {
     LOGI("onStsd: %s, size: %zu bytes", box.type, box.data_size);
-    const uint8_t* data = box.data;
-    size_t size = box.data_size;
-    if (size < 8) return;
-    uint32_t entryCount = readU32(data + 4);
+    // printHexDump(box);
+    if (box.data_size < 8) return;
+    uint32_t entryCount = readU32(box.data + 4);
     // One or more sample entry boxes (e.g. mp4a, .mp3, alac)
-    parser.parseString(data + 8, size - 8);
+    parser.parseString(box.data + 8, box.data_size - 8, box.file_offset + 8 + 8,
+                       box.level + 1);
   }
 
   /**
@@ -483,6 +559,7 @@ class M4AAudioDemuxer {
    */
   void onMp4a(const MP4Parser::Box& box) {
     LOGI("onMp4a: %s, size: %zu bytes", box.type, box.data_size);
+    // printHexDump(box);
     if (box.data_size < 36) return;  // Minimum size for mp4a box
 
     // use default configuration
@@ -495,7 +572,7 @@ class M4AAudioDemuxer {
 
     /// for mp4a we expect to contain a esds: child boxes start at 36
     int pos = 36 - 8;
-    parser.parseString(box.data + pos, box.data_size - pos);
+    parser.parseString(box.data + pos, box.data_size - pos, box.file_offset + 8 + pos, box.level + 1);  // args: fileOffset, level
   }
 
   /**
@@ -504,28 +581,54 @@ class M4AAudioDemuxer {
    */
   void onEsds(const MP4Parser::Box& box) {
     LOGI("onEsds: %s, size: %zu bytes", box.type, box.data_size);
-    int aacProfile = 2;     // Default: AAC LC
-    int sampleRateIdx = 4;  // Default: 44100 Hz
-    int channelCfg = 2;     // Default: Stereo
+    // printHexDump(box);
+    ESDSParser esdsParser;
+    if (!esdsParser.parse(box.data, box.data_size)) {
+      LOGE("Failed to parse esds box");
+      return;
+    }
+    LOGI(
+        "-> esds: AAC objectType: %u, samplingRateIdx: %u, "
+        "channelCfg: %u",
+        esdsParser.audioObjectType, esdsParser.samplingRateIndex,
+        esdsParser.channelConfiguration);
+    sampleExtractor.setAACConfig(esdsParser.audioObjectType,
+                                 esdsParser.samplingRateIndex,
+                                 esdsParser.channelConfiguration);
+  }
 
-    for (size_t i = 2; i + 4 < box.data_size; ++i) {
-      if (box.data[i] == 0x05) {  // 0x05 = AudioSpecificConfig tag
-        uint8_t asc_len = box.data[i + 1];
-        if (i + 2 + asc_len > box.data_size) {
-          LOGW("esds box not long enough for AudioSpecificConfig");
-          break;
-        };
-        const uint8_t* asc = box.data + i + 2;
-        // AudioSpecificConfig is at least 2 bytes
-        aacProfile = (asc[0] >> 3) & 0x1F;  // 5 bits
-        sampleRateIdx =
-            ((asc[0] & 0x07) << 1) | ((asc[1] >> 7) & 0x01);  // 4 bits
-        channelCfg = (asc[1] >> 3) & 0x0F;                    // 4 bits
-        LOGI("AudioSpecificConfig: profile=%d, sampleRateIdx=%d, channelCfg=%d",
-             aacProfile, sampleRateIdx, channelCfg);
-        sampleExtractor.setAACConfig(aacProfile, sampleRateIdx, channelCfg);
-      }
+  void fixALACMagicCookie(uint8_t* cookie, size_t len) {
+    if (len < 28) {
+      return;
     }
+
+    // Helper to read/write big-endian
+    auto read32 = [](uint8_t* p) -> uint32_t {
+      return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+    };
+    auto write32 = [](uint8_t* p, uint32_t val) {
+      p[0] = (val >> 24) & 0xFF;
+      p[1] = (val >> 16) & 0xFF;
+      p[2] = (val >> 8) & 0xFF;
+      p[3] = val & 0xFF;
+    };
+    auto read16 = [](uint8_t* p) -> uint16_t { return (p[0] << 8) | p[1]; };
+    auto write16 = [](uint8_t* p, uint16_t val) {
+      p[0] = (val >> 8) & 0xFF;
+      p[1] = val & 0xFF;
+    };
+
+    // Fix values if zero or invalid
+    if (read32(cookie + 0) == 0) write32(cookie + 0, 4096);    // frameLength
+    if (cookie[6] == 0) cookie[6] = 16;                        // bitDepth
+    if (cookie[7] == 0 || cookie[7] > 32) cookie[7] = 10;      // pb
+    if (cookie[8] == 0 || cookie[8] > 32) cookie[8] = 14;      // mb
+    if (cookie[9] == 0 || cookie[9] > 32) cookie[9] = 10;      // kb
+    if (cookie[10] == 0 || cookie[10] > 8) cookie[10] = 2;     // numChannels
+    if (read16(cookie + 11) == 0) write16(cookie + 11, 255);   // maxRun
+    if (read32(cookie + 13) == 0) write32(cookie + 13, 8192);  // maxFrameBytes
+    if (read32(cookie + 17) == 0) write32(cookie + 17, 512000);  // avgBitRate
+    if (read32(cookie + 21) == 0) write32(cookie + 21, 44100);   // sampleRate
   }
 
   /**
@@ -537,8 +640,13 @@ class M4AAudioDemuxer {
     codec = Codec::ALAC;
     sampleExtractor.setCodec(codec);
 
-    alacMagicCookie.resize(box.data_size);
-    std::memcpy(alacMagicCookie.data(), box.data, box.data_size);
+    // only alac box in alac contains magic cookie
+    MP4Parser::Box alac;
+    if (parser.findBox("alac", box.data, box.data_size, alac)) {
+      // fixALACMagicCookie((uint8_t*)alac.data, alac.data_size);
+      alacMagicCookie.resize(alac.data_size - 4);
+      std::memcpy(alacMagicCookie.data(), alac.data + 4, alac.data_size - 4);
+    }
   }
 
   /**
@@ -547,24 +655,25 @@ class M4AAudioDemuxer {
    */
   void onStsz(MP4Parser::Box& box) {
     LOGI("onStsz: %s, size: %zu bytes", box.type, box.data_size);
+    if (stsz_processed) return;
     // Parse stsz box and fill sampleSizes
     const uint8_t* data = box.data;
-    size_t size = box.data_size;
-    if (size < 12) return;
     uint32_t sampleSize = readU32(data + 4);
     uint32_t sampleCount = readU32(data + 8);
     sampleExtractor.begin();
-    Vector<uint32_t>& sampleSizes = sampleExtractor.getSampleSizes();
+    Vector<stsz_sample_size_t>& sampleSizes = sampleExtractor.getSampleSizes();
     if (sampleSize == 0) {
-      if (size < 12 + 4 * sampleCount) return;
       LOGI("-> Sample Sizes Count: %u", sampleCount);
       sampleSizes.resize(sampleCount);
       for (uint32_t i = 0; i < sampleCount; ++i) {
-        sampleSizes[i] = readU32(data + 12 + i * 4);
+        uint32_t sampleSizes32 = readU32(data + 12 + i * 4);
+        sampleSizes[i] = static_cast<stsz_sample_size_t>(sampleSizes32);
+        assert(static_cast<uint32_t>(sampleSizes[i]) == sampleSizes32);
       }
     } else {
       sampleExtractor.setFixedSampleCount(sampleSize, sampleCount);
     }
+    stsz_processed = true;
   }
 
   /**
@@ -573,6 +682,7 @@ class M4AAudioDemuxer {
    */
   void onStco(MP4Parser::Box& box) {
     LOGI("onStco: %s, size: %zu bytes", box.type, box.data_size);
+    if (stco_processed) return;
     // Parse stco box and fill chunkOffsets
     const uint8_t* data = box.data + 4;
     size_t size = box.data_size;
@@ -585,6 +695,24 @@ class M4AAudioDemuxer {
     for (uint32_t i = 0; i < entryCount; ++i) {
       chunkOffsets[i] = readU32(data + 4 + i * 4);
     }
+    stco_processed = true;
+  }
+
+  void printHexDump(const MP4Parser::Box& box) {
+    const uint8_t* data = box.data;
+    size_t len = box.data_size;
+    LOGI("===========================");
+    for (size_t i = 0; i < len; i += 16) {
+      char hex[49] = {0};
+      char ascii[17] = {0};
+      for (size_t j = 0; j < 16 && i + j < len; ++j) {
+        sprintf(hex + j * 3, "%02X ", data[i + j]);
+        ascii[j] = (data[i + j] >= 32 && data[i + j] < 127) ? data[i + j] : '.';
+      }
+      ascii[16] = 0;
+      LOGI("%04zx: %-48s |%s|", i, hex, ascii);
+    }
+    LOGI("===========================");
   }
 };
 
diff --git a/src/AudioTools/AudioCodecs/MP4Parser.h b/src/AudioTools/AudioCodecs/MP4Parser.h
index eb266eca6..9ffc2d61a 100644
--- a/src/AudioTools/AudioCodecs/MP4Parser.h
+++ b/src/AudioTools/AudioCodecs/MP4Parser.h
@@ -20,8 +20,11 @@ namespace audio_tools {
  * Serial.
  * If a container box contains data, it will be processed recursively and if it
  * contains data itself, it might be reported in a second callback call.
- * @note This parser expects that the buffer size is larger than the biggest
- * box!
+ * @note This parser expects the mdat box to be the last box in the file. This
+ * can be achieved with the following ffmpeg commands:
+ * - ffmpeg -i ../sine.wav -c:a alac  -movflags +faststart alac.m4a
+ * - ffmpeg -i ../sine.wav -c:a aac  -movflags +faststart aac.m4a
+ *
  * @ingroup codecs
  * @author Phil Schatzmann
  */
@@ -39,9 +42,10 @@ class MP4Parser {
     const uint8_t* data =
         nullptr;           ///< Pointer to box payload (not including header)
     size_t data_size = 0;  ///< Size of payload (not including header)
-    size_t size = 0;       ///< Size of payload including subboxes (not including header)
-    int level = 0;         ///< Nesting depth
-    uint64_t offset = 0;   ///< File offset where box starts
+    size_t size =
+        0;  ///< Size of payload including subboxes (not including header)
+    int level = 0;              ///< Nesting depth
+    uint64_t file_offset = 0;   ///< File offset where box starts
     bool is_complete = false;   ///< True if the box data is complete
     bool is_container = false;  ///< True if the box is a container
   };
@@ -110,7 +114,7 @@ class MP4Parser {
     box.data = nullptr;
     box.size = 0;
     box.level = 0;
-    box.offset = 0;
+    box.file_offset = 0;
     box.id = 0;
     return true;
   }
@@ -161,7 +165,8 @@ class MP4Parser {
    * @param len Length of the string data.
    * @return Number of bytes parsed.
    */
-  int parseString(const uint8_t* str, int len) {
+  int parseString(const uint8_t* str, int len, int fileOffset = 0,
+                  int level = 0) {
     char type[5];
     int idx = 0;
     Box box;
@@ -169,9 +174,12 @@ class MP4Parser {
       if (!isValidType((const char*)str + idx + 4)) {
         return idx;
       }
+      size_t box_size = readU32(str + idx) - 8;
       box.data = str + 8 + idx;
-      box.size = readU32(str + idx);
-      box.data_size = box.size - 8;
+      box.size = box_size;
+      box.level = level;
+      box.data_size = box.size;
+      box.file_offset = fileOffset + idx;
       strncpy(box.type, (char*)(str + idx + 4), 4);
       box.type[4] = '\0';
       idx += box.size;
@@ -181,6 +189,28 @@ class MP4Parser {
     return idx;
   }
 
+  /// Scans data byte by byte for a box header with the indicated name. The
+  /// result describes the payload of the first match that fits into len bytes.
+  bool findBox(const char* name, const uint8_t* data, size_t len, Box& result) {
+    for (size_t j = 0; j + 8 <= len; j++) {  // 8 byte header must fit
+      if (!isValidType((const char*)data + j + 4)) continue;
+      uint32_t total_size = readU32(data + j);  // header + payload
+      // reject payloads < 8 bytes and boxes that exceed the buffer
+      if (total_size < 16 || total_size > len - j) continue;
+      Box box;
+      box.data = data + j + 8;
+      box.size = total_size - 8;
+      box.data_size = box.size;
+      strncpy(box.type, (const char*)(data + j + 4), 4);
+      box.type[4] = '\0';
+      if (StrView(box.type) == name) {
+        result = box;
+        return true;  // Found the box
+      }
+    }
+    return false;
+  }
+
  protected:
   BoxCallback callback = defaultCallback;  ///< Generic callback for all boxes
   Vector<CallbackEntry> callbacks;         ///< List of type-specific callbacks
@@ -218,9 +248,9 @@ class MP4Parser {
     memset(space, ' ', box.level * 2);
     space[box.level * 2] = '\0';  // Null-terminate the string
     snprintf(str_buffer, sizeof(str_buffer),
-             "%s- #%u %s, Offset: %u, Size: %u, Data Size: %u", space, (unsigned)box.id,
-             box.type, (unsigned)box.offset, (unsigned)box.size,
-             (unsigned)box.data_size);
+             "%s- #%u %s, Offset: %u, Size: %u, Data Size: %u", space,
+             (unsigned)box.id, box.type, (unsigned)box.file_offset,
+             (unsigned)box.size, (unsigned)box.data_size);
 #ifdef ARDUINO
     Serial.println(str_buffer);
 #else
@@ -290,14 +320,14 @@ class MP4Parser {
       box.size = static_cast<size_t>(boxSize - headerSize);
       box.data_size = box.size;
       box.level = level;
-      box.offset = fileOffset + parseOffset;
+      box.file_offset = fileOffset + parseOffset;
       box.is_complete = (parseOffset + boxSize <= bufferSize);
       box.is_container = is_container;
 
       // Special logic for container: usually no data
       if (box.is_container) {
         box.data_size = getContainerDataLength(box.type);
-        if (box.data_size == 0) box.data = nullptr; 
+        if (box.data_size == 0) box.data = nullptr;
         box.is_complete = true;
       }
 
diff --git a/src/AudioTools/AudioCodecs/MP4ParserIncremental.h b/src/AudioTools/AudioCodecs/MP4ParserIncremental.h
index 7cff1283a..ee18c60bc 100644
--- a/src/AudioTools/AudioCodecs/MP4ParserIncremental.h
+++ b/src/AudioTools/AudioCodecs/MP4ParserIncremental.h
@@ -166,7 +166,7 @@ class MP4ParserIncremental : public MP4Parser {
     box.size = static_cast<size_t>(boxSize - 8);
     box.data_size = 0;
     box.level = level;
-    box.offset = fileOffset + parseOffset;
+    box.file_offset = fileOffset + parseOffset;
     box.is_complete = true;
     box.is_container = true;
     processCallback(box);
@@ -192,7 +192,7 @@ class MP4ParserIncremental : public MP4Parser {
     box.size = payload_size;
     box.data_size = payload_size;
     box.level = level;
-    box.offset = fileOffset + parseOffset;
+    box.file_offset = fileOffset + parseOffset;
     box.is_complete = true;
     box.is_container = false;
     processCallback(box);
@@ -227,7 +227,7 @@ class MP4ParserIncremental : public MP4Parser {
         box.data = nullptr;
         box.data_size = available_payload;
         box.level = box_level;
-        box.offset = box_offset;
+        box.file_offset = box_offset;
         box.is_complete = false;
         box.is_container = false;
 
@@ -262,7 +262,7 @@ class MP4ParserIncremental : public MP4Parser {
       box.size = box_bytes_expected;
       box.data_size = to_read;
       box.level = box_level;
-      box.offset = box_offset + box_bytes_received;
+      box.file_offset = box_offset + box_bytes_received;
       box.is_complete = (box_bytes_received + to_read == box_bytes_expected);
       box.is_container = false;
       processIncrementalDataCallback(box, buffer.data(), to_read, box.is_complete, ref);
diff --git a/src/AudioTools/AudioCodecs/MultiDecoder.h b/src/AudioTools/AudioCodecs/MultiDecoder.h
index a08d08402..706c54a5c 100644
--- a/src/AudioTools/AudioCodecs/MultiDecoder.h
+++ b/src/AudioTools/AudioCodecs/MultiDecoder.h
@@ -29,6 +29,10 @@ class MultiDecoder : public AudioDecoder {
   bool begin() override {
     mime_detector.begin();
     is_first = true;
+    if (p_print==nullptr) {
+      LOGE("No output defined");
+      return false;
+    }
     return true;
   }
 
@@ -58,7 +62,7 @@ class MultiDecoder : public AudioDecoder {
     mime_detector.setCheck(mime, check);
   }
 
-  virtual void setOutput(Print& out_stream) override {
+  void setOutput(Print& out_stream) override {
     p_print = &out_stream;
     for (int j = 0; j < decoders.size(); j++) {
       decoders[j].decoder->setOutput(out_stream);
@@ -141,6 +145,15 @@ class MultiDecoder : public AudioDecoder {
     return is_first || actual_decoder.is_open;
   };
 
+  /// Sets the config to the selected decoder
+  bool setCodecConfig(const uint8_t* data, size_t len) override {
+    if (actual_decoder.decoder == nullptr) {
+      LOGE("No decoder defined, cannot set codec config");
+      return false;
+    }
+    return actual_decoder.decoder->setCodecConfig(data, len);
+  }
+
  protected:
   struct DecoderInfo {
     const char* mime = nullptr;
diff --git a/tests-cmake/codec/container-m4a/m4a.ino b/tests-cmake/codec/container-m4a/m4a.ino
index f49e996b5..b7e807640 100644
--- a/tests-cmake/codec/container-m4a/m4a.ino
+++ b/tests-cmake/codec/container-m4a/m4a.ino
@@ -32,7 +32,7 @@ void setup() {
     return;
   }
 
-  file = SD.open("/home/pschatzmann/Music/m4a/03 We'll never speak again.m4a");
+  file = SD.open("/home/pschatzmann/Music/m4a/alac.m4a");
   if (!file.isOpen()) {
     Serial.println("Failed to open file!");
     return;