Skip to content

Add support for demuxing xhe-aac files #435

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions mp4parse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2050,6 +2050,7 @@ pub enum CodecType {
Unknown,
MP3,
AAC,
XHEAAC, // xHE-AAC (Extended High Efficiency AAC)
FLAC,
Opus,
H264, // 14496-10
Expand Down Expand Up @@ -5118,7 +5119,7 @@ fn read_ds_descriptor(
};

match audio_object_type {
1..=4 | 6 | 7 | 17 | 19..=23 => {
1..=4 | 6 | 7 | 17 | 19..=23 | 42 => {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to work for now (similar to Chromium's AAC parsing), but it looks like xHE-AAC uses UsacConfig rather than GASpecificConfig (ffmpeg handles them separately). It'd be worth adding a comment highlighting that audio_object_type == 42 could need different handling in the future, to avoid tripping ourselves up if the GASpecificConfig parsing is ever extended or made stricter, or if we need specific fields from the UsacConfig.

if sample_frequency.is_none() {
return Err(Error::Unsupported("unknown frequency"));
}
Expand Down Expand Up @@ -5205,6 +5206,12 @@ fn read_ds_descriptor(
esds.extended_audio_object_type = extended_audio_object_type;
esds.audio_sample_rate = Some(sample_frequency_value);
esds.audio_channel_count = Some(channel_counts);

// Update codec type for xHE-AAC if audio object type 42 is detected
if audio_object_type == 42 {
esds.audio_codec = CodecType::XHEAAC;
}

if !esds.decoder_specific_data.is_empty() {
fail_with_status_if(
strictness == ParseStrictness::Strict,
Expand Down Expand Up @@ -5257,11 +5264,14 @@ fn read_dc_descriptor(
)?;
}

esds.audio_codec = match object_profile {
0x40 | 0x66 | 0x67 => CodecType::AAC,
0x69 | 0x6B => CodecType::MP3,
_ => CodecType::Unknown,
};
// Only set codec type if it hasn't been set to a more specific type (e.g., XHEAAC)
if esds.audio_codec == CodecType::Unknown {
esds.audio_codec = match object_profile {
0x40 | 0x66 | 0x67 => CodecType::AAC,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be a tiny bit clearer to use:
0x40 if esds.audio_object_type == Some(42) => CodecType::XHEAAC,
to keep the initialization of audio_codec in a single location, but it doesn't matter too much.

0x69 | 0x6B => CodecType::MP3,
_ => CodecType::Unknown,
};
}

debug!(
"read_dc_descriptor: esds.audio_codec = {:?}",
Expand Down
79 changes: 79 additions & 0 deletions mp4parse/tests/public.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ static VIDEO_H263_3GP: &str = "tests/bbb_sunflower_QCIF_30fps_h263_noaudio_1f.3g
// The 1 frame hevc mp4 file generated by ffmpeg with command
// "ffmpeg -f lavfi -i color=c=white:s=640x480 -c:v libx265 -frames:v 1 -pix_fmt yuv420p hevc_white_frame.mp4"
static VIDEO_HEVC_MP4: &str = "tests/hevc_white_frame.mp4";
// xHE-AAC test file generated by exhale encoder - 3 seconds, 44.1kHz mono, ~14.6kbps
static AUDIO_XHE_AAC_MP4: &str = "tests/sine-3s-xhe-aac-44khz-mono.mp4";
// The 1 frame AMR-NB 3gp file can be generated by ffmpeg with command
// "ffmpeg -i [input file] -f 3gp -acodec amr_nb -ar 8000 -ac 1 -frames:a 1 -vn output.3gp"
#[cfg(feature = "3gpp")]
Expand Down Expand Up @@ -1580,3 +1582,80 @@ fn public_video_mp4v() {
};
}
}

/// Parses the xHE-AAC sample file and checks that its single audio track is
/// reported as `CodecType::XHEAAC` with a 44.1 kHz mono layout, both in the
/// audio sample entry and in the ES descriptor.
#[test]
fn public_audio_xhe_aac() {
    let mut fd = File::open(AUDIO_XHE_AAC_MP4).expect("Unknown file");
    let mut buf = Vec::new();
    fd.read_to_end(&mut buf).expect("File error");

    let mut c = Cursor::new(&buf);
    let context = mp4::read_mp4(&mut c, ParseStrictness::Normal).expect("read_mp4 failed");

    println!("xHE-AAC MP4 file parsed successfully");
    println!("Number of tracks: {}", context.tracks.len());

    // This file contains a single xHE-AAC audio track at 44.1kHz mono, ~14.6kbps, 3 seconds
    assert_eq!(context.tracks.len(), 1, "Expected exactly one track");

    let track = &context.tracks[0];
    assert_eq!(
        track.track_type,
        mp4::TrackType::Audio,
        "Expected audio track"
    );

    // Check sample description
    let stsd = track.stsd.as_ref().expect("expected an stsd");
    assert_eq!(
        stsd.descriptions.len(),
        1,
        "Expected one sample description"
    );

    let a = match stsd.descriptions.first().expect("expected a SampleEntry") {
        mp4::SampleEntry::Audio(a) => a,
        _ => panic!("expected an AudioSampleEntry"),
    };

    // Printed values are only shown by the test harness when the test fails
    // (or with `--nocapture`), which helps diagnose a mismatching field.
    println!("Audio track details:");
    println!("  Codec type: {:?}", a.codec_type);
    println!("  Sample rate: {}", a.samplerate);
    println!("  Channel count: {}", a.channelcount);

    // The parser should detect this as xHE-AAC
    assert_eq!(a.codec_type, mp4::CodecType::XHEAAC);

    // Based on ffprobe: 44.1kHz, 1 channel (mono)
    assert_eq!(a.samplerate, 44100.0);
    assert_eq!(a.channelcount, 1);

    // Check codec-specific data
    match &a.codec_specific {
        mp4::AudioCodecSpecific::ES_Descriptor(esds) => {
            println!("  ESDS present");
            println!("  Audio object type: {:?}", esds.audio_object_type);
            println!(
                "  Extended audio object type: {:?}",
                esds.extended_audio_object_type
            );
            println!("  Audio sample rate: {:?}", esds.audio_sample_rate);
            println!("  Audio channel count: {:?}", esds.audio_channel_count);

            // Should be xHE-AAC with audio object type 42
            assert_eq!(esds.audio_codec, mp4::CodecType::XHEAAC);
            assert_eq!(esds.audio_object_type, Some(42));

            // Verify ESDS matches the container info. Compare against `Some(..)`
            // directly so that a missing value fails the test instead of being
            // skipped silently.
            assert_eq!(
                esds.audio_sample_rate,
                Some(44100),
                "ESDS sample rate should be present and match the container"
            );
            assert_eq!(
                esds.audio_channel_count,
                Some(1),
                "ESDS channel count should be present and match the container"
            );
        }
        _ => panic!("Expected ES descriptor for xHE-AAC audio"),
    }

    println!("xHE-AAC file parsing test completed successfully");
}
Binary file added mp4parse/tests/sine-3s-xhe-aac-44khz-mono.mp4
Binary file not shown.
Loading