
Commit 5dd1017

WAV: Constrain writer to stream length not file length
Previously, tags were simply written to the end of the file, which would break files that have junk data appended past the end of the audio stream. The writer is now constrained to the stream length reported in the RIFF header, so appended junk data that falls outside the stream is left untouched. Such junk is typically the result of buggy software misusing padding.
1 parent fa878b6 · commit 5dd1017
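For context, a RIFF/WAVE file starts with an 8-byte header (`RIFF` followed by a little-endian `u32` size) that declares how long the stream is, regardless of how long the file happens to be on disk. A minimal standalone sketch of that comparison, using only `std` rather than Lofty's internal API (the `riff_declared_end` helper and `example.wav` path are purely illustrative):

```rust
use std::fs::File;
use std::io::{self, Read};

/// Returns (declared end of the RIFF stream, actual file length).
/// Any bytes past the declared end are appended junk that a tag writer
/// should leave alone rather than treat as part of the stream.
fn riff_declared_end(path: &str) -> io::Result<(u64, u64)> {
    let mut file = File::open(path)?;

    let mut header = [0u8; 12];
    file.read_exact(&mut header)?;
    if &header[..4] != b"RIFF" || &header[8..12] != b"WAVE" {
        return Err(io::Error::new(io::ErrorKind::InvalidData, "not a WAV file"));
    }

    // Bytes 4..8 hold the size of everything after the 8-byte RIFF header
    // (the `WAVE` fourcc plus all chunks), little-endian.
    let declared = u64::from(u32::from_le_bytes(header[4..8].try_into().unwrap()));
    Ok((8 + declared, file.metadata()?.len()))
}

fn main() -> io::Result<()> {
    // "example.wav" is a placeholder path for illustration.
    let (stream_end, on_disk) = riff_declared_end("example.wav")?;
    println!("declared stream end: {stream_end}, on-disk length: {on_disk}");
    Ok(())
}
```

If the on-disk length exceeds the declared stream end, the difference is appended junk; after this commit the WAV writers operate inside the declared end instead of appending at `SeekFrom::End(0)`.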

10 files changed: +128, -64 lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
@@ -15,6 +15,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   * If a `free` atom claims to be larger than the remainder of the stream, parsing will simply stop. This will now only
     be a `SizeMismatch` error in `Strict` mode. Invalid padding is a common issue in all tag formats due to buggy software,
     so it's better to work around it by default rather than discard the entire stream as invalid.
+* **WAV**:
+  * When writing tags, the writer will be constrained to the stream size reported by the file, not by the file's actual length ([PR](https://github.com/Serial-ATA/lofty-rs/pull/517))
+    * Previously, tags were simply written to the end of the file, but this would break files that have junk data appended.
+    * This allows for files with appended junk data that falls outside of the stream length. This can be caused by buggy software
+      misusing padding.
 
 ## [0.22.3] - 2025-04-04

Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ into_iter_without_iter = "allow" # This is only going to fire on some i
 struct_excessive_bools = "allow" # I have yet to find one case of this being useful
 needless_continue = "allow" # All occurences of this lint are just for clarity in large loops
 unbuffered_bytes = "allow" # It is up to the caller to wrap their data in `BufReader`s
+struct_field_names = "allow"
 
 [workspace.lints.rustdoc]
 broken_intra_doc_links = "deny"

lofty/src/id3/v2/write/chunk_file.rs

Lines changed: 60 additions & 36 deletions
@@ -1,14 +1,16 @@
 use crate::config::WriteOptions;
 use crate::error::{LoftyError, Result};
 use crate::iff::chunk::Chunks;
+use crate::macros::err;
 use crate::util::io::{FileLike, Length, Truncate};
 
-use std::io::SeekFrom;
+use std::io::{Cursor, Seek, SeekFrom, Write};
 
 use byteorder::{ByteOrder, WriteBytesExt};
 
 const CHUNK_NAME_UPPER: [u8; 4] = [b'I', b'D', b'3', b' '];
 const CHUNK_NAME_LOWER: [u8; 4] = [b'i', b'd', b'3', b' '];
+const RIFF_CHUNK_HEADER_SIZE: usize = 8;
 
 pub(in crate::id3::v2) fn write_to_chunk_file<F, B>(
     file: &mut F,
@@ -21,71 +23,93 @@ where
     LoftyError: From<<F as Length>::Error>,
     B: ByteOrder,
 {
-    // RIFF....WAVE
-    file.seek(SeekFrom::Current(12))?;
+    // Only rely on the actual file for the first chunk read
+    let file_len = file.len()?;
 
-    let file_len = file.len()?.saturating_sub(12);
+    let mut chunks = Chunks::<B>::new(file_len);
+    chunks.next(file)?;
 
-    let mut id3v2_chunk = (None, None);
+    let mut actual_stream_size = chunks.size;
 
-    let mut chunks = Chunks::<B>::new(file_len);
+    file.rewind()?;
+
+    let mut file_bytes = Cursor::new(Vec::with_capacity(actual_stream_size as usize));
+    file.read_to_end(file_bytes.get_mut())?;
+
+    if file_bytes.get_ref().len() < (actual_stream_size as usize + RIFF_CHUNK_HEADER_SIZE) {
+        err!(SizeMismatch);
+    }
+
+    // The first chunk format is RIFF....WAVE
+    file_bytes.seek(SeekFrom::Start(12))?;
 
-    while chunks.next(file).is_ok() {
+    let (mut exising_id3_start, mut existing_id3_size) = (None, None);
+
+    let mut chunks = Chunks::<B>::new(u64::from(actual_stream_size));
+    while let Ok(true) = chunks.next(&mut file_bytes) {
         if chunks.fourcc == CHUNK_NAME_UPPER || chunks.fourcc == CHUNK_NAME_LOWER {
-            id3v2_chunk = (Some(file.stream_position()? - 8), Some(chunks.size));
+            exising_id3_start = Some(file_bytes.stream_position()? - 8);
+            existing_id3_size = Some(chunks.size);
             break;
         }
 
-        file.seek(SeekFrom::Current(i64::from(chunks.size)))?;
-
-        chunks.correct_position(file)?;
+        chunks.skip(&mut file_bytes)?;
     }
 
-    if let (Some(chunk_start), Some(mut chunk_size)) = id3v2_chunk {
-        file.rewind()?;
-
+    if let (Some(exising_id3_start), Some(mut existing_id3_size)) =
+        (exising_id3_start, existing_id3_size)
+    {
         // We need to remove the padding byte if it exists
-        if chunk_size % 2 != 0 {
-            chunk_size += 1;
+        if existing_id3_size % 2 != 0 {
+            existing_id3_size += 1;
         }
 
-        let mut file_bytes = Vec::new();
-        file.read_to_end(&mut file_bytes)?;
-
-        file_bytes.splice(
-            chunk_start as usize..(chunk_start + u64::from(chunk_size) + 8) as usize,
-            [],
-        );
+        let existing_tag_end =
+            exising_id3_start as usize + RIFF_CHUNK_HEADER_SIZE + existing_id3_size as usize;
+        let _ = file_bytes
+            .get_mut()
+            .drain(exising_id3_start as usize..existing_tag_end);
 
-        file.rewind()?;
-        file.truncate(0)?;
-        file.write_all(&file_bytes)?;
+        actual_stream_size -= existing_id3_size + RIFF_CHUNK_HEADER_SIZE as u32;
     }
 
     if !tag.is_empty() {
-        file.seek(SeekFrom::End(0))?;
-
+        let mut tag_bytes = Cursor::new(Vec::new());
         if write_options.uppercase_id3v2_chunk {
-            file.write_all(&CHUNK_NAME_UPPER)?;
+            tag_bytes.write_all(&CHUNK_NAME_UPPER)?;
        } else {
-            file.write_all(&CHUNK_NAME_LOWER)?;
+            tag_bytes.write_all(&CHUNK_NAME_LOWER)?;
        }
 
-        file.write_u32::<B>(tag.len() as u32)?;
-        file.write_all(tag)?;
+        tag_bytes.write_u32::<B>(tag.len() as u32)?;
+        tag_bytes.write_all(tag)?;
 
        // It is required an odd length chunk be padded with a 0
        // The 0 isn't included in the chunk size, however
        if tag.len() % 2 != 0 {
-            file.write_u8(0)?;
+            tag_bytes.write_u8(0)?;
        }
 
-        let total_size = file.stream_position()? - 8;
+        let Ok(tag_size): std::result::Result<u32, _> = tag_bytes.get_ref().len().try_into() else {
+            err!(TooMuchData)
+        };
 
-        file.seek(SeekFrom::Start(4))?;
+        let tag_position = actual_stream_size as usize + RIFF_CHUNK_HEADER_SIZE;
 
-        file.write_u32::<B>(total_size as u32)?;
+        file_bytes.get_mut().splice(
+            tag_position..tag_position,
+            tag_bytes.get_ref().iter().copied(),
+        );
+
+        actual_stream_size += tag_size;
    }
 
+    file_bytes.seek(SeekFrom::Start(4))?;
+    file_bytes.write_u32::<B>(actual_stream_size)?;
+
+    file.rewind()?;
+    file.truncate(0)?;
+    file.write_all(file_bytes.get_ref())?;
+
    Ok(())
 }
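The rewritten writer edits the file as an in-memory `Cursor<Vec<u8>>`: it drains the old `ID3 `/`id3 ` chunk's byte range and splices the new tag in at the stream boundary, so any trailing junk shifts but survives in place. A toy illustration (not Lofty code) of the two `std` calls this relies on:

```rust
fn main() {
    // Stand-in for the file bytes held in the writer's `Cursor<Vec<u8>>`.
    let mut bytes = vec![1u8, 2, 3, 4, 5, 6];

    // Removing an existing chunk: drain its byte range in place.
    let _ = bytes.drain(1..3);
    assert_eq!(bytes, [1, 4, 5, 6]);

    // Inserting the new tag *inside* the stream bounds: splice into an empty
    // range at the chosen position; everything after it shifts back intact.
    let insert_at = 2;
    let _ = bytes.splice(insert_at..insert_at, [9u8, 9].iter().copied());
    assert_eq!(bytes, [1, 4, 9, 9, 5, 6]);
}
```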

lofty/src/iff/aiff/read.rs

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ where
 
     let mut chunks = Chunks::<BigEndian>::new(file_len);
 
-    while chunks.next(data).is_ok() {
+    while let Ok(true) = chunks.next(data) {
         match &chunks.fourcc {
             b"ID3 " | b"id3 " if parse_options.read_tags => {
                 let tag = chunks.id3_chunk(data, parse_options)?;

lofty/src/iff/aiff/tag.rs

Lines changed: 1 addition & 1 deletion
@@ -436,7 +436,7 @@ where
 
     let mut chunks = Chunks::<BigEndian>::new(file_len);
 
-    while chunks.next(file).is_ok() {
+    while let Ok(true) = chunks.next(file) {
         match &chunks.fourcc {
             b"NAME" | b"AUTH" | b"(c) " | b"ANNO" | b"COMT" => {
                 let start = (file.stream_position()? - 8) as usize;

lofty/src/iff/chunk.rs

Lines changed: 8 additions & 2 deletions
@@ -9,6 +9,8 @@ use std::marker::PhantomData;
 
 use byteorder::{ByteOrder, ReadBytesExt};
 
+const RIFF_CHUNK_HEADER_SIZE: u64 = 8;
+
 pub(crate) struct Chunks<B>
 where
     B: ByteOrder,
@@ -30,16 +32,20 @@ impl<B: ByteOrder> Chunks<B> {
         }
     }
 
-    pub fn next<R>(&mut self, data: &mut R) -> Result<()>
+    pub fn next<R>(&mut self, data: &mut R) -> Result<bool>
     where
         R: Read,
     {
+        if self.remaining_size < RIFF_CHUNK_HEADER_SIZE {
+            return Ok(false);
+        }
+
         data.read_exact(&mut self.fourcc)?;
         self.size = data.read_u32::<B>()?;
 
         self.remaining_size = self.remaining_size.saturating_sub(8);
 
-        Ok(())
+        Ok(true)
     }
 
     pub fn read_cstring<R>(&mut self, data: &mut R) -> Result<String>
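The change in `next`'s return type is easiest to read from the caller's side: `Ok(true)` means a chunk header was read, `Ok(false)` means the declared stream is exhausted (stop cleanly, even if junk bytes remain on disk), and `Err(_)` means the data is genuinely malformed. A self-contained miniature of that contract, assuming a simplified little-endian-only reader rather than Lofty's real `Chunks` type:

```rust
use std::io::{self, Cursor, Read};

const RIFF_CHUNK_HEADER_SIZE: u64 = 8;

/// Miniature of the new contract (not Lofty's real type):
/// `Ok(true)`  -> a chunk header was read, `fourcc`/`size` are now valid
/// `Ok(false)` -> the declared stream is exhausted; stop cleanly
/// `Err(_)`    -> the data is genuinely malformed
struct MiniChunks {
    remaining_size: u64,
    fourcc: [u8; 4],
    size: u32,
}

impl MiniChunks {
    fn next<R: Read>(&mut self, data: &mut R) -> io::Result<bool> {
        if self.remaining_size < RIFF_CHUNK_HEADER_SIZE {
            return Ok(false);
        }

        data.read_exact(&mut self.fourcc)?;
        let mut size = [0u8; 4];
        data.read_exact(&mut size)?;
        self.size = u32::from_le_bytes(size);

        self.remaining_size = self.remaining_size.saturating_sub(RIFF_CHUNK_HEADER_SIZE);
        Ok(true)
    }
}

fn main() -> io::Result<()> {
    // One 4-byte "data" chunk, followed by junk the stream size does not cover.
    let mut reader = Cursor::new(b"data\x04\x00\x00\x00ABCDJUNKJUNK".to_vec());

    // Stream size as a RIFF header would declare it: one chunk header + 4 bytes.
    let mut chunks = MiniChunks { remaining_size: 12, fourcc: [0; 4], size: 0 };

    while chunks.next(&mut reader)? {
        println!("chunk {:?}: {} bytes", chunks.fourcc, chunks.size);

        // Skip the chunk body and account for it against the stream size.
        reader.set_position(reader.position() + u64::from(chunks.size));
        chunks.remaining_size = chunks.remaining_size.saturating_sub(u64::from(chunks.size));
    }

    // The loop ends after the `data` chunk; the trailing junk is never parsed.
    Ok(())
}
```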

lofty/src/iff/wav/mod.rs

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 //! WAV specific items
 
 mod properties;
-mod read;
+pub(crate) mod read;
 pub(crate) mod tag;
 
 use crate::id3::v2::tag::Id3v2Tag;

lofty/src/iff/wav/read.rs

Lines changed: 4 additions & 3 deletions
@@ -11,7 +11,8 @@ use std::io::{Read, Seek, SeekFrom};
 
 use byteorder::{LittleEndian, ReadBytesExt};
 
-pub(super) fn verify_wav<T>(data: &mut T) -> Result<()>
+// Verifies that the stream is a WAV file and returns the stream length
+pub(crate) fn verify_wav<T>(data: &mut T) -> Result<u32>
 where
     T: Read + Seek,
 {
@@ -27,7 +28,7 @@ where
     }
 
     log::debug!("File verified to be WAV");
-    Ok(())
+    Ok(u32::from_le_bytes(id[4..8].try_into().unwrap()))
 }
 
 pub(super) fn read_from<R>(data: &mut R, parse_options: ParseOptions) -> Result<WavFile>
@@ -50,7 +51,7 @@ where
 
     let mut chunks = Chunks::<LittleEndian>::new(file_len);
 
-    while chunks.next(data).is_ok() {
+    while let Ok(true) = chunks.next(data) {
         match &chunks.fourcc {
             b"fmt " if parse_options.read_properties => {
                 if fmt.is_empty() {

lofty/src/iff/wav/tag/read.rs

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ pub(in crate::iff::wav) fn parse_riff_info<R>(
 where
     R: Read + Seek,
 {
-    while data.stream_position()? != end && chunks.next(data).is_ok() {
+    while data.stream_position()? != end && matches!(chunks.next(data), Ok(true)) {
         let key_str = utf8_decode_str(&chunks.fourcc)
             .map_err(|_| decode_err!(Wav, "Invalid item key found in RIFF INFO"))?;

lofty/src/iff/wav/tag/write.rs

Lines changed: 46 additions & 19 deletions
@@ -6,10 +6,12 @@ use crate::iff::wav::read::verify_wav;
 use crate::macros::err;
 use crate::util::io::{FileLike, Length, Truncate};
 
-use std::io::{Read, Seek, SeekFrom};
+use std::io::{Cursor, Read, Seek, SeekFrom};
 
 use byteorder::{LittleEndian, WriteBytesExt};
 
+const RIFF_CHUNK_HEADER_SIZE: usize = 8;
+
 pub(in crate::iff::wav) fn write_riff_info<'a, F, I>(
     file: &mut F,
     tag: &mut RIFFInfoListRef<'a, I>,
@@ -21,44 +23,69 @@ where
     LoftyError: From<<F as Length>::Error>,
     I: Iterator<Item = (&'a str, &'a str)>,
 {
-    verify_wav(file)?;
-    let file_len = file.len()?.saturating_sub(12);
+    let mut stream_length = verify_wav(file)?;
 
     let mut riff_info_bytes = Vec::new();
     create_riff_info(&mut tag.items, &mut riff_info_bytes)?;
 
-    let Some(info_list_size) = find_info_list(file, file_len)? else {
+    file.rewind()?;
+
+    let mut file_bytes = Cursor::new(Vec::new());
+    file.read_to_end(file_bytes.get_mut())?;
+
+    if file_bytes.get_ref().len() < (stream_length as usize + RIFF_CHUNK_HEADER_SIZE) {
+        err!(SizeMismatch);
+    }
+
+    // The first chunk format is RIFF....WAVE
+    file_bytes.seek(SeekFrom::Start(12))?;
+
+    let Some(info_list_size) = find_info_list(&mut file_bytes, u64::from(stream_length - 4))?
+    else {
         // Simply append the info list to the end of the file and update the file size
-        file.seek(SeekFrom::End(0))?;
+        file_bytes.rewind()?;
+
+        let tag_position = stream_length as usize + RIFF_CHUNK_HEADER_SIZE;
+
+        file_bytes.seek(SeekFrom::Start(tag_position as u64))?;
 
-        file.write_all(&riff_info_bytes)?;
+        file_bytes
+            .get_mut()
+            .splice(tag_position..tag_position, riff_info_bytes.iter().copied());
 
-        let len = (file.stream_position()? - 8) as u32;
+        let len = (riff_info_bytes.len() + tag_position - 8) as u32;
 
-        file.seek(SeekFrom::Start(4))?;
-        file.write_u32::<LittleEndian>(len)?;
+        file_bytes.seek(SeekFrom::Start(4))?;
+        file_bytes.write_u32::<LittleEndian>(len)?;
+
+        file.rewind()?;
+        file.truncate(0)?;
+        file.write_all(file_bytes.get_ref())?;
 
         return Ok(());
     };
 
     // Replace the existing tag
 
-    let info_list_start = file.seek(SeekFrom::Current(-12))? as usize;
-    let info_list_end = info_list_start + 8 + info_list_size as usize;
+    let info_list_start = file_bytes.seek(SeekFrom::Current(-12))? as usize;
+    let info_list_end = info_list_start + RIFF_CHUNK_HEADER_SIZE + info_list_size as usize;
 
-    file.rewind()?;
+    stream_length -= info_list_end as u32 - info_list_start as u32;
 
-    let mut file_bytes = Vec::new();
-    file.read_to_end(&mut file_bytes)?;
+    let new_tag_len = riff_info_bytes.len() as u32;
+    let _ = file_bytes
        .get_mut()
        .splice(info_list_start..info_list_end, riff_info_bytes);
 
-    let _ = file_bytes.splice(info_list_start..info_list_end, riff_info_bytes);
+    stream_length += new_tag_len;
 
-    let total_size = (file_bytes.len() - 8) as u32;
-    let _ = file_bytes.splice(4..8, total_size.to_le_bytes());
+    let _ = file_bytes
+        .get_mut()
+        .splice(4..8, stream_length.to_le_bytes());
 
     file.rewind()?;
     file.truncate(0)?;
-    file.write_all(&file_bytes)?;
+    file.write_all(file_bytes.get_ref())?;
 
     Ok(())
 }
@@ -71,7 +98,7 @@ where
 
     let mut chunks = Chunks::<LittleEndian>::new(file_size);
 
-    while chunks.next(data).is_ok() {
+    while let Ok(true) = chunks.next(data) {
         if &chunks.fourcc == b"LIST" {
             let mut list_type = [0; 4];
             data.read_exact(&mut list_type)?;
