Skip to content

Commit 23c334e

Browse files
committed
ID3v2: Properly handle multi-value UTF-16 encoded frames
1 parent 9985a55 commit 23c334e

File tree

3 files changed: 16 additions and 12 deletions

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2121
parse it as another atom definition. As the specification is broad, there is no way for us to say *with certainty*
2222
that an identifier is invalid. Now we unfortunately have to guess the validity based on the commonly known atoms.
2323
For this, we follow [TagLib]'s [checks](https://github.com/taglib/taglib/blob/b40b834b1bdbd74593c5619e969e793d4d4886d9/taglib/mp4/mp4atom.cpp#L89).
24-
- **ID3v2**: No longer error on inputs shorter than 128 bytes (the length of an ID3v1 tag). ([PR](https://github.com/Serial-ATA/lofty-rs/pull/270))
24+
- **ID3v1**: No longer error on inputs shorter than 128 bytes (the length of an ID3v1 tag). ([PR](https://github.com/Serial-ATA/lofty-rs/pull/270))
25+
- **ID3v2**: No longer error on multi-value UTF-16 encoded text frames ([issue](https://github.com/Serial-ATA/lofty-rs/issues/265)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/284))
2526

2627
### Removed
2728
- **MP4**: `Ilst::{track_total, disc_number, disc_total}` ([PR](https://github.com/Serial-ATA/lofty-rs/pull/269))

src/id3/v2/items/extended_text_frame.rs

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,7 @@
11
use crate::error::{Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
22
use crate::id3::v2::frame::content::verify_encoding;
33
use crate::id3::v2::header::Id3v2Version;
4-
use crate::util::text::{
5-
decode_text, encode_text, read_to_terminator, utf16_decode_bytes, TextEncoding,
6-
};
4+
use crate::util::text::{decode_text, encode_text, utf16_decode_bytes, TextEncoding};
75

86
use std::hash::{Hash, Hasher};
97
use std::io::Read;
@@ -74,17 +72,19 @@ impl ExtendedTextFrame {
7472

7573
// It's possible for the description to be the only string with a BOM
7674
'utf16: {
77-
let bom = description.bom;
78-
let Some(raw_text) = read_to_terminator(reader, TextEncoding::UTF16) else {
75+
let mut raw_text = Vec::new();
76+
reader.read_to_end(&mut raw_text)?;
77+
78+
if raw_text.is_empty() {
7979
// Nothing left to do
8080
frame_content = String::new();
8181
break 'utf16;
82-
};
82+
}
8383

84+
let mut bom = description.bom;
8485
if raw_text.starts_with(&[0xFF, 0xFE]) || raw_text.starts_with(&[0xFE, 0xFF]) {
85-
frame_content =
86-
decode_text(&mut &raw_text[..], TextEncoding::UTF16, false)?.content;
87-
break 'utf16;
86+
// The text specifies a BOM
87+
bom = [raw_text[0], raw_text[1]];
8888
}
8989

9090
let endianness = match bom {

src/util/text.rs

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -207,8 +207,11 @@ pub(crate) fn utf16_decode_bytes(bytes: &[u8], endianness: fn([u8; 2]) -> u16) -
207207

208208
let unverified: Vec<u16> = bytes
209209
.chunks_exact(2)
210-
.map_while(|c| match c {
211-
[0, 0] => None,
210+
// In ID3v2, it is possible to have multiple UTF-16 strings separated by null.
211+
// This also makes it possible for us to encounter multiple BOMs in a single string.
212+
// We must filter them out.
213+
.filter_map(|c| match c {
214+
[0xFF, 0xFE] | [0xFE, 0xFF] => None,
212215
_ => Some(endianness(c.try_into().unwrap())), // Infallible
213216
})
214217
.collect();

0 commit comments

Comments
 (0)