Skip to content

Commit 7571d96

Browse files
committed
Timestamp: Allow short-circuiting on partially invalid inputs
1 parent 98da062 commit 7571d96

File tree

3 files changed

+152
-28
lines changed

3 files changed

+152
-28
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2828

2929
### Fixed
3030
- **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))
31-
- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/issues/453))
31+
- **Timestamp**:
32+
- Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453))
33+
- `Timestamp::parse` will now short-circuit when possible in `ParsingMode::{BestAttempt, Relaxed}` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/462)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/463))
34+
- For example, the timestamp "2024-06-03 14:08:49" contains a space instead of the required "T" marker.
35+
In `ParsingMode::Strict`, this would be an error. Otherwise, the parser will just stop once it hits the space
36+
and return the timestamp up to that point.
3237
- **ID3v2**:
3338
- `ItemKey::Director` will now be written correctly as a TXXX frame ([PR](https://github.com/Serial-ATA/lofty-rs/issues/454))
3439
- When skipping invalid frames in `ParsingMode::{BestAttempt, Relaxed}`, the parser will no longer be able to go out of the bounds

lofty/src/id3/v2/items/timestamp_frame.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::config::ParsingMode;
2-
use crate::error::{ErrorKind, LoftyError, Result};
2+
use crate::error::Result;
33
use crate::id3::v2::{FrameFlags, FrameHeader, FrameId};
44
use crate::macros::err;
55
use crate::tag::items::Timestamp;
@@ -77,14 +77,18 @@ impl<'a> TimestampFrame<'a> {
7777
return Ok(None);
7878
};
7979
let Some(encoding) = TextEncoding::from_u8(encoding_byte) else {
80-
return Err(LoftyError::new(ErrorKind::TextDecode(
81-
"Found invalid encoding",
82-
)));
80+
if parse_mode != ParsingMode::Relaxed {
81+
err!(TextDecode("Found invalid encoding"))
82+
}
83+
return Ok(None);
8384
};
8485

8586
let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
8687
if !value.is_ascii() {
87-
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
88+
if parse_mode == ParsingMode::Strict {
89+
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
90+
}
91+
return Ok(None);
8892
}
8993

9094
let header = FrameHeader::new(id, frame_flags);
@@ -96,7 +100,18 @@ impl<'a> TimestampFrame<'a> {
96100

97101
let reader = &mut value.as_bytes();
98102

99-
let Some(timestamp) = Timestamp::parse(reader, parse_mode)? else {
103+
let result;
104+
match Timestamp::parse(reader, parse_mode) {
105+
Ok(timestamp) => result = timestamp,
106+
Err(e) => {
107+
if parse_mode != ParsingMode::Relaxed {
108+
return Err(e);
109+
}
110+
return Ok(None);
111+
},
112+
}
113+
114+
let Some(timestamp) = result else {
100115
// Timestamp is empty
101116
return Ok(None);
102117
};
@@ -105,7 +120,7 @@ impl<'a> TimestampFrame<'a> {
105120
Ok(Some(frame))
106121
}
107122

108-
/// Convert an [`TimestampFrame`] to a byte vec
123+
/// Convert a [`TimestampFrame`] to a byte vec
109124
///
110125
/// # Errors
111126
///

lofty/src/tag/items/timestamp.rs

Lines changed: 124 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ impl Timestamp {
9898
match $expr {
9999
Ok((_, 0)) => break,
100100
Ok((val, _)) => Some(val as u8),
101-
Err(e) => return Err(e.into()),
101+
Err(e) => return Err(e),
102102
}
103103
};
104104
}
@@ -126,7 +126,7 @@ impl Timestamp {
126126

127127
let reader = &mut &content[..];
128128

129-
// We need to very that the year is exactly 4 bytes long. This doesn't matter for other segments.
129+
// We need to verify that the year is exactly 4 bytes long. This doesn't matter for other segments.
130130
let (year, bytes_read) = Self::segment::<4>(reader, None, parse_mode)?;
131131
if bytes_read != 4 {
132132
err!(BadTimestamp(
@@ -173,22 +173,31 @@ impl Timestamp {
173173
sep: Option<u8>,
174174
parse_mode: ParsingMode,
175175
) -> Result<(u16, usize)> {
176+
const STOP_PARSING: (u16, usize) = (0, 0);
177+
176178
if content.is_empty() {
177-
return Ok((0, 0));
179+
return Ok(STOP_PARSING);
178180
}
179181

180182
if let Some(sep) = sep {
181183
let byte = content.read_u8()?;
182184
if byte != sep {
183-
err!(BadTimestamp("Expected a separator"))
185+
if parse_mode == ParsingMode::Strict {
186+
err!(BadTimestamp("Expected a separator"))
187+
}
188+
return Ok(STOP_PARSING);
184189
}
185190
}
186191

187192
if content.len() < SIZE {
188-
err!(BadTimestamp("Timestamp segment is too short"))
193+
if parse_mode == ParsingMode::Strict {
194+
err!(BadTimestamp("Timestamp segment is too short"))
195+
}
196+
197+
return Ok(STOP_PARSING);
189198
}
190199

191-
let mut num = 0;
200+
let mut num = None;
192201
let mut byte_count = 0;
193202
for i in content[..SIZE].iter().copied() {
194203
// Common spec violation: Timestamps may use spaces instead of zeros, so the month of June
@@ -202,6 +211,8 @@ impl Timestamp {
202211
continue;
203212
}
204213

214+
// TODO: This is a spec violation for ID3v2, but not for ISO 8601 in general. Maybe consider
215+
// making this a warning and allow it for all parsing modes?
205216
if !i.is_ascii_digit() {
206217
// Another spec violation, timestamps in the wild may not use a zero or a space, so
207218
// we would have to treat "06", "6", and " 6" as valid.
@@ -220,13 +231,23 @@ impl Timestamp {
220231
))
221232
}
222233

223-
num = num * 10 + u16::from(i - b'0');
234+
num = Some(num.unwrap_or(0) * 10 + u16::from(i - b'0'));
224235
byte_count += 1;
225236
}
226237

238+
let Some(parsed_num) = num else {
239+
assert_ne!(
240+
parse_mode,
241+
ParsingMode::Strict,
242+
"The timestamp segment is empty, the parser should've failed before this point."
243+
);
244+
245+
return Ok(STOP_PARSING);
246+
};
247+
227248
*content = &content[byte_count..];
228249

229-
Ok((num, byte_count))
250+
Ok((parsed_num, byte_count))
230251
}
231252

232253
pub(crate) fn verify(&self) -> Result<()> {
@@ -316,21 +337,83 @@ mod tests {
316337
assert_eq!(timestamp.to_string().len(), 7);
317338
}
318339

340+
fn broken_timestamps() -> [(&'static [u8], Timestamp); 7] {
341+
[
342+
(
343+
b"2024-",
344+
Timestamp {
345+
year: 2024,
346+
..Timestamp::default()
347+
},
348+
),
349+
(
350+
b"2024-06-",
351+
Timestamp {
352+
year: 2024,
353+
month: Some(6),
354+
..Timestamp::default()
355+
},
356+
),
357+
(
358+
b"2024--",
359+
Timestamp {
360+
year: 2024,
361+
..Timestamp::default()
362+
},
363+
),
364+
(
365+
b"2024- -",
366+
Timestamp {
367+
year: 2024,
368+
..Timestamp::default()
369+
},
370+
),
371+
(
372+
b"2024-06-03T",
373+
Timestamp {
374+
year: 2024,
375+
month: Some(6),
376+
day: Some(3),
377+
..Timestamp::default()
378+
},
379+
),
380+
(
381+
b"2024:06",
382+
Timestamp {
383+
year: 2024,
384+
..Timestamp::default()
385+
},
386+
),
387+
(
388+
b"2024-0-",
389+
Timestamp {
390+
year: 2024,
391+
month: Some(0),
392+
..Timestamp::default()
393+
},
394+
),
395+
]
396+
}
397+
319398
#[test_log::test]
320-
fn reject_broken_timestamps() {
321-
let broken_timestamps: &[&[u8]] = &[
322-
b"2024-",
323-
b"2024-06-",
324-
b"2024--",
325-
b"2024- -",
326-
b"2024-06-03T",
327-
b"2024:06",
328-
b"2024-0-",
329-
];
399+
fn reject_broken_timestamps_strict() {
400+
for (timestamp, _) in broken_timestamps() {
401+
let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::Strict);
402+
assert!(parsed_timestamp.is_err());
403+
}
404+
}
330405

331-
for timestamp in broken_timestamps {
406+
#[test_log::test]
407+
fn accept_broken_timestamps_best_attempt() {
408+
for (timestamp, partial_result) in broken_timestamps() {
332409
let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::BestAttempt);
333-
assert!(parsed_timestamp.is_err());
410+
assert!(parsed_timestamp.is_ok());
411+
assert_eq!(
412+
parsed_timestamp.unwrap(),
413+
Some(partial_result),
414+
"{}",
415+
timestamp.escape_ascii()
416+
);
334417
}
335418
}
336419

@@ -466,4 +549,25 @@ mod tests {
466549
assert_eq!(parsed_timestamp, Some(expected));
467550
}
468551
}
552+
553+
#[test_log::test]
554+
fn timestamp_no_time_marker() {
555+
let timestamp = "2024-06-03 14:08:49";
556+
557+
let parsed_timestamp_strict =
558+
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::Strict);
559+
assert!(parsed_timestamp_strict.is_err());
560+
561+
let parsed_timestamp_best_attempt =
562+
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::BestAttempt).unwrap();
563+
assert_eq!(
564+
parsed_timestamp_best_attempt,
565+
Some(Timestamp {
566+
year: 2024,
567+
month: Some(6),
568+
day: Some(3),
569+
..Timestamp::default()
570+
})
571+
);
572+
}
469573
}

0 commit comments

Comments
 (0)