From 8ca19d870cc61483d1a990d5d1e5e6c3e593bac1 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 11:17:24 +0100 Subject: [PATCH 01/12] test: add tests for new logic --- tests/validators/test_date.py | 24 ++++++++++++++++++++++++ tests/validators/test_datetime.py | 24 ++++++++++++++++++++++++ tests/validators/test_time.py | 24 ++++++++++++++++++++++++ tests/validators/test_timedelta.py | 24 ++++++++++++++++++++++++ 4 files changed, 96 insertions(+) diff --git a/tests/validators/test_date.py b/tests/validators/test_date.py index 8300a15ad..5aa58a6bd 100644 --- a/tests/validators/test_date.py +++ b/tests/validators/test_date.py @@ -303,3 +303,27 @@ def test_date_past_future_today(): assert v.isinstance_python(today) is False assert v.isinstance_python(today - timedelta(days=1)) is False assert v.isinstance_python(today + timedelta(days=1)) is True + +@pytest.mark.parametrize( + 'val_temporal_unit, input_value, expected', + [ + # 'seconds' mode: treat as seconds since epoch + ('seconds', 1654646400, date(2022, 6, 8)), + ('seconds', '1654646400', date(2022, 6, 8)), + ('seconds', 1654646400.0, date(2022, 6, 8)), + # 'milliseconds' mode: treat as milliseconds since epoch + ('milliseconds', 1654646400000, date(2022, 6, 8)), + ('milliseconds', '1654646400000', date(2022, 6, 8)), + ('milliseconds', 1654646400000.0, date(2022, 6, 8)), + # 'infer' mode: large numbers are ms, small are s + ('infer', 1654646400, date(2022, 6, 8)), + ('infer', 1654646400000, date(2022, 6, 8)), + ], +) +def test_val_temporal_unit_date(val_temporal_unit, input_value, expected): + v = SchemaValidator( + cs.date_schema(), + config={'val_temporal_unit': val_temporal_unit}, + ) + output = v.validate_python(input_value) + assert output == expected diff --git a/tests/validators/test_datetime.py b/tests/validators/test_datetime.py index 5e319dc23..15ba9a56b 100644 --- a/tests/validators/test_datetime.py +++ b/tests/validators/test_datetime.py @@ -515,3 +515,27 @@ def test_tz_cmp() -> None: assert validated1 > validated2 assert validated2 < validated1 + +@pytest.mark.parametrize( + 'val_temporal_unit, input_value, expected', + [ + # 'seconds' mode: treat as seconds since epoch + ('seconds', 1654646400, datetime(2022, 6, 8, tzinfo=timezone.utc)), + ('seconds', '1654646400', datetime(2022, 6, 8, tzinfo=timezone.utc)), + ('seconds', 1654646400.123456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), + # 'milliseconds' mode: treat as milliseconds since epoch + ('milliseconds', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), + ('milliseconds', '1654646400123', datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), + ('milliseconds', 1654646400123.456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), + # 'infer' mode: large numbers are ms, small are s + ('infer', 1654646400, datetime(2022, 6, 8, tzinfo=timezone.utc)), + ('infer', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), + ], +) +def test_val_temporal_unit_datetime(val_temporal_unit, input_value, expected): + v = SchemaValidator( + cs.datetime_schema(), + config={'val_temporal_unit': val_temporal_unit}, + ) + output = v.validate_python(input_value) + assert output == expected \ No newline at end of file diff --git a/tests/validators/test_time.py b/tests/validators/test_time.py index 9a643acfb..ef82e0196 100644 --- a/tests/validators/test_time.py +++ b/tests/validators/test_time.py @@ -295,3 +295,27 @@ def test_neg_7200(): def test_tz_constraint_too_high(): with pytest.raises(SchemaError, match='OverflowError: Python int too large to convert to C long'): SchemaValidator(core_schema.time_schema(tz_constraint=2**64)) + +@pytest.mark.parametrize( + 'val_temporal_unit, input_value, expected', + [ + # 'seconds' mode: treat as seconds since midnight + ('seconds', 3661, time(1, 1, 1, tzinfo=timezone.utc)), + ('seconds', '3661', time(1, 1, 1, tzinfo=timezone.utc)), + ('seconds', 3661.123456, time(1, 1, 1, 123456, tzinfo=timezone.utc)), + # 'milliseconds' mode: treat as milliseconds since midnight + ('milliseconds', 3661123, time(1, 1, 1, 123000, tzinfo=timezone.utc)), + ('milliseconds', '3661123', time(1, 1, 1, 123000, tzinfo=timezone.utc)), + ('milliseconds', 3661123.456, time(1, 1, 1, 123456, tzinfo=timezone.utc)), + # 'infer' mode: large numbers are ms, small are s + ('infer', 3661, time(1, 1, 1, tzinfo=timezone.utc)), + ('infer', 3661123, time(1, 1, 1, 123000, tzinfo=timezone.utc)), + ], +) +def test_val_temporal_unit_time(val_temporal_unit, input_value, expected): + v = SchemaValidator( + core_schema.time_schema(), + config={'val_temporal_unit': val_temporal_unit}, + ) + output = v.validate_python(input_value) + assert output == expected diff --git a/tests/validators/test_timedelta.py b/tests/validators/test_timedelta.py index 243fbfb95..1d1ec5bb3 100644 --- a/tests/validators/test_timedelta.py +++ b/tests/validators/test_timedelta.py @@ -298,3 +298,27 @@ def test_pandas(): v.validate_python(one_55) with pytest.raises(ValidationError, match=msg): v.validate_python(one_55.to_pytimedelta()) + +@pytest.mark.parametrize( + 'val_temporal_unit, input_value, expected', + [ + # 'seconds' mode: treat as seconds + ('seconds', 3661, timedelta(hours=1, seconds=1)), + ('seconds', '3661', timedelta(hours=1, seconds=1)), + ('seconds', 3661.123456, timedelta(hours=1, seconds=1, microseconds=123456)), + # 'milliseconds' mode: treat as milliseconds + ('milliseconds', 3661123, timedelta(hours=1, seconds=1, microseconds=123000)), + ('milliseconds', '3661123', timedelta(hours=1, seconds=1, microseconds=123000)), + ('milliseconds', 3661123.456, timedelta(hours=1, seconds=1, microseconds=123456)), + # 'infer' mode: large numbers are ms, small are s + ('infer', 3661, timedelta(hours=1, seconds=1)), + ('infer', 3661123, timedelta(hours=1, seconds=1, microseconds=123000)), + ], +) +def test_val_temporal_unit_timedelta(val_temporal_unit, input_value, expected): + v = SchemaValidator( + core_schema.timedelta_schema(), + config={'val_temporal_unit': val_temporal_unit}, + ) + output = v.validate_python(input_value) + assert output == expected From c9f8fac6c4a98e0ec275e3a2a8ac2e68b7ab33a6 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 14:24:35 +0100 Subject: [PATCH 02/12] fix: add validator switch --- src/validators/config.rs | 55 +++++++++++++++++++++++++++--- src/validators/datetime.rs | 6 ++-- tests/validators/test_date.py | 1 + tests/validators/test_datetime.py | 3 +- tests/validators/test_time.py | 1 + tests/validators/test_timedelta.py | 1 + 6 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/validators/config.rs b/src/validators/config.rs index a14104628..f84004d8f 100644 --- a/src/validators/config.rs +++ b/src/validators/config.rs @@ -1,17 +1,18 @@ use std::borrow::Cow; use std::str::FromStr; +use crate::build_tools::py_schema_err; +use crate::errors::ErrorType; +use crate::input::EitherBytes; +use crate::serializers::BytesMode; +use crate::tools::SchemaDict; +use crate::validators::config::TemporalUnitMode::Seconds; use base64::engine::general_purpose::GeneralPurpose; use base64::engine::{DecodePaddingMode, GeneralPurposeConfig}; use base64::{alphabet, DecodeError, Engine}; use pyo3::types::{PyDict, PyString}; use pyo3::{intern, prelude::*}; -use crate::errors::ErrorType; -use crate::input::EitherBytes; -use crate::serializers::BytesMode; -use crate::tools::SchemaDict; - const URL_SAFE_OPTIONAL_PADDING: GeneralPurpose = GeneralPurpose::new( &alphabet::URL_SAFE, GeneralPurposeConfig::new().with_decode_padding_mode(DecodePaddingMode::Indifferent), @@ -21,6 +22,50 @@ const STANDARD_OPTIONAL_PADDING: GeneralPurpose = GeneralPurpose::new( GeneralPurposeConfig::new().with_decode_padding_mode(DecodePaddingMode::Indifferent), ); +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +pub enum TemporalUnitMode { + Seconds, + Milliseconds, + #[default] + Infer, +} + +impl FromStr for TemporalUnitMode { + type Err = PyErr; + + fn from_str(s: &str) -> Result { + match s { + "seconds" => Ok(Self::Seconds), + "milliseconds" => Ok(Self::Milliseconds), + "infer" => Ok(Self::Infer), + + s => py_schema_err!( + "Invalid temporal_unit_mode serialization mode: `{}`, expected seconds, milliseconds or infer", + s + ), + } + } +} + +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +pub struct ValTemporalUnit { + pub mode: TemporalUnitMode, +} + +impl ValTemporalUnit { + pub fn from_config(config: Option<&Bound<'_, PyDict>>) -> PyResult { + let Some(config_dict) = config else { + return Ok(Self::default()); + }; + let raw_mode = config_dict.get_as::>(intern!(config_dict.py(), "val_temporal_unit"))?; + let temporal_unit = raw_mode.map_or_else( + || Ok(TemporalUnitMode::default()), + |raw| TemporalUnitMode::from_str(&raw.to_cow()?), + )?; + Ok(Self { mode: temporal_unit }) + } +} + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] pub struct ValBytesMode { pub ser: BytesMode, diff --git a/src/validators/datetime.rs b/src/validators/datetime.rs index f6384cd2c..e193341a1 100644 --- a/src/validators/datetime.rs +++ b/src/validators/datetime.rs @@ -13,16 +13,17 @@ use crate::errors::ToErrorValue; use crate::errors::{py_err_string, ErrorType, ErrorTypeDefaults, ValError, ValResult}; use crate::input::{EitherDateTime, Input}; -use crate::tools::SchemaDict; - use super::Exactness; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; +use crate::tools::SchemaDict; +use crate::validators::config::{TemporalUnitMode, ValTemporalUnit}; #[derive(Debug, Clone)] pub struct DateTimeValidator { strict: bool, constraints: Option, microseconds_precision: speedate::MicrosecondsPrecisionOverflowBehavior, + val_temporal_unit: ValTemporalUnit, } pub(crate) fn extract_microseconds_precision( @@ -51,6 +52,7 @@ impl BuildValidator for DateTimeValidator { strict: is_strict(schema, config)?, constraints: DateTimeConstraints::from_py(schema)?, microseconds_precision: extract_microseconds_precision(schema, config)?, + val_temporal_unit: ValTemporalUnit::from_config(config)?, } .into()) } diff --git a/tests/validators/test_date.py b/tests/validators/test_date.py index 5aa58a6bd..5c63e9bf3 100644 --- a/tests/validators/test_date.py +++ b/tests/validators/test_date.py @@ -304,6 +304,7 @@ def test_date_past_future_today(): assert v.isinstance_python(today - timedelta(days=1)) is False assert v.isinstance_python(today + timedelta(days=1)) is True + @pytest.mark.parametrize( 'val_temporal_unit, input_value, expected', [ diff --git a/tests/validators/test_datetime.py b/tests/validators/test_datetime.py index 15ba9a56b..b9527a676 100644 --- a/tests/validators/test_datetime.py +++ b/tests/validators/test_datetime.py @@ -516,6 +516,7 @@ def test_tz_cmp() -> None: assert validated1 > validated2 assert validated2 < validated1 + @pytest.mark.parametrize( 'val_temporal_unit, input_value, expected', [ @@ -538,4 +539,4 @@ def test_val_temporal_unit_datetime(val_temporal_unit, input_value, expected): config={'val_temporal_unit': val_temporal_unit}, ) output = v.validate_python(input_value) - assert output == expected \ No newline at end of file + assert output == expected diff --git a/tests/validators/test_time.py b/tests/validators/test_time.py index ef82e0196..25ccbc75d 100644 --- a/tests/validators/test_time.py +++ b/tests/validators/test_time.py @@ -296,6 +296,7 @@ def test_tz_constraint_too_high(): with pytest.raises(SchemaError, match='OverflowError: Python int too large to convert to C long'): SchemaValidator(core_schema.time_schema(tz_constraint=2**64)) + @pytest.mark.parametrize( 'val_temporal_unit, input_value, expected', [ diff --git a/tests/validators/test_timedelta.py b/tests/validators/test_timedelta.py index 1d1ec5bb3..bdcf6b083 100644 --- a/tests/validators/test_timedelta.py +++ b/tests/validators/test_timedelta.py @@ -299,6 +299,7 @@ def test_pandas(): with pytest.raises(ValidationError, match=msg): v.validate_python(one_55.to_pytimedelta()) + @pytest.mark.parametrize( 'val_temporal_unit, input_value, expected', [ From d611124236028f2d7020e1a0b8a424bdcbb2cb19 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 17:10:20 +0100 Subject: [PATCH 03/12] fix: wip --- src/input/datetime.rs | 12 ++++++++---- src/input/input_abstract.rs | 3 ++- src/input/input_json.rs | 12 +++++++----- src/input/input_python.rs | 12 +++++++----- src/input/input_string.rs | 5 +++-- src/validators/config.rs | 21 +++++++++++++-------- src/validators/date.rs | 4 ++-- src/validators/datetime.rs | 12 +++++++----- src/validators/mod.rs | 2 +- tests/validators/test_datetime.py | 2 ++ 10 files changed, 52 insertions(+), 33 deletions(-) diff --git a/src/input/datetime.rs b/src/input/datetime.rs index 279e79159..75e22c379 100644 --- a/src/input/datetime.rs +++ b/src/input/datetime.rs @@ -6,6 +6,7 @@ use pyo3::pyclass::CompareOp; use pyo3::types::PyTuple; use pyo3::types::{PyDate, PyDateTime, PyDelta, PyDeltaAccess, PyDict, PyTime, PyTzInfo}; use pyo3::IntoPyObjectExt; +use speedate::TimestampUnit; use speedate::{ Date, DateTime, DateTimeConfig, Duration, MicrosecondsPrecisionOverflowBehavior, ParseError, Time, TimeConfig, }; @@ -21,6 +22,7 @@ use super::Input; use crate::errors::ToErrorValue; use crate::errors::{ErrorType, ValError, ValResult}; use crate::tools::py_err; +use crate::validators::TemporalUnitMode; #[cfg_attr(debug_assertions, derive(Debug))] pub enum EitherDate<'py> { @@ -364,6 +366,7 @@ pub fn bytes_as_datetime<'py>( input: &(impl Input<'py> + ?Sized), bytes: &[u8], microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode ) -> ValResult> { match DateTime::parse_bytes_with_config( bytes, @@ -372,7 +375,7 @@ pub fn bytes_as_datetime<'py>( microseconds_precision_overflow_behavior: microseconds_overflow_behavior, unix_timestamp_offset: Some(0), }, - ..Default::default() + timestamp_unit: mode.into() }, ) { Ok(dt) => Ok(dt.into()), @@ -390,6 +393,7 @@ pub fn int_as_datetime<'py>( input: &(impl Input<'py> + ?Sized), timestamp: i64, timestamp_microseconds: u32, + mode: TemporalUnitMode ) -> ValResult> { match DateTime::from_timestamp_with_config( timestamp, @@ -399,7 +403,7 @@ pub fn int_as_datetime<'py>( unix_timestamp_offset: Some(0), ..Default::default() }, - ..Default::default() + timestamp_unit: mode.into() }, ) { Ok(dt) => Ok(dt.into()), @@ -427,12 +431,12 @@ macro_rules! nan_check { }; } -pub fn float_as_datetime<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64) -> ValResult> { +pub fn float_as_datetime<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64, mode: TemporalUnitMode) -> ValResult> { nan_check!(input, timestamp, DatetimeParsing); let microseconds = timestamp.fract().abs() * 1_000_000.0; // checking for extra digits in microseconds is unreliable with large floats, // so we just round to the nearest microsecond - int_as_datetime(input, timestamp.floor() as i64, microseconds.round() as u32) + int_as_datetime(input, timestamp.floor() as i64, microseconds.round() as u32, mode) } pub fn date_as_datetime<'py>(date: &Bound<'py, PyDate>) -> PyResult> { diff --git a/src/input/input_abstract.rs b/src/input/input_abstract.rs index 119c862ac..c82a4339d 100644 --- a/src/input/input_abstract.rs +++ b/src/input/input_abstract.rs @@ -8,7 +8,7 @@ use pyo3::{intern, prelude::*, IntoPyObjectExt}; use crate::errors::{ErrorTypeDefaults, InputValue, LocItem, ValError, ValResult}; use crate::lookup_key::{LookupKey, LookupPath}; use crate::tools::py_err; -use crate::validators::ValBytesMode; +use crate::validators::{TemporalUnitMode, ValBytesMode}; use super::datetime::{EitherDate, EitherDateTime, EitherTime, EitherTimedelta}; use super::return_enums::{EitherBytes, EitherComplex, EitherInt, EitherString}; @@ -170,6 +170,7 @@ pub trait Input<'py>: fmt::Debug { &self, strict: bool, microseconds_overflow_behavior: speedate::MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode, ) -> ValMatch>; fn validate_timedelta( diff --git a/src/input/input_json.rs b/src/input/input_json.rs index 6828f5927..381ef4f25 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -12,7 +12,7 @@ use crate::input::return_enums::EitherComplex; use crate::lookup_key::{LookupKey, LookupPath}; use crate::validators::complex::string_to_complex; use crate::validators::decimal::create_decimal; -use crate::validators::ValBytesMode; +use crate::validators::{TemporalUnitMode, ValBytesMode}; use super::datetime::{ bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, float_as_datetime, float_as_duration, @@ -313,13 +313,14 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { &self, strict: bool, microseconds_overflow_behavior: speedate::MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode, ) -> ValResult>> { match self { JsonValue::Str(v) => { - bytes_as_datetime(self, v.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::strict) + bytes_as_datetime(self, v.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::strict) } - JsonValue::Int(v) if !strict => int_as_datetime(self, *v, 0).map(ValidationMatch::lax), - JsonValue::Float(v) if !strict => float_as_datetime(self, *v).map(ValidationMatch::lax), + JsonValue::Int(v) if !strict => int_as_datetime(self, *v, 0, mode).map(ValidationMatch::lax), + JsonValue::Float(v) if !strict => float_as_datetime(self, *v, mode).map(ValidationMatch::lax), _ => Err(ValError::new(ErrorTypeDefaults::DatetimeType, self)), } } @@ -501,8 +502,9 @@ impl<'py> Input<'py> for str { &self, _strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode, ) -> ValResult>> { - bytes_as_datetime(self, self.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::lax) + bytes_as_datetime(self, self.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::lax) } fn validate_timedelta( diff --git a/src/input/input_python.rs b/src/input/input_python.rs index e82cbaed7..5ffd12ab3 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -17,8 +17,9 @@ use crate::errors::{ErrorType, ErrorTypeDefaults, InputValue, LocItem, ValError, use crate::tools::{extract_i64, safe_repr}; use crate::validators::complex::string_to_complex; use crate::validators::decimal::{create_decimal, get_decimal_type}; -use crate::validators::Exactness; +use crate::validators::{Exactness}; use crate::validators::ValBytesMode; +use crate::validators::TemporalUnitMode; use crate::ArgsKwargs; use super::datetime::{ @@ -559,6 +560,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { &self, strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode, ) -> ValResult>> { if let Ok(dt) = self.downcast_exact::() { return Ok(ValidationMatch::exact(dt.clone().into())); @@ -570,15 +572,15 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { if !strict { return if let Ok(py_str) = self.downcast::() { let str = py_string_str(py_str)?; - bytes_as_datetime(self, str.as_bytes(), microseconds_overflow_behavior) + bytes_as_datetime(self, str.as_bytes(), microseconds_overflow_behavior, mode) } else if let Ok(py_bytes) = self.downcast::() { - bytes_as_datetime(self, py_bytes.as_bytes(), microseconds_overflow_behavior) + bytes_as_datetime(self, py_bytes.as_bytes(), microseconds_overflow_behavior, mode) } else if self.is_exact_instance_of::() { Err(ValError::new(ErrorTypeDefaults::DatetimeType, self)) } else if let Some(int) = extract_i64(self) { - int_as_datetime(self, int, 0) + int_as_datetime(self, int, 0, mode) } else if let Ok(float) = self.extract::() { - float_as_datetime(self, float) + float_as_datetime(self, float, mode) } else if let Ok(date) = self.downcast::() { Ok(date_as_datetime(date)?) } else { diff --git a/src/input/input_string.rs b/src/input/input_string.rs index 0ab4ad014..1401bd822 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -9,7 +9,7 @@ use crate::lookup_key::{LookupKey, LookupPath}; use crate::tools::safe_repr; use crate::validators::complex::string_to_complex; use crate::validators::decimal::create_decimal; -use crate::validators::ValBytesMode; +use crate::validators::{ValBytesMode, TemporalUnitMode}; use super::datetime::{ bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, EitherDate, EitherDateTime, EitherTime, @@ -224,9 +224,10 @@ impl<'py> Input<'py> for StringMapping<'py> { &self, _strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode, ) -> ValResult>> { match self { - Self::String(s) => bytes_as_datetime(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior) + Self::String(s) => bytes_as_datetime(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior, mode) .map(ValidationMatch::strict), Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::DatetimeType, self)), } diff --git a/src/validators/config.rs b/src/validators/config.rs index f84004d8f..77b953ff4 100644 --- a/src/validators/config.rs +++ b/src/validators/config.rs @@ -6,12 +6,12 @@ use crate::errors::ErrorType; use crate::input::EitherBytes; use crate::serializers::BytesMode; use crate::tools::SchemaDict; -use crate::validators::config::TemporalUnitMode::Seconds; use base64::engine::general_purpose::GeneralPurpose; use base64::engine::{DecodePaddingMode, GeneralPurposeConfig}; use base64::{alphabet, DecodeError, Engine}; use pyo3::types::{PyDict, PyString}; use pyo3::{intern, prelude::*}; +use speedate::TimestampUnit; const URL_SAFE_OPTIONAL_PADDING: GeneralPurpose = GeneralPurpose::new( &alphabet::URL_SAFE, @@ -47,12 +47,7 @@ impl FromStr for TemporalUnitMode { } } -#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] -pub struct ValTemporalUnit { - pub mode: TemporalUnitMode, -} - -impl ValTemporalUnit { +impl TemporalUnitMode { pub fn from_config(config: Option<&Bound<'_, PyDict>>) -> PyResult { let Some(config_dict) = config else { return Ok(Self::default()); @@ -62,7 +57,17 @@ impl ValTemporalUnit { || Ok(TemporalUnitMode::default()), |raw| TemporalUnitMode::from_str(&raw.to_cow()?), )?; - Ok(Self { mode: temporal_unit }) + Ok(temporal_unit) + } +} + +impl From for TimestampUnit { + fn from(value: TemporalUnitMode) -> Self { + match value { + TemporalUnitMode::Seconds => TimestampUnit::Second, + TemporalUnitMode::Milliseconds => TimestampUnit::Millisecond, + TemporalUnitMode::Infer => TimestampUnit::Infer + } } } diff --git a/src/validators/date.rs b/src/validators/date.rs index 6fec1f89e..f9a59c6c3 100644 --- a/src/validators/date.rs +++ b/src/validators/date.rs @@ -11,7 +11,7 @@ use crate::input::{EitherDate, Input}; use crate::validators::datetime::{NowConstraint, NowOp}; -use super::Exactness; +use super::{Exactness, TemporalUnitMode}; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; #[derive(Debug, Clone)] @@ -110,7 +110,7 @@ impl Validator for DateValidator { /// /// Ok(None) means that this is not relevant to dates (the input was not a datetime nor a string) fn date_from_datetime<'py>(input: &(impl Input<'py> + ?Sized)) -> Result>, ValError> { - let either_dt = match input.validate_datetime(false, speedate::MicrosecondsPrecisionOverflowBehavior::Truncate) { + let either_dt = match input.validate_datetime(false, speedate::MicrosecondsPrecisionOverflowBehavior::Truncate, TemporalUnitMode::default()) { Ok(val_match) => val_match.into_inner(), // if the error was a parsing error, update the error type from DatetimeParsing to DateFromDatetimeParsing // and return it diff --git a/src/validators/datetime.rs b/src/validators/datetime.rs index e193341a1..b10c22b2e 100644 --- a/src/validators/datetime.rs +++ b/src/validators/datetime.rs @@ -3,6 +3,7 @@ use pyo3::intern; use pyo3::prelude::*; use pyo3::sync::GILOnceCell; use pyo3::types::{PyDict, PyString}; +use speedate::TimestampUnit; use speedate::{DateTime, MicrosecondsPrecisionOverflowBehavior, Time}; use std::cmp::Ordering; use strum::EnumMessage; @@ -16,14 +17,14 @@ use crate::input::{EitherDateTime, Input}; use super::Exactness; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; use crate::tools::SchemaDict; -use crate::validators::config::{TemporalUnitMode, ValTemporalUnit}; +use crate::validators::config::{TemporalUnitMode}; #[derive(Debug, Clone)] pub struct DateTimeValidator { strict: bool, constraints: Option, microseconds_precision: speedate::MicrosecondsPrecisionOverflowBehavior, - val_temporal_unit: ValTemporalUnit, + val_temporal_unit: TemporalUnitMode, } pub(crate) fn extract_microseconds_precision( @@ -52,7 +53,7 @@ impl BuildValidator for DateTimeValidator { strict: is_strict(schema, config)?, constraints: DateTimeConstraints::from_py(schema)?, microseconds_precision: extract_microseconds_precision(schema, config)?, - val_temporal_unit: ValTemporalUnit::from_config(config)?, + val_temporal_unit: TemporalUnitMode::from_config(config)?, } .into()) } @@ -68,7 +69,8 @@ impl Validator for DateTimeValidator { state: &mut ValidationState<'_, 'py>, ) -> ValResult { let strict = state.strict_or(self.strict); - let datetime = match input.validate_datetime(strict, self.microseconds_precision) { + let datetime = match input.validate_datetime( + strict, self.microseconds_precision, self.val_temporal_unit) { Ok(val_match) => val_match.unpack(state), // if the error was a parsing error, in lax mode we allow dates and add the time 00:00:00 Err(line_errors @ ValError::LineErrors(..)) if !strict => { @@ -213,7 +215,7 @@ impl DateTimeConstraints { fn py_datetime_as_datetime(schema: &Bound<'_, PyDict>, key: &Bound<'_, PyString>) -> PyResult> { match schema.get_item(key)? { - Some(value) => match value.validate_datetime(false, MicrosecondsPrecisionOverflowBehavior::Truncate) { + Some(value) => match value.validate_datetime(false, MicrosecondsPrecisionOverflowBehavior::Truncate, TemporalUnitMode::default()) { Ok(v) => Ok(Some(v.into_inner().as_raw()?)), Err(_) => Err(PyValueError::new_err(format!( "'{key}' must be coercible to a datetime instance", diff --git a/src/validators/mod.rs b/src/validators/mod.rs index 2fd79c495..93be2975a 100644 --- a/src/validators/mod.rs +++ b/src/validators/mod.rs @@ -15,7 +15,7 @@ use crate::input::{Input, InputType, StringMapping}; use crate::py_gc::PyGcTraverse; use crate::recursion_guard::RecursionState; use crate::tools::SchemaDict; -pub(crate) use config::ValBytesMode; +pub(crate) use config::{ValBytesMode, TemporalUnitMode}; mod any; mod arguments; diff --git a/tests/validators/test_datetime.py b/tests/validators/test_datetime.py index b9527a676..c0b0daf49 100644 --- a/tests/validators/test_datetime.py +++ b/tests/validators/test_datetime.py @@ -525,12 +525,14 @@ def test_tz_cmp() -> None: ('seconds', '1654646400', datetime(2022, 6, 8, tzinfo=timezone.utc)), ('seconds', 1654646400.123456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), # 'milliseconds' mode: treat as milliseconds since epoch + ('milliseconds', 1654646400, datetime(1970, 1, 20, 3, 37, 26, 400000, tzinfo=timezone.utc)), ('milliseconds', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), ('milliseconds', '1654646400123', datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), ('milliseconds', 1654646400123.456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), # 'infer' mode: large numbers are ms, small are s ('infer', 1654646400, datetime(2022, 6, 8, tzinfo=timezone.utc)), ('infer', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), + ('infer', 1654646400123.456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)) ], ) def test_val_temporal_unit_datetime(val_temporal_unit, input_value, expected): From 34e3d235bb7b90f709c12c8c3bdac34e91ced936 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 17:36:26 +0100 Subject: [PATCH 04/12] feat: add support for datetimes --- src/input/datetime.rs | 15 +++++++++------ src/input/input_python.rs | 4 ++-- src/input/input_string.rs | 8 +++++--- src/validators/config.rs | 4 ++-- src/validators/date.rs | 8 ++++++-- src/validators/datetime.rs | 12 +++++++----- src/validators/mod.rs | 2 +- tests/validators/test_datetime.py | 19 +++++++++++++++++-- 8 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/input/datetime.rs b/src/input/datetime.rs index 75e22c379..cd5eea15d 100644 --- a/src/input/datetime.rs +++ b/src/input/datetime.rs @@ -6,7 +6,6 @@ use pyo3::pyclass::CompareOp; use pyo3::types::PyTuple; use pyo3::types::{PyDate, PyDateTime, PyDelta, PyDeltaAccess, PyDict, PyTime, PyTzInfo}; use pyo3::IntoPyObjectExt; -use speedate::TimestampUnit; use speedate::{ Date, DateTime, DateTimeConfig, Duration, MicrosecondsPrecisionOverflowBehavior, ParseError, Time, TimeConfig, }; @@ -366,7 +365,7 @@ pub fn bytes_as_datetime<'py>( input: &(impl Input<'py> + ?Sized), bytes: &[u8], microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, - mode: TemporalUnitMode + mode: TemporalUnitMode, ) -> ValResult> { match DateTime::parse_bytes_with_config( bytes, @@ -375,7 +374,7 @@ pub fn bytes_as_datetime<'py>( microseconds_precision_overflow_behavior: microseconds_overflow_behavior, unix_timestamp_offset: Some(0), }, - timestamp_unit: mode.into() + timestamp_unit: mode.into(), }, ) { Ok(dt) => Ok(dt.into()), @@ -393,7 +392,7 @@ pub fn int_as_datetime<'py>( input: &(impl Input<'py> + ?Sized), timestamp: i64, timestamp_microseconds: u32, - mode: TemporalUnitMode + mode: TemporalUnitMode, ) -> ValResult> { match DateTime::from_timestamp_with_config( timestamp, @@ -403,7 +402,7 @@ pub fn int_as_datetime<'py>( unix_timestamp_offset: Some(0), ..Default::default() }, - timestamp_unit: mode.into() + timestamp_unit: mode.into(), }, ) { Ok(dt) => Ok(dt.into()), @@ -431,7 +430,11 @@ macro_rules! nan_check { }; } -pub fn float_as_datetime<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64, mode: TemporalUnitMode) -> ValResult> { +pub fn float_as_datetime<'py>( + input: &(impl Input<'py> + ?Sized), + timestamp: f64, + mode: TemporalUnitMode, +) -> ValResult> { nan_check!(input, timestamp, DatetimeParsing); let microseconds = timestamp.fract().abs() * 1_000_000.0; // checking for extra digits in microseconds is unreliable with large floats, diff --git a/src/input/input_python.rs b/src/input/input_python.rs index 5ffd12ab3..bb86dc925 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -17,9 +17,9 @@ use crate::errors::{ErrorType, ErrorTypeDefaults, InputValue, LocItem, ValError, use crate::tools::{extract_i64, safe_repr}; use crate::validators::complex::string_to_complex; use crate::validators::decimal::{create_decimal, get_decimal_type}; -use crate::validators::{Exactness}; -use crate::validators::ValBytesMode; +use crate::validators::Exactness; use crate::validators::TemporalUnitMode; +use crate::validators::ValBytesMode; use crate::ArgsKwargs; use super::datetime::{ diff --git a/src/input/input_string.rs b/src/input/input_string.rs index 1401bd822..c98dd0f8a 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -9,7 +9,7 @@ use crate::lookup_key::{LookupKey, LookupPath}; use crate::tools::safe_repr; use crate::validators::complex::string_to_complex; use crate::validators::decimal::create_decimal; -use crate::validators::{ValBytesMode, TemporalUnitMode}; +use crate::validators::{TemporalUnitMode, ValBytesMode}; use super::datetime::{ bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, EitherDate, EitherDateTime, EitherTime, @@ -227,8 +227,10 @@ impl<'py> Input<'py> for StringMapping<'py> { mode: TemporalUnitMode, ) -> ValResult>> { match self { - Self::String(s) => bytes_as_datetime(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior, mode) - .map(ValidationMatch::strict), + Self::String(s) => { + bytes_as_datetime(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior, mode) + .map(ValidationMatch::strict) + } Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::DatetimeType, self)), } } diff --git a/src/validators/config.rs b/src/validators/config.rs index 77b953ff4..9d7c24770 100644 --- a/src/validators/config.rs +++ b/src/validators/config.rs @@ -65,8 +65,8 @@ impl From for TimestampUnit { fn from(value: TemporalUnitMode) -> Self { match value { TemporalUnitMode::Seconds => TimestampUnit::Second, - TemporalUnitMode::Milliseconds => TimestampUnit::Millisecond, - TemporalUnitMode::Infer => TimestampUnit::Infer + TemporalUnitMode::Milliseconds => TimestampUnit::Millisecond, + TemporalUnitMode::Infer => TimestampUnit::Infer, } } } diff --git a/src/validators/date.rs b/src/validators/date.rs index f9a59c6c3..0c4f48d5f 100644 --- a/src/validators/date.rs +++ b/src/validators/date.rs @@ -11,8 +11,8 @@ use crate::input::{EitherDate, Input}; use crate::validators::datetime::{NowConstraint, NowOp}; -use super::{Exactness, TemporalUnitMode}; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; +use super::{Exactness, TemporalUnitMode}; #[derive(Debug, Clone)] pub struct DateValidator { @@ -110,7 +110,11 @@ impl Validator for DateValidator { /// /// Ok(None) means that this is not relevant to dates (the input was not a datetime nor a string) fn date_from_datetime<'py>(input: &(impl Input<'py> + ?Sized)) -> Result>, ValError> { - let either_dt = match input.validate_datetime(false, speedate::MicrosecondsPrecisionOverflowBehavior::Truncate, TemporalUnitMode::default()) { + let either_dt = match input.validate_datetime( + false, + speedate::MicrosecondsPrecisionOverflowBehavior::Truncate, + TemporalUnitMode::default(), + ) { Ok(val_match) => val_match.into_inner(), // if the error was a parsing error, update the error type from DatetimeParsing to DateFromDatetimeParsing // and return it diff --git a/src/validators/datetime.rs b/src/validators/datetime.rs index b10c22b2e..9b7463ac9 100644 --- a/src/validators/datetime.rs +++ b/src/validators/datetime.rs @@ -3,7 +3,6 @@ use pyo3::intern; use pyo3::prelude::*; use pyo3::sync::GILOnceCell; use pyo3::types::{PyDict, PyString}; -use speedate::TimestampUnit; use speedate::{DateTime, MicrosecondsPrecisionOverflowBehavior, Time}; use std::cmp::Ordering; use strum::EnumMessage; @@ -17,7 +16,7 @@ use crate::input::{EitherDateTime, Input}; use super::Exactness; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; use crate::tools::SchemaDict; -use crate::validators::config::{TemporalUnitMode}; +use crate::validators::config::TemporalUnitMode; #[derive(Debug, Clone)] pub struct DateTimeValidator { @@ -69,8 +68,7 @@ impl Validator for DateTimeValidator { state: &mut ValidationState<'_, 'py>, ) -> ValResult { let strict = state.strict_or(self.strict); - let datetime = match input.validate_datetime( - strict, self.microseconds_precision, self.val_temporal_unit) { + let datetime = match input.validate_datetime(strict, self.microseconds_precision, self.val_temporal_unit) { Ok(val_match) => val_match.unpack(state), // if the error was a parsing error, in lax mode we allow dates and add the time 00:00:00 Err(line_errors @ ValError::LineErrors(..)) if !strict => { @@ -215,7 +213,11 @@ impl DateTimeConstraints { fn py_datetime_as_datetime(schema: &Bound<'_, PyDict>, key: &Bound<'_, PyString>) -> PyResult> { match schema.get_item(key)? { - Some(value) => match value.validate_datetime(false, MicrosecondsPrecisionOverflowBehavior::Truncate, TemporalUnitMode::default()) { + Some(value) => match value.validate_datetime( + false, + MicrosecondsPrecisionOverflowBehavior::Truncate, + TemporalUnitMode::default(), + ) { Ok(v) => Ok(Some(v.into_inner().as_raw()?)), Err(_) => Err(PyValueError::new_err(format!( "'{key}' must be coercible to a datetime instance", diff --git a/src/validators/mod.rs b/src/validators/mod.rs index 93be2975a..3d0416d8d 100644 --- a/src/validators/mod.rs +++ b/src/validators/mod.rs @@ -15,7 +15,7 @@ use crate::input::{Input, InputType, StringMapping}; use crate::py_gc::PyGcTraverse; use crate::recursion_guard::RecursionState; use crate::tools::SchemaDict; -pub(crate) use config::{ValBytesMode, TemporalUnitMode}; +pub(crate) use config::{TemporalUnitMode, ValBytesMode}; mod any; mod arguments; diff --git a/tests/validators/test_datetime.py b/tests/validators/test_datetime.py index c0b0daf49..34c246502 100644 --- a/tests/validators/test_datetime.py +++ b/tests/validators/test_datetime.py @@ -47,6 +47,11 @@ def test_constraints_schema_validation() -> None: (Decimal('1654646400.1234564'), datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), (Decimal('1654646400.1234568'), datetime(2022, 6, 8, 0, 0, 0, 123457, tzinfo=timezone.utc)), ('1654646400.1234568', datetime(2022, 6, 8, 0, 0, 0, 123457, tzinfo=timezone.utc)), + pytest.param( + Decimal('1654646400123.456'), + datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), + marks=pytest.mark.xfail(reason='Currently failing behaviour, probably needs looking at.', strict=True), + ), (253_402_300_800_000, Err('should be a valid datetime, dates after 9999 are not supported as unix timestamps')), ( -80_000_000_000_000, @@ -528,11 +533,21 @@ def test_tz_cmp() -> None: ('milliseconds', 1654646400, datetime(1970, 1, 20, 3, 37, 26, 400000, tzinfo=timezone.utc)), ('milliseconds', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), ('milliseconds', '1654646400123', datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), - ('milliseconds', 1654646400123.456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), + pytest.param( + 'milliseconds', + 1654646400123.456, + datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), + marks=pytest.mark.xfail(reason='Current behaviour means this fails', strict=True), + ), # 'infer' mode: large numbers are ms, small are s ('infer', 1654646400, datetime(2022, 6, 8, tzinfo=timezone.utc)), ('infer', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), - ('infer', 1654646400123.456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)) + pytest.param( + 'infer', + 1654646400123.456, + datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), + marks=pytest.mark.xfail(reason='Current behaviour means this fails', strict=True), + ), ], ) def test_val_temporal_unit_datetime(val_temporal_unit, input_value, expected): From e1e7cc4b03497fc080688eb6fc64b929829069fb Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 19:06:40 +0100 Subject: [PATCH 05/12] feat: add date --- src/input/datetime.rs | 5 +++-- src/input/input_abstract.rs | 2 +- src/input/input_json.rs | 8 ++++---- src/input/input_python.rs | 4 ++-- src/input/input_string.rs | 4 ++-- src/validators/date.rs | 12 +++++++----- src/validators/datetime.rs | 2 +- 7 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/input/datetime.rs b/src/input/datetime.rs index cd5eea15d..b23de052f 100644 --- a/src/input/datetime.rs +++ b/src/input/datetime.rs @@ -6,6 +6,7 @@ use pyo3::pyclass::CompareOp; use pyo3::types::PyTuple; use pyo3::types::{PyDate, PyDateTime, PyDelta, PyDeltaAccess, PyDict, PyTime, PyTzInfo}; use pyo3::IntoPyObjectExt; +use speedate::DateConfig; use speedate::{ Date, DateTime, DateTimeConfig, Duration, MicrosecondsPrecisionOverflowBehavior, ParseError, Time, TimeConfig, }; @@ -325,8 +326,8 @@ impl<'py> EitherDateTime<'py> { } } -pub fn bytes_as_date<'py>(input: &(impl Input<'py> + ?Sized), bytes: &[u8]) -> ValResult> { - match Date::parse_bytes(bytes) { +pub fn bytes_as_date<'py>(input: &(impl Input<'py> + ?Sized), bytes: &[u8], mode: TemporalUnitMode) -> ValResult> { + match Date::parse_bytes_with_config(bytes, &DateConfig::builder().timestamp_unit(mode.into()).build()) { Ok(date) => Ok(date.into()), Err(err) => Err(ValError::new( ErrorType::DateParsing { diff --git a/src/input/input_abstract.rs b/src/input/input_abstract.rs index c82a4339d..e84c132df 100644 --- a/src/input/input_abstract.rs +++ b/src/input/input_abstract.rs @@ -158,7 +158,7 @@ pub trait Input<'py>: fmt::Debug { fn validate_iter(&self) -> ValResult>; - fn validate_date(&self, strict: bool) -> ValMatch>; + fn validate_date(&self, strict: bool, mode: TemporalUnitMode) -> ValMatch>; fn validate_time( &self, diff --git a/src/input/input_json.rs b/src/input/input_json.rs index 381ef4f25..b44551b7d 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -277,9 +277,9 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { } } - fn validate_date(&self, _strict: bool) -> ValResult>> { + fn validate_date(&self, _strict: bool, mode:TemporalUnitMode) -> ValResult>> { match self { - JsonValue::Str(v) => bytes_as_date(self, v.as_bytes()).map(ValidationMatch::strict), + JsonValue::Str(v) => bytes_as_date(self, v.as_bytes(), mode).map(ValidationMatch::strict), _ => Err(ValError::new(ErrorTypeDefaults::DateType, self)), } } @@ -486,8 +486,8 @@ impl<'py> Input<'py> for str { Ok(string_to_vec(self).into()) } - fn validate_date(&self, _strict: bool) -> ValResult>> { - bytes_as_date(self, self.as_bytes()).map(ValidationMatch::lax) + fn validate_date(&self, _strict: bool, mode: TemporalUnitMode) -> ValResult>> { + bytes_as_date(self, self.as_bytes(), mode).map(ValidationMatch::lax) } fn validate_time( diff --git a/src/input/input_python.rs b/src/input/input_python.rs index bb86dc925..71f2d37dc 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -495,7 +495,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { } } - fn validate_date(&self, strict: bool) -> ValResult>> { + fn validate_date(&self, strict: bool, mode: TemporalUnitMode) -> ValResult>> { if let Ok(date) = self.downcast_exact::() { Ok(ValidationMatch::exact(date.clone().into())) } else if self.is_instance_of::() { @@ -516,7 +516,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { None } } { - bytes_as_date(self, bytes).map(ValidationMatch::lax) + bytes_as_date(self, bytes, mode).map(ValidationMatch::lax) } else { Err(ValError::new(ErrorTypeDefaults::DateType, self)) } diff --git a/src/input/input_string.rs b/src/input/input_string.rs index c98dd0f8a..699672b07 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -201,9 +201,9 @@ impl<'py> Input<'py> for StringMapping<'py> { Err(ValError::new(ErrorTypeDefaults::IterableType, self)) } - fn validate_date(&self, _strict: bool) -> ValResult>> { + fn validate_date(&self, _strict: bool, mode:TemporalUnitMode) -> ValResult>> { match self { - Self::String(s) => bytes_as_date(self, py_string_str(s)?.as_bytes()).map(ValidationMatch::strict), + Self::String(s) => bytes_as_date(self, py_string_str(s)?.as_bytes(), mode).map(ValidationMatch::strict), Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::DateType, self)), } } diff --git a/src/validators/date.rs b/src/validators/date.rs index 0c4f48d5f..0ea8946d3 100644 --- a/src/validators/date.rs +++ b/src/validators/date.rs @@ -18,6 +18,7 @@ use super::{Exactness, TemporalUnitMode}; pub struct DateValidator { strict: bool, constraints: Option, + val_temporal_unit: TemporalUnitMode, } impl BuildValidator for DateValidator { @@ -31,6 +32,7 @@ impl BuildValidator for DateValidator { Ok(Self { strict: is_strict(schema, config)?, constraints: DateConstraints::from_py(schema)?, + val_temporal_unit: TemporalUnitMode::from_config(config)? } .into()) } @@ -46,12 +48,12 @@ impl Validator for DateValidator { state: &mut ValidationState<'_, 'py>, ) -> ValResult { let strict = state.strict_or(self.strict); - let date = match input.validate_date(strict) { + let date = match input.validate_date(strict, self.val_temporal_unit) { Ok(val_match) => val_match.unpack(state), // if the error was a parsing error, in lax mode we allow datetimes at midnight Err(line_errors @ ValError::LineErrors(..)) if !strict => { state.floor_exactness(Exactness::Lax); - date_from_datetime(input)?.ok_or(line_errors)? + date_from_datetime(input, self.val_temporal_unit)?.ok_or(line_errors)? } Err(otherwise) => return Err(otherwise), }; @@ -109,11 +111,11 @@ impl Validator for DateValidator { /// "exact date", e.g. has a zero time component. /// /// Ok(None) means that this is not relevant to dates (the input was not a datetime nor a string) -fn date_from_datetime<'py>(input: &(impl Input<'py> + ?Sized)) -> Result>, ValError> { +fn date_from_datetime<'py>(input: &(impl Input<'py> + ?Sized), mode:TemporalUnitMode) -> Result>, ValError> { let either_dt = match input.validate_datetime( false, speedate::MicrosecondsPrecisionOverflowBehavior::Truncate, - TemporalUnitMode::default(), + mode, ) { Ok(val_match) => val_match.into_inner(), // if the error was a parsing error, update the error type from DatetimeParsing to DateFromDatetimeParsing @@ -181,7 +183,7 @@ impl DateConstraints { fn convert_pydate(schema: &Bound<'_, PyDict>, key: &Bound<'_, PyString>) -> PyResult> { match schema.get_item(key)? { - Some(value) => match value.validate_date(false) { + Some(value) => match value.validate_date(false, TemporalUnitMode::default()) { Ok(v) => Ok(Some(v.into_inner().as_raw()?)), Err(_) => Err(PyValueError::new_err(format!( "'{key}' must be coercible to a date instance", diff --git a/src/validators/datetime.rs b/src/validators/datetime.rs index 9b7463ac9..7fe0fc2f4 100644 --- a/src/validators/datetime.rs +++ b/src/validators/datetime.rs @@ -144,7 +144,7 @@ impl Validator for DateTimeValidator { /// In lax mode, if the input is not a datetime, we try parsing the input as a date and add the "00:00:00" time. /// Ok(None) means that this is not relevant to datetimes (the input was not a date nor a string) fn datetime_from_date<'py>(input: &(impl Input<'py> + ?Sized)) -> Result>, ValError> { - let either_date = match input.validate_date(false) { + let either_date = match input.validate_date(false, TemporalUnitMode::default()) { Ok(val_match) => val_match.into_inner(), // if the error was a parsing error, update the error type from DateParsing to DatetimeFromDateParsing Err(ValError::LineErrors(mut line_errors)) => { From 987b424d3e0d0fb1e4552fafabae02af8f01a3ee Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 19:17:00 +0100 Subject: [PATCH 06/12] fix: some stuff for time, needs more support in speedate --- src/input/datetime.rs | 6 ++++-- src/input/input_abstract.rs | 1 + src/input/input_json.rs | 10 ++++++---- src/input/input_python.rs | 9 +++++---- src/input/input_string.rs | 3 ++- src/validators/time.rs | 7 +++++-- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/input/datetime.rs b/src/input/datetime.rs index b23de052f..9563c8bbb 100644 --- a/src/input/datetime.rs +++ b/src/input/datetime.rs @@ -343,6 +343,7 @@ pub fn bytes_as_time<'py>( input: &(impl Input<'py> + ?Sized), bytes: &[u8], microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode, ) -> ValResult> { match Time::parse_bytes_with_config( bytes, @@ -465,6 +466,7 @@ pub fn int_as_time<'py>( input: &(impl Input<'py> + ?Sized), timestamp: i64, timestamp_microseconds: u32, + mode: TemporalUnitMode ) -> ValResult> { let time_timestamp: u32 = match timestamp { t if t < 0_i64 => { @@ -500,11 +502,11 @@ pub fn int_as_time<'py>( } } -pub fn float_as_time<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64) -> ValResult> { +pub fn float_as_time<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64, mode: TemporalUnitMode) -> ValResult> { nan_check!(input, timestamp, TimeParsing); let microseconds = timestamp.fract().abs() * 1_000_000.0; // round for same reason as above - int_as_time(input, timestamp.floor() as i64, microseconds.round() as u32) + int_as_time(input, timestamp.floor() as i64, microseconds.round() as u32, mode) } fn map_timedelta_err(input: impl ToErrorValue, err: ParseError) -> ValError { diff --git a/src/input/input_abstract.rs b/src/input/input_abstract.rs index e84c132df..6d5ef6102 100644 --- a/src/input/input_abstract.rs +++ b/src/input/input_abstract.rs @@ -164,6 +164,7 @@ pub trait Input<'py>: fmt::Debug { &self, strict: bool, microseconds_overflow_behavior: speedate::MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode ) -> ValMatch>; fn validate_datetime( diff --git a/src/input/input_json.rs b/src/input/input_json.rs index b44551b7d..0f0dc2a1a 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -287,13 +287,14 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { &self, strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode ) -> ValResult>> { match self { JsonValue::Str(v) => { - bytes_as_time(self, v.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::strict) + bytes_as_time(self, v.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::strict) } - JsonValue::Int(v) if !strict => int_as_time(self, *v, 0).map(ValidationMatch::lax), - JsonValue::Float(v) if !strict => float_as_time(self, *v).map(ValidationMatch::lax), + JsonValue::Int(v) if !strict => int_as_time(self, *v, 0, mode).map(ValidationMatch::lax), + JsonValue::Float(v) if !strict => float_as_time(self, *v, mode).map(ValidationMatch::lax), JsonValue::BigInt(_) if !strict => Err(ValError::new( ErrorType::TimeParsing { error: Cow::Borrowed( @@ -494,8 +495,9 @@ impl<'py> Input<'py> for str { &self, _strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode ) -> ValResult>> { - bytes_as_time(self, self.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::lax) + bytes_as_time(self, self.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::lax) } fn validate_datetime( diff --git a/src/input/input_python.rs b/src/input/input_python.rs index 71f2d37dc..0e0e1881b 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -526,6 +526,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { &self, strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode ) -> ValResult>> { if let Ok(time) = self.downcast_exact::() { return Ok(ValidationMatch::exact(time.clone().into())); @@ -537,15 +538,15 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { if !strict { return if let Ok(py_str) = self.downcast::() { let str = py_string_str(py_str)?; - bytes_as_time(self, str.as_bytes(), microseconds_overflow_behavior) + bytes_as_time(self, str.as_bytes(), microseconds_overflow_behavior, mode) } else if let Ok(py_bytes) = self.downcast::() { - bytes_as_time(self, py_bytes.as_bytes(), microseconds_overflow_behavior) + bytes_as_time(self, py_bytes.as_bytes(), microseconds_overflow_behavior, mode) } else if self.is_exact_instance_of::() { Err(ValError::new(ErrorTypeDefaults::TimeType, self)) } else if let Some(int) = extract_i64(self) { - int_as_time(self, int, 0) + int_as_time(self, int, 0, mode) } else if let Ok(float) = self.extract::() { - float_as_time(self, float) + float_as_time(self, float, mode) } else { break 'lax; } diff --git a/src/input/input_string.rs b/src/input/input_string.rs index 699672b07..28356e713 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -212,9 +212,10 @@ impl<'py> Input<'py> for StringMapping<'py> { &self, _strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, + mode: TemporalUnitMode ) -> ValResult>> { match self { - Self::String(s) => bytes_as_time(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior) + Self::String(s) => bytes_as_time(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior, mode) .map(ValidationMatch::strict), Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::TimeType, self)), } diff --git a/src/validators/time.rs b/src/validators/time.rs index db98846b1..fb5918c47 100644 --- a/src/validators/time.rs +++ b/src/validators/time.rs @@ -12,6 +12,7 @@ use crate::input::Input; use super::datetime::extract_microseconds_precision; use super::datetime::TZConstraint; +use super::TemporalUnitMode; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; #[derive(Debug, Clone)] @@ -19,6 +20,7 @@ pub struct TimeValidator { strict: bool, constraints: Option, microseconds_precision: MicrosecondsPrecisionOverflowBehavior, + val_temporal_unit: TemporalUnitMode } impl BuildValidator for TimeValidator { @@ -33,6 +35,7 @@ impl BuildValidator for TimeValidator { strict: is_strict(schema, config)?, constraints: TimeConstraints::from_py(schema)?, microseconds_precision: extract_microseconds_precision(schema, config)?, + val_temporal_unit: TemporalUnitMode::from_config(config)? }; Ok(s.into()) } @@ -48,7 +51,7 @@ impl Validator for TimeValidator { state: &mut ValidationState<'_, 'py>, ) -> ValResult { let time = input - .validate_time(state.strict_or(self.strict), self.microseconds_precision)? + .validate_time(state.strict_or(self.strict), self.microseconds_precision, self.val_temporal_unit)? .unpack(state); if let Some(constraints) = &self.constraints { let raw_time = time.as_raw()?; @@ -88,7 +91,7 @@ impl Validator for TimeValidator { fn convert_pytime(schema: &Bound<'_, PyDict>, key: &Bound<'_, PyString>) -> PyResult> { match schema.get_item(key)? { - Some(value) => match value.validate_time(false, MicrosecondsPrecisionOverflowBehavior::default()) { + Some(value) => match value.validate_time(false, MicrosecondsPrecisionOverflowBehavior::default(), TemporalUnitMode::default()) { Ok(v) => Ok(Some(v.into_inner().as_raw()?)), Err(_) => Err(PyValueError::new_err(format!( "'{key}' must be coercible to a time instance", From ef3814f073a2c3f4a849fadea455e137fb1786a3 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 21:15:15 +0100 Subject: [PATCH 07/12] feat: fin --- src/input/datetime.rs | 12 ++++--- src/input/input_abstract.rs | 1 - src/input/input_json.rs | 12 +++---- src/input/input_python.rs | 9 +++-- src/input/input_string.rs | 5 ++- src/validators/date.rs | 54 +++++++++++++++--------------- src/validators/time.rs | 7 ++-- tests/validators/test_time.py | 24 ------------- tests/validators/test_timedelta.py | 23 ------------- 9 files changed, 47 insertions(+), 100 deletions(-) diff --git a/src/input/datetime.rs b/src/input/datetime.rs index 9563c8bbb..86ff4c49f 100644 --- a/src/input/datetime.rs +++ b/src/input/datetime.rs @@ -326,7 +326,11 @@ impl<'py> EitherDateTime<'py> { } } -pub fn bytes_as_date<'py>(input: &(impl Input<'py> + ?Sized), bytes: &[u8], mode: TemporalUnitMode) -> ValResult> { +pub fn bytes_as_date<'py>( + input: &(impl Input<'py> + ?Sized), + bytes: &[u8], + mode: TemporalUnitMode, +) -> ValResult> { match Date::parse_bytes_with_config(bytes, &DateConfig::builder().timestamp_unit(mode.into()).build()) { Ok(date) => Ok(date.into()), Err(err) => Err(ValError::new( @@ -343,7 +347,6 @@ pub fn bytes_as_time<'py>( input: &(impl Input<'py> + ?Sized), bytes: &[u8], microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, - mode: TemporalUnitMode, ) -> ValResult> { match Time::parse_bytes_with_config( bytes, @@ -466,7 +469,6 @@ pub fn int_as_time<'py>( input: &(impl Input<'py> + ?Sized), timestamp: i64, timestamp_microseconds: u32, - mode: TemporalUnitMode ) -> ValResult> { let time_timestamp: u32 = match timestamp { t if t < 0_i64 => { @@ -502,11 +504,11 @@ pub fn int_as_time<'py>( } } -pub fn float_as_time<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64, mode: TemporalUnitMode) -> ValResult> { +pub fn float_as_time<'py>(input: &(impl Input<'py> + ?Sized), timestamp: f64) -> ValResult> { nan_check!(input, timestamp, TimeParsing); let microseconds = timestamp.fract().abs() * 1_000_000.0; // round for same reason as above - int_as_time(input, timestamp.floor() as i64, microseconds.round() as u32, mode) + int_as_time(input, timestamp.floor() as i64, microseconds.round() as u32) } fn map_timedelta_err(input: impl ToErrorValue, err: ParseError) -> ValError { diff --git a/src/input/input_abstract.rs b/src/input/input_abstract.rs index 6d5ef6102..e84c132df 100644 --- a/src/input/input_abstract.rs +++ b/src/input/input_abstract.rs @@ -164,7 +164,6 @@ pub trait Input<'py>: fmt::Debug { &self, strict: bool, microseconds_overflow_behavior: speedate::MicrosecondsPrecisionOverflowBehavior, - mode: TemporalUnitMode ) -> ValMatch>; fn validate_datetime( diff --git a/src/input/input_json.rs b/src/input/input_json.rs index 0f0dc2a1a..27a710058 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -277,7 +277,7 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { } } - fn validate_date(&self, _strict: bool, mode:TemporalUnitMode) -> ValResult>> { + fn validate_date(&self, _strict: bool, mode: TemporalUnitMode) -> ValResult>> { match self { JsonValue::Str(v) => bytes_as_date(self, v.as_bytes(), mode).map(ValidationMatch::strict), _ => Err(ValError::new(ErrorTypeDefaults::DateType, self)), @@ -287,14 +287,13 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { &self, strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, - mode: TemporalUnitMode ) -> ValResult>> { match self { JsonValue::Str(v) => { - bytes_as_time(self, v.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::strict) + bytes_as_time(self, v.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::strict) } - JsonValue::Int(v) if !strict => int_as_time(self, *v, 0, mode).map(ValidationMatch::lax), - JsonValue::Float(v) if !strict => float_as_time(self, *v, mode).map(ValidationMatch::lax), + JsonValue::Int(v) if !strict => int_as_time(self, *v, 0).map(ValidationMatch::lax), + JsonValue::Float(v) if !strict => float_as_time(self, *v).map(ValidationMatch::lax), JsonValue::BigInt(_) if !strict => Err(ValError::new( ErrorType::TimeParsing { error: Cow::Borrowed( @@ -495,9 +494,8 @@ impl<'py> Input<'py> for str { &self, _strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, - mode: TemporalUnitMode ) -> ValResult>> { - bytes_as_time(self, self.as_bytes(), microseconds_overflow_behavior, mode).map(ValidationMatch::lax) + bytes_as_time(self, self.as_bytes(), microseconds_overflow_behavior).map(ValidationMatch::lax) } fn validate_datetime( diff --git a/src/input/input_python.rs b/src/input/input_python.rs index 0e0e1881b..71f2d37dc 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -526,7 +526,6 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { &self, strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, - mode: TemporalUnitMode ) -> ValResult>> { if let Ok(time) = self.downcast_exact::() { return Ok(ValidationMatch::exact(time.clone().into())); @@ -538,15 +537,15 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { if !strict { return if let Ok(py_str) = self.downcast::() { let str = py_string_str(py_str)?; - bytes_as_time(self, str.as_bytes(), microseconds_overflow_behavior, mode) + bytes_as_time(self, str.as_bytes(), microseconds_overflow_behavior) } else if let Ok(py_bytes) = self.downcast::() { - bytes_as_time(self, py_bytes.as_bytes(), microseconds_overflow_behavior, mode) + bytes_as_time(self, py_bytes.as_bytes(), microseconds_overflow_behavior) } else if self.is_exact_instance_of::() { Err(ValError::new(ErrorTypeDefaults::TimeType, self)) } else if let Some(int) = extract_i64(self) { - int_as_time(self, int, 0, mode) + int_as_time(self, int, 0) } else if let Ok(float) = self.extract::() { - float_as_time(self, float, mode) + float_as_time(self, float) } else { break 'lax; } diff --git a/src/input/input_string.rs b/src/input/input_string.rs index 28356e713..97fb17a3d 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -201,7 +201,7 @@ impl<'py> Input<'py> for StringMapping<'py> { Err(ValError::new(ErrorTypeDefaults::IterableType, self)) } - fn validate_date(&self, _strict: bool, mode:TemporalUnitMode) -> ValResult>> { + fn validate_date(&self, _strict: bool, mode: TemporalUnitMode) -> ValResult>> { match self { Self::String(s) => bytes_as_date(self, py_string_str(s)?.as_bytes(), mode).map(ValidationMatch::strict), Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::DateType, self)), @@ -212,10 +212,9 @@ impl<'py> Input<'py> for StringMapping<'py> { &self, _strict: bool, microseconds_overflow_behavior: MicrosecondsPrecisionOverflowBehavior, - mode: TemporalUnitMode ) -> ValResult>> { match self { - Self::String(s) => bytes_as_time(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior, mode) + Self::String(s) => bytes_as_time(self, py_string_str(s)?.as_bytes(), microseconds_overflow_behavior) .map(ValidationMatch::strict), Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::TimeType, self)), } diff --git a/src/validators/date.rs b/src/validators/date.rs index 0ea8946d3..6a5c1faa9 100644 --- a/src/validators/date.rs +++ b/src/validators/date.rs @@ -32,7 +32,7 @@ impl BuildValidator for DateValidator { Ok(Self { strict: is_strict(schema, config)?, constraints: DateConstraints::from_py(schema)?, - val_temporal_unit: TemporalUnitMode::from_config(config)? + val_temporal_unit: TemporalUnitMode::from_config(config)?, } .into()) } @@ -111,34 +111,34 @@ impl Validator for DateValidator { /// "exact date", e.g. has a zero time component. /// /// Ok(None) means that this is not relevant to dates (the input was not a datetime nor a string) -fn date_from_datetime<'py>(input: &(impl Input<'py> + ?Sized), mode:TemporalUnitMode) -> Result>, ValError> { - let either_dt = match input.validate_datetime( - false, - speedate::MicrosecondsPrecisionOverflowBehavior::Truncate, - mode, - ) { - Ok(val_match) => val_match.into_inner(), - // if the error was a parsing error, update the error type from DatetimeParsing to DateFromDatetimeParsing - // and return it - Err(ValError::LineErrors(mut line_errors)) => { - if line_errors.iter_mut().fold(false, |has_parsing_error, line_error| { - if let ErrorType::DatetimeParsing { error, .. } = &mut line_error.error_type { - line_error.error_type = ErrorType::DateFromDatetimeParsing { - error: std::mem::take(error), - context: None, - }; - true - } else { - has_parsing_error +fn date_from_datetime<'py>( + input: &(impl Input<'py> + ?Sized), + mode: TemporalUnitMode, +) -> Result>, ValError> { + let either_dt = + match input.validate_datetime(false, speedate::MicrosecondsPrecisionOverflowBehavior::Truncate, mode) { + Ok(val_match) => val_match.into_inner(), + // if the error was a parsing error, update the error type from DatetimeParsing to DateFromDatetimeParsing + // and return it + Err(ValError::LineErrors(mut line_errors)) => { + if line_errors.iter_mut().fold(false, |has_parsing_error, line_error| { + if let ErrorType::DatetimeParsing { error, .. } = &mut line_error.error_type { + line_error.error_type = ErrorType::DateFromDatetimeParsing { + error: std::mem::take(error), + context: None, + }; + true + } else { + has_parsing_error + } + }) { + return Err(ValError::LineErrors(line_errors)); } - }) { - return Err(ValError::LineErrors(line_errors)); + return Ok(None); } - return Ok(None); - } - // for any other error, don't return it - Err(_) => return Ok(None), - }; + // for any other error, don't return it + Err(_) => return Ok(None), + }; let dt = either_dt.as_raw()?; let zero_time = Time { hour: 0, diff --git a/src/validators/time.rs b/src/validators/time.rs index fb5918c47..db98846b1 100644 --- a/src/validators/time.rs +++ b/src/validators/time.rs @@ -12,7 +12,6 @@ use crate::input::Input; use super::datetime::extract_microseconds_precision; use super::datetime::TZConstraint; -use super::TemporalUnitMode; use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; #[derive(Debug, Clone)] @@ -20,7 +19,6 @@ pub struct TimeValidator { strict: bool, constraints: Option, microseconds_precision: MicrosecondsPrecisionOverflowBehavior, - val_temporal_unit: TemporalUnitMode } impl BuildValidator for TimeValidator { @@ -35,7 +33,6 @@ impl BuildValidator for TimeValidator { strict: is_strict(schema, config)?, constraints: TimeConstraints::from_py(schema)?, microseconds_precision: extract_microseconds_precision(schema, config)?, - val_temporal_unit: TemporalUnitMode::from_config(config)? }; Ok(s.into()) } @@ -51,7 +48,7 @@ impl Validator for TimeValidator { state: &mut ValidationState<'_, 'py>, ) -> ValResult { let time = input - .validate_time(state.strict_or(self.strict), self.microseconds_precision, self.val_temporal_unit)? + .validate_time(state.strict_or(self.strict), self.microseconds_precision)? .unpack(state); if let Some(constraints) = &self.constraints { let raw_time = time.as_raw()?; @@ -91,7 +88,7 @@ impl Validator for TimeValidator { fn convert_pytime(schema: &Bound<'_, PyDict>, key: &Bound<'_, PyString>) -> PyResult> { match schema.get_item(key)? { - Some(value) => match value.validate_time(false, MicrosecondsPrecisionOverflowBehavior::default(), TemporalUnitMode::default()) { + Some(value) => match value.validate_time(false, MicrosecondsPrecisionOverflowBehavior::default()) { Ok(v) => Ok(Some(v.into_inner().as_raw()?)), Err(_) => Err(PyValueError::new_err(format!( "'{key}' must be coercible to a time instance", diff --git a/tests/validators/test_time.py b/tests/validators/test_time.py index 25ccbc75d..a1ad3fd26 100644 --- a/tests/validators/test_time.py +++ b/tests/validators/test_time.py @@ -296,27 +296,3 @@ def test_tz_constraint_too_high(): with pytest.raises(SchemaError, match='OverflowError: Python int too large to convert to C long'): SchemaValidator(core_schema.time_schema(tz_constraint=2**64)) - -@pytest.mark.parametrize( - 'val_temporal_unit, input_value, expected', - [ - # 'seconds' mode: treat as seconds since midnight - ('seconds', 3661, time(1, 1, 1, tzinfo=timezone.utc)), - ('seconds', '3661', time(1, 1, 1, tzinfo=timezone.utc)), - ('seconds', 3661.123456, time(1, 1, 1, 123456, tzinfo=timezone.utc)), - # 'milliseconds' mode: treat as milliseconds since midnight - ('milliseconds', 3661123, time(1, 1, 1, 123000, tzinfo=timezone.utc)), - ('milliseconds', '3661123', time(1, 1, 1, 123000, tzinfo=timezone.utc)), - ('milliseconds', 3661123.456, time(1, 1, 1, 123456, tzinfo=timezone.utc)), - # 'infer' mode: large numbers are ms, small are s - ('infer', 3661, time(1, 1, 1, tzinfo=timezone.utc)), - ('infer', 3661123, time(1, 1, 1, 123000, tzinfo=timezone.utc)), - ], -) -def test_val_temporal_unit_time(val_temporal_unit, input_value, expected): - v = SchemaValidator( - core_schema.time_schema(), - config={'val_temporal_unit': val_temporal_unit}, - ) - output = v.validate_python(input_value) - assert output == expected diff --git a/tests/validators/test_timedelta.py b/tests/validators/test_timedelta.py index bdcf6b083..36c4b4655 100644 --- a/tests/validators/test_timedelta.py +++ b/tests/validators/test_timedelta.py @@ -300,26 +300,3 @@ def test_pandas(): v.validate_python(one_55.to_pytimedelta()) -@pytest.mark.parametrize( - 'val_temporal_unit, input_value, expected', - [ - # 'seconds' mode: treat as seconds - ('seconds', 3661, timedelta(hours=1, seconds=1)), - ('seconds', '3661', timedelta(hours=1, seconds=1)), - ('seconds', 3661.123456, timedelta(hours=1, seconds=1, microseconds=123456)), - # 'milliseconds' mode: treat as milliseconds - ('milliseconds', 3661123, timedelta(hours=1, seconds=1, microseconds=123000)), - ('milliseconds', '3661123', timedelta(hours=1, seconds=1, microseconds=123000)), - ('milliseconds', 3661123.456, timedelta(hours=1, seconds=1, microseconds=123456)), - # 'infer' mode: large numbers are ms, small are s - ('infer', 3661, timedelta(hours=1, seconds=1)), - ('infer', 3661123, timedelta(hours=1, seconds=1, microseconds=123000)), - ], -) -def test_val_temporal_unit_timedelta(val_temporal_unit, input_value, expected): - v = SchemaValidator( - core_schema.timedelta_schema(), - config={'val_temporal_unit': val_temporal_unit}, - ) - output = v.validate_python(input_value) - assert output == expected From 7df52a18bd72ce64e0c0ee4cc175599849163da1 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Mon, 30 Jun 2025 21:41:06 +0100 Subject: [PATCH 08/12] fix: lint --- tests/validators/test_time.py | 1 - tests/validators/test_timedelta.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/tests/validators/test_time.py b/tests/validators/test_time.py index a1ad3fd26..9a643acfb 100644 --- a/tests/validators/test_time.py +++ b/tests/validators/test_time.py @@ -295,4 +295,3 @@ def test_neg_7200(): def test_tz_constraint_too_high(): with pytest.raises(SchemaError, match='OverflowError: Python int too large to convert to C long'): SchemaValidator(core_schema.time_schema(tz_constraint=2**64)) - diff --git a/tests/validators/test_timedelta.py b/tests/validators/test_timedelta.py index 36c4b4655..243fbfb95 100644 --- a/tests/validators/test_timedelta.py +++ b/tests/validators/test_timedelta.py @@ -298,5 +298,3 @@ def test_pandas(): v.validate_python(one_55) with pytest.raises(ValidationError, match=msg): v.validate_python(one_55.to_pytimedelta()) - - From 11ed996e4f91a21ca7fe93110c93b8faef075cd3 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Tue, 1 Jul 2025 09:48:56 +0100 Subject: [PATCH 09/12] fix: add some tests with the same timestamps but with seconds, milliseconds and infer to show behaviour --- tests/validators/test_date.py | 6 ++++++ tests/validators/test_datetime.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/tests/validators/test_date.py b/tests/validators/test_date.py index 5c63e9bf3..3c8180a49 100644 --- a/tests/validators/test_date.py +++ b/tests/validators/test_date.py @@ -312,13 +312,19 @@ def test_date_past_future_today(): ('seconds', 1654646400, date(2022, 6, 8)), ('seconds', '1654646400', date(2022, 6, 8)), ('seconds', 1654646400.0, date(2022, 6, 8)), + ('seconds', 8640000000.0 , date(2243, 10, 17)), + ('seconds', 92534400000.0 , date(4902, 4, 20)), # 'milliseconds' mode: treat as milliseconds since epoch ('milliseconds', 1654646400000, date(2022, 6, 8)), ('milliseconds', '1654646400000', date(2022, 6, 8)), ('milliseconds', 1654646400000.0, date(2022, 6, 8)), + ('milliseconds', 8640000000.0 , date(1970, 4, 11)), + ('milliseconds', 92534400000.0 , date(1972, 12, 7)), # 'infer' mode: large numbers are ms, small are s ('infer', 1654646400, date(2022, 6, 8)), ('infer', 1654646400000, date(2022, 6, 8)), + ('infer', 8640000000.0 , date(2243, 10, 17)), + ('infer', 92534400000.0 , date(1972, 12, 7)), ], ) def test_val_temporal_unit_date(val_temporal_unit, input_value, expected): diff --git a/tests/validators/test_datetime.py b/tests/validators/test_datetime.py index 34c246502..ffc683d61 100644 --- a/tests/validators/test_datetime.py +++ b/tests/validators/test_datetime.py @@ -529,10 +529,14 @@ def test_tz_cmp() -> None: ('seconds', 1654646400, datetime(2022, 6, 8, tzinfo=timezone.utc)), ('seconds', '1654646400', datetime(2022, 6, 8, tzinfo=timezone.utc)), ('seconds', 1654646400.123456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), + ('seconds', 8640000000.0 , datetime(2243, 10, 17, tzinfo=timezone.utc)), + ('seconds', 92534400000.0 , datetime(4902, 4, 20, tzinfo=timezone.utc)), # 'milliseconds' mode: treat as milliseconds since epoch ('milliseconds', 1654646400, datetime(1970, 1, 20, 3, 37, 26, 400000, tzinfo=timezone.utc)), ('milliseconds', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), ('milliseconds', '1654646400123', datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), + ('milliseconds', 8640000000.0 , datetime(1970, 4, 11, tzinfo=timezone.utc)), + ('milliseconds', 92534400000.0 , datetime(1972, 12, 7, tzinfo=timezone.utc)), pytest.param( 'milliseconds', 1654646400123.456, @@ -548,6 +552,8 @@ def test_tz_cmp() -> None: datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), marks=pytest.mark.xfail(reason='Current behaviour means this fails', strict=True), ), + ('infer', 8640000000.0 , datetime(2243, 10, 17, tzinfo=timezone.utc)), + ('infer', 92534400000.0 , datetime(1972, 12, 7, tzinfo=timezone.utc)), ], ) def test_val_temporal_unit_datetime(val_temporal_unit, input_value, expected): From 7721a53b9a0f6efc5a8e043f30f6f31601bfeeb5 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Tue, 1 Jul 2025 09:49:19 +0100 Subject: [PATCH 10/12] fix: format --- tests/validators/test_date.py | 12 ++++++------ tests/validators/test_datetime.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/validators/test_date.py b/tests/validators/test_date.py index 3c8180a49..3b53d3751 100644 --- a/tests/validators/test_date.py +++ b/tests/validators/test_date.py @@ -312,19 +312,19 @@ def test_date_past_future_today(): ('seconds', 1654646400, date(2022, 6, 8)), ('seconds', '1654646400', date(2022, 6, 8)), ('seconds', 1654646400.0, date(2022, 6, 8)), - ('seconds', 8640000000.0 , date(2243, 10, 17)), - ('seconds', 92534400000.0 , date(4902, 4, 20)), + ('seconds', 8640000000.0, date(2243, 10, 17)), + ('seconds', 92534400000.0, date(4902, 4, 20)), # 'milliseconds' mode: treat as milliseconds since epoch ('milliseconds', 1654646400000, date(2022, 6, 8)), ('milliseconds', '1654646400000', date(2022, 6, 8)), ('milliseconds', 1654646400000.0, date(2022, 6, 8)), - ('milliseconds', 8640000000.0 , date(1970, 4, 11)), - ('milliseconds', 92534400000.0 , date(1972, 12, 7)), + ('milliseconds', 8640000000.0, date(1970, 4, 11)), + ('milliseconds', 92534400000.0, date(1972, 12, 7)), # 'infer' mode: large numbers are ms, small are s ('infer', 1654646400, date(2022, 6, 8)), ('infer', 1654646400000, date(2022, 6, 8)), - ('infer', 8640000000.0 , date(2243, 10, 17)), - ('infer', 92534400000.0 , date(1972, 12, 7)), + ('infer', 8640000000.0, date(2243, 10, 17)), + ('infer', 92534400000.0, date(1972, 12, 7)), ], ) def test_val_temporal_unit_date(val_temporal_unit, input_value, expected): diff --git a/tests/validators/test_datetime.py b/tests/validators/test_datetime.py index ffc683d61..66f17c648 100644 --- a/tests/validators/test_datetime.py +++ b/tests/validators/test_datetime.py @@ -529,14 +529,14 @@ def test_tz_cmp() -> None: ('seconds', 1654646400, datetime(2022, 6, 8, tzinfo=timezone.utc)), ('seconds', '1654646400', datetime(2022, 6, 8, tzinfo=timezone.utc)), ('seconds', 1654646400.123456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), - ('seconds', 8640000000.0 , datetime(2243, 10, 17, tzinfo=timezone.utc)), - ('seconds', 92534400000.0 , datetime(4902, 4, 20, tzinfo=timezone.utc)), + ('seconds', 8640000000.0, datetime(2243, 10, 17, tzinfo=timezone.utc)), + ('seconds', 92534400000.0, datetime(4902, 4, 20, tzinfo=timezone.utc)), # 'milliseconds' mode: treat as milliseconds since epoch ('milliseconds', 1654646400, datetime(1970, 1, 20, 3, 37, 26, 400000, tzinfo=timezone.utc)), ('milliseconds', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), ('milliseconds', '1654646400123', datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), - ('milliseconds', 8640000000.0 , datetime(1970, 4, 11, tzinfo=timezone.utc)), - ('milliseconds', 92534400000.0 , datetime(1972, 12, 7, tzinfo=timezone.utc)), + ('milliseconds', 8640000000.0, datetime(1970, 4, 11, tzinfo=timezone.utc)), + ('milliseconds', 92534400000.0, datetime(1972, 12, 7, tzinfo=timezone.utc)), pytest.param( 'milliseconds', 1654646400123.456, @@ -552,8 +552,8 @@ def test_tz_cmp() -> None: datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), marks=pytest.mark.xfail(reason='Current behaviour means this fails', strict=True), ), - ('infer', 8640000000.0 , datetime(2243, 10, 17, tzinfo=timezone.utc)), - ('infer', 92534400000.0 , datetime(1972, 12, 7, tzinfo=timezone.utc)), + ('infer', 8640000000.0, datetime(2243, 10, 17, tzinfo=timezone.utc)), + ('infer', 92534400000.0, datetime(1972, 12, 7, tzinfo=timezone.utc)), ], ) def test_val_temporal_unit_datetime(val_temporal_unit, input_value, expected): From 67fcdc3b6d7c306e2ec9b00b6506f2f5bc564e81 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Thu, 10 Jul 2025 20:54:02 +0100 Subject: [PATCH 11/12] fix: fix issue with fractional part of mstimestamps --- src/input/datetime.rs | 20 +++++++++++++++++++- tests/validators/test_datetime.py | 9 +++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/input/datetime.rs b/src/input/datetime.rs index 86ff4c49f..00d3ffaf4 100644 --- a/src/input/datetime.rs +++ b/src/input/datetime.rs @@ -441,7 +441,25 @@ pub fn float_as_datetime<'py>( mode: TemporalUnitMode, ) -> ValResult> { nan_check!(input, timestamp, DatetimeParsing); - let microseconds = timestamp.fract().abs() * 1_000_000.0; + let microseconds = match mode { + TemporalUnitMode::Seconds => { + timestamp.fract().abs() * 1_000_000.0 + } + TemporalUnitMode::Milliseconds => { + timestamp.fract().abs() * 1_000.0 + } + TemporalUnitMode::Infer => { + // Use the same watershed from speedate to determine if we treat the float as seconds or milliseconds. + // TODO: should we expose this from speedate? + if timestamp.abs() <= 20_000_000_000.0 { + // treat as seconds + timestamp.fract().abs() * 1_000_000.0 + } else { + // treat as milliseconds + timestamp.fract().abs() * 1_000.0 + } + } + }; // checking for extra digits in microseconds is unreliable with large floats, // so we just round to the nearest microsecond int_as_datetime(input, timestamp.floor() as i64, microseconds.round() as u32, mode) diff --git a/tests/validators/test_datetime.py b/tests/validators/test_datetime.py index 66f17c648..495759576 100644 --- a/tests/validators/test_datetime.py +++ b/tests/validators/test_datetime.py @@ -47,10 +47,9 @@ def test_constraints_schema_validation() -> None: (Decimal('1654646400.1234564'), datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc)), (Decimal('1654646400.1234568'), datetime(2022, 6, 8, 0, 0, 0, 123457, tzinfo=timezone.utc)), ('1654646400.1234568', datetime(2022, 6, 8, 0, 0, 0, 123457, tzinfo=timezone.utc)), - pytest.param( + ( Decimal('1654646400123.456'), datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), - marks=pytest.mark.xfail(reason='Currently failing behaviour, probably needs looking at.', strict=True), ), (253_402_300_800_000, Err('should be a valid datetime, dates after 9999 are not supported as unix timestamps')), ( @@ -537,20 +536,18 @@ def test_tz_cmp() -> None: ('milliseconds', '1654646400123', datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), ('milliseconds', 8640000000.0, datetime(1970, 4, 11, tzinfo=timezone.utc)), ('milliseconds', 92534400000.0, datetime(1972, 12, 7, tzinfo=timezone.utc)), - pytest.param( + ( 'milliseconds', 1654646400123.456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), - marks=pytest.mark.xfail(reason='Current behaviour means this fails', strict=True), ), # 'infer' mode: large numbers are ms, small are s ('infer', 1654646400, datetime(2022, 6, 8, tzinfo=timezone.utc)), ('infer', 1654646400123, datetime(2022, 6, 8, 0, 0, 0, 123000, tzinfo=timezone.utc)), - pytest.param( + ( 'infer', 1654646400123.456, datetime(2022, 6, 8, 0, 0, 0, 123456, tzinfo=timezone.utc), - marks=pytest.mark.xfail(reason='Current behaviour means this fails', strict=True), ), ('infer', 8640000000.0, datetime(2243, 10, 17, tzinfo=timezone.utc)), ('infer', 92534400000.0, datetime(1972, 12, 7, tzinfo=timezone.utc)), From e9846136cb13c655f99811d9066ca11f5180d081 Mon Sep 17 00:00:00 2001 From: Oliver Parker Date: Thu, 10 Jul 2025 21:10:00 +0100 Subject: [PATCH 12/12] style: lint --- src/input/datetime.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/input/datetime.rs b/src/input/datetime.rs index 00d3ffaf4..a69da9227 100644 --- a/src/input/datetime.rs +++ b/src/input/datetime.rs @@ -442,12 +442,8 @@ pub fn float_as_datetime<'py>( ) -> ValResult> { nan_check!(input, timestamp, DatetimeParsing); let microseconds = match mode { - TemporalUnitMode::Seconds => { - timestamp.fract().abs() * 1_000_000.0 - } - TemporalUnitMode::Milliseconds => { - timestamp.fract().abs() * 1_000.0 - } + TemporalUnitMode::Seconds => timestamp.fract().abs() * 1_000_000.0, + TemporalUnitMode::Milliseconds => timestamp.fract().abs() * 1_000.0, TemporalUnitMode::Infer => { // Use the same watershed from speedate to determine if we treat the float as seconds or milliseconds. // TODO: should we expose this from speedate?