Skip to content

Commit 6e73860

Browse files
authored
fix(cubesql): Fix incorrect datetime parsing in filters rewrite rules (#9732)
* fix(cubesql): Fix incorrect datetime parsing in filters rewrite rules * remove copy/paste * add test * fix suggestions * fix map err
1 parent 066045e commit 6e73860

File tree

4 files changed

+87
-54
lines changed

4 files changed

+87
-54
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
use crate::compile::engine::df::scan::DataFusionError;
2+
use chrono::{NaiveDate, NaiveDateTime};
3+
4+
pub fn parse_date_str(s: &str) -> Result<NaiveDateTime, DataFusionError> {
5+
let parsed = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f")
6+
.or_else(|_| NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f"))
7+
.or_else(|_| NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S"))
8+
.or_else(|_| NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.fZ"))
9+
.or_else(|_| {
10+
NaiveDate::parse_from_str(s, "%Y-%m-%d").map(|date| date.and_hms_opt(0, 0, 0).unwrap())
11+
});
12+
13+
parsed.map_err(|e| {
14+
DataFusionError::Internal(format!("Can't parse date/time string literal: {}", e))
15+
})
16+
}

rust/cubesql/cubesql/src/compile/engine/df/scan.rs

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use std::{
2828
task::{Context, Poll},
2929
};
3030

31+
use crate::compile::date_parser::parse_date_str;
3132
use crate::{
3233
compile::{
3334
engine::df::wrapper::{CubeScanWrappedSqlNode, CubeScanWrapperNode, SqlQuery},
@@ -38,7 +39,7 @@ use crate::{
3839
transport::{CubeStreamReceiver, LoadRequestMeta, SpanId, TransportService},
3940
CubeError,
4041
};
41-
use chrono::{Datelike, NaiveDate, NaiveDateTime};
42+
use chrono::{Datelike, NaiveDate};
4243
use datafusion::{
4344
arrow::{
4445
array::{
@@ -917,21 +918,7 @@ pub fn transform_response<V: ValueObject>(
917918
field_name,
918919
{
919920
(FieldValue::String(s), builder) => {
920-
let timestamp = NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%dT%H:%M:%S%.f")
921-
.or_else(|_| NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%d %H:%M:%S%.f"))
922-
.or_else(|_| NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%dT%H:%M:%S"))
923-
.or_else(|_| NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%dT%H:%M:%S%.fZ"))
924-
.or_else(|_| {
925-
NaiveDate::parse_from_str(s.as_ref(), "%Y-%m-%d").map(|date| {
926-
date.and_hms_opt(0, 0, 0).unwrap()
927-
})
928-
})
929-
.map_err(|e| {
930-
DataFusionError::Execution(format!(
931-
"Can't parse timestamp: '{}': {}",
932-
s, e
933-
))
934-
})?;
921+
let timestamp = parse_date_str(s.as_ref())?;
935922
// TODO switch parsing to microseconds
936923
if timestamp.and_utc().timestamp_millis() > (((1i64) << 62) / 1_000_000) {
937924
builder.append_null()?;
@@ -959,21 +946,7 @@ pub fn transform_response<V: ValueObject>(
959946
field_name,
960947
{
961948
(FieldValue::String(s), builder) => {
962-
let timestamp = NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%dT%H:%M:%S%.f")
963-
.or_else(|_| NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%d %H:%M:%S%.f"))
964-
.or_else(|_| NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%dT%H:%M:%S"))
965-
.or_else(|_| NaiveDateTime::parse_from_str(s.as_ref(), "%Y-%m-%dT%H:%M:%S%.fZ"))
966-
.or_else(|_| {
967-
NaiveDate::parse_from_str(s.as_ref(), "%Y-%m-%d").map(|date| {
968-
date.and_hms_opt(0, 0, 0).unwrap()
969-
})
970-
})
971-
.map_err(|e| {
972-
DataFusionError::Execution(format!(
973-
"Can't parse timestamp: '{}': {}",
974-
s, e
975-
))
976-
})?;
949+
let timestamp = parse_date_str(s.as_ref())?;
977950
// TODO switch parsing to microseconds
978951
if timestamp.and_utc().timestamp_millis() > (((1 as i64) << 62) / 1_000_000) {
979952
builder.append_null()?;

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub mod service;
1414
pub mod session;
1515

1616
// Internal API
17+
mod date_parser;
1718
pub mod test;
1819

1920
// Re-export for Public API
@@ -15541,6 +15542,47 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1554115542
);
1554215543
}
1554315544

15545+
#[tokio::test]
15546+
async fn test_daterange_filter_literals() -> Result<(), CubeError> {
15547+
init_testing_logger();
15548+
15549+
let query_plan = convert_select_to_query_plan(
15550+
// language=PostgreSQL
15551+
r#"SELECT
15552+
DATE_TRUNC('month', order_date) AS order_date,
15553+
COUNT(*) AS month_count
15554+
FROM "KibanaSampleDataEcommerce" ecom
15555+
WHERE ecom.order_date >= '2025-01-01' and ecom.order_date < '2025-02-01'
15556+
GROUP BY 1"#
15557+
.to_string(),
15558+
DatabaseProtocol::PostgreSQL,
15559+
)
15560+
.await;
15561+
15562+
let logical_plan = query_plan.as_logical_plan();
15563+
assert_eq!(
15564+
logical_plan.find_cube_scan().request,
15565+
V1LoadRequestQuery {
15566+
measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
15567+
segments: Some(vec![]),
15568+
dimensions: Some(vec![]),
15569+
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
15570+
dimension: "KibanaSampleDataEcommerce.order_date".to_owned(),
15571+
granularity: Some("month".to_string()),
15572+
date_range: Some(json!(vec![
15573+
// WHY NOT "2025-01-01T00:00:00.000Z".to_string(), ?
15574+
"2025-01-01".to_string(),
15575+
"2025-01-31T23:59:59.999Z".to_string()
15576+
])),
15577+
}]),
15578+
order: Some(vec![]),
15579+
..Default::default()
15580+
}
15581+
);
15582+
15583+
Ok(())
15584+
}
15585+
1554415586
#[tokio::test]
1554515587
async fn test_time_dimension_range_filter_chain_or() {
1554615588
init_testing_logger();
@@ -15584,7 +15626,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1558415626
operator: Some("inDateRange".to_string()),
1558515627
values: Some(vec![
1558615628
"2019-01-01 00:00:00.0".to_string(),
15587-
"2020-01-01 00:00:00.0".to_string(),
15629+
"2019-12-31T23:59:59.999Z".to_string(),
1558815630
]),
1558915631
or: None,
1559015632
and: None,
@@ -15594,7 +15636,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1559415636
operator: Some("inDateRange".to_string()),
1559515637
values: Some(vec![
1559615638
"2021-01-01 00:00:00.0".to_string(),
15597-
"2022-01-01 00:00:00.0".to_string(),
15639+
"2021-12-31T23:59:59.999Z".to_string(),
1559815640
]),
1559915641
or: None,
1560015642
and: None,

rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use super::utils;
2+
use crate::compile::date_parser::parse_date_str;
23
use crate::{
34
compile::rewrite::{
45
alias_expr,
@@ -36,7 +37,7 @@ use chrono::{
3637
Numeric::{Day, Hour, Minute, Month, Second, Year},
3738
Pad::Zero,
3839
},
39-
DateTime, Datelike, Days, Duration, Months, NaiveDate, NaiveDateTime, Timelike, Weekday,
40+
DateTime, Datelike, Days, Duration, Months, NaiveDateTime, Timelike, Weekday,
4041
};
4142
use cubeclient::models::V1CubeMeta;
4243
use datafusion::{
@@ -4568,36 +4569,36 @@ impl FilterRules {
45684569
let date_range_start_op_var = date_range_start_op_var.parse().unwrap();
45694570
let date_range_end_op_var = date_range_end_op_var.parse().unwrap();
45704571
move |egraph, subst| {
4571-
fn resolve_time_delta(date_var: &String, op: &String) -> String {
4572+
fn resolve_time_delta(date_var: &String, op: &String) -> Option<String> {
45724573
if op == "afterDate" {
45734574
return increment_iso_timestamp_time(date_var);
45744575
} else if op == "beforeDate" {
45754576
return decrement_iso_timestamp_time(date_var);
45764577
} else {
4577-
return date_var.clone();
4578+
return Some(date_var.clone());
45784579
}
45794580
}
45804581

4581-
fn increment_iso_timestamp_time(date_var: &String) -> String {
4582-
let timestamp = NaiveDateTime::parse_from_str(date_var, "%Y-%m-%dT%H:%M:%S%.fZ");
4582+
fn increment_iso_timestamp_time(date_var: &String) -> Option<String> {
4583+
let timestamp = parse_date_str(date_var);
45834584
let value = match timestamp {
45844585
Ok(val) => format_iso_timestamp(
45854586
val.checked_add_signed(Duration::milliseconds(1)).unwrap(),
45864587
),
4587-
Err(_) => date_var.clone(),
4588+
Err(_) => return None,
45884589
};
4589-
return value;
4590+
return Some(value);
45904591
}
45914592

4592-
fn decrement_iso_timestamp_time(date_var: &String) -> String {
4593-
let timestamp = NaiveDateTime::parse_from_str(date_var, "%Y-%m-%dT%H:%M:%S%.fZ");
4593+
fn decrement_iso_timestamp_time(date_var: &String) -> Option<String> {
4594+
let timestamp = parse_date_str(date_var);
45944595
let value = match timestamp {
45954596
Ok(val) => format_iso_timestamp(
45964597
val.checked_sub_signed(Duration::milliseconds(1)).unwrap(),
45974598
),
4598-
Err(_) => date_var.clone(),
4599+
Err(_) => return None,
45994600
};
4600-
return value;
4601+
return Some(value);
46014602
}
46024603

46034604
for date_range_start in
@@ -4630,10 +4631,16 @@ impl FilterRules {
46304631
}
46314632

46324633
let mut result = Vec::new();
4633-
let resolved_start_date =
4634-
resolve_time_delta(&date_range_start[0], date_range_start_op);
4635-
let resolved_end_date =
4636-
resolve_time_delta(&date_range_end[0], date_range_end_op);
4634+
let Some(resolved_start_date) =
4635+
resolve_time_delta(&date_range_start[0], date_range_start_op)
4636+
else {
4637+
return false;
4638+
};
4639+
let Some(resolved_end_date) =
4640+
resolve_time_delta(&date_range_end[0], date_range_end_op)
4641+
else {
4642+
return false;
4643+
};
46374644

46384645
if swap_left_and_right {
46394646
result.extend(vec![resolved_end_date]);
@@ -5222,12 +5229,7 @@ impl FilterRules {
52225229
let Some(str) = str else {
52235230
return Some(None);
52245231
};
5225-
let dt = NaiveDateTime::parse_from_str(str, "%Y-%m-%d %H:%M:%S%.f")
5226-
.or_else(|_| NaiveDateTime::parse_from_str(str, "%Y-%m-%d %H:%M:%S"))
5227-
.or_else(|_| {
5228-
NaiveDate::parse_from_str(str, "%Y-%m-%d")
5229-
.map(|date| date.and_hms_opt(0, 0, 0).unwrap())
5230-
});
5232+
let dt = parse_date_str(str.as_str());
52315233
let Ok(dt) = dt else {
52325234
return None;
52335235
};

0 commit comments

Comments
 (0)