databendlabs
diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/column.rs b/src/query/storages/parquet/src/parquet_rs/statistics/column.rs
Lines changed: 23 additions & 5 deletions
diff --git a/tests/data/parquet/timestamp/gen.py b/tests/data/parquet/timestamp/gen.py
Lines changed: 24 additions & 0 deletions
diff --git a/tests/data/parquet/timestamp/timestamp_ms.parquet b/tests/data/parquet/timestamp/timestamp_ms.parquet
3.07 KB
diff --git a/tests/data/parquet/timestamp/timestamp_ns.parquet b/tests/data/parquet/timestamp/timestamp_ns.parquet
3.07 KB
diff --git a/tests/data/parquet/timestamp/timestamp_s.parquet b/tests/data/parquet/timestamp/timestamp_s.parquet
3.05 KB
diff --git a/tests/data/parquet/timestamp/timestamp_us.parquet b/tests/data/parquet/timestamp/timestamp_us.parquet
3.07 KB
diff --git a/tests/sqllogictests/suites/stage_parquet/timestamp b/tests/sqllogictests/suites/stage_parquet/timestamp
Lines changed: 31 additions & 0 deletions
diff --git a/tests/suites/1_stateful/08_select_stage/08_00_parquet/08_00_00_basic.sh b/tests/suites/1_stateful/08_select_stage/08_00_parquet/08_00_00_basic.sh
Lines changed: 1 addition & 1 deletion
@@ -79,7 +79,17 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> ColumnS
                     TableDataType::Number(NumberDataType::Int64) => {
                         (Scalar::from(max), Scalar::from(min))
                     }
-                    TableDataType::Timestamp => (Scalar::Timestamp(max), Scalar::Timestamp(min)),
+                    TableDataType::Timestamp => {
+                        let multi = match max.checked_ilog10().unwrap_or_default() + 1 {
+                            0..=10 => 1_000_000,
+                            11..=13 => 1_000,
+                            _ => 1,
+                        };
+                        (
+                            Scalar::Timestamp(max * multi),
+                            Scalar::Timestamp(min * multi),
+                        )
+                    }
                     TableDataType::Decimal(DecimalDataType::Decimal128(size)) => (
                         Scalar::Decimal(DecimalScalar::Decimal128(i128::from(max), *size)),
                         Scalar::Decimal(DecimalScalar::Decimal128(i128::from(min), *size)),
@@ -91,10 +101,18 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> ColumnS
                     _ => (Scalar::Null, Scalar::Null),
                 }
             }
-            Statistics::Int96(s) => (
-                Scalar::Timestamp(s.max().to_i64()),
-                Scalar::Timestamp(s.min().to_i64()),
-            ),
+            Statistics::Int96(s) => {
+                let (max, min) = (s.max().to_i64(), s.min().to_i64());
+                let multi = match max.checked_ilog10().unwrap_or_default() + 1 {
+                    0..=10 => 1_000_000,
+                    11..=13 => 1_000,
+                    _ => 1,
+                };
+                (
+                    Scalar::Timestamp(max * multi),
+                    Scalar::Timestamp(min * multi),
+                )
+            }
             Statistics::Float(s) => (Scalar::from(*s.max()), Scalar::from(*s.min())),
             Statistics::Double(s) => (Scalar::from(*s.max()), Scalar::from(*s.min())),
             Statistics::ByteArray(s) => (
 
@@ -0,0 +1,24 @@
+import pandas as pd
+import pyarrow as pa
+import pyarrow.parquet as pq
+from datetime import datetime
+
+names = ['s', "ms",  "us", 'ns']  # small, large, small, large
+for i in range(len(names)):
+    num_row = 300
+    timestamps = [
+        '2023-10-13T10:00:00',
+        '2023-10-14T11:00:00',
+        '2023-10-15T12:00:00',
+        '2023-10-16T12:00:00',
+    ] * num_row
+    datetime_objects = [datetime.fromisoformat(timestamp) for timestamp in timestamps]
+    timestamp_array = pa.array(datetime_objects, type = pa.timestamp(f"{names[i]}"))
+
+    table = pa.Table.from_arrays([timestamp_array], ['col_timestamp'])
+
+    pq.write_table(
+        table,
+        f"timestamp_{names[i]}.parquet",
+        row_group_size= num_row/2,
+    )
@@ -0,0 +1,31 @@
+query TI
+select col_timestamp, count() from  @data/parquet/timestamp/timestamp_s.parquet where col_timestamp between '2023-10-13' and '2023-10-18' group by col_timestamp  order by col_timestamp
+----
+2023-10-13 10:00:00.000000	300
+2023-10-14 11:00:00.000000	300
+2023-10-15 12:00:00.000000	300
+2023-10-16 12:00:00.000000	300
+
+query TI
+select col_timestamp, count() from  @data/parquet/timestamp/timestamp_ms.parquet where col_timestamp between '2023-10-13' and '2023-10-18' group by col_timestamp order by col_timestamp
+----
+2023-10-13 10:00:00.000000	300
+2023-10-14 11:00:00.000000	300
+2023-10-15 12:00:00.000000	300
+2023-10-16 12:00:00.000000	300
+
+query TI
+select col_timestamp, count() from  @data/parquet/timestamp/timestamp_us.parquet where col_timestamp between '2023-10-13' and '2023-10-18' group by col_timestamp order by col_timestamp
+----
+2023-10-13 10:00:00.000000	300
+2023-10-14 11:00:00.000000	300
+2023-10-15 12:00:00.000000	300
+2023-10-16 12:00:00.000000	300
+
+query TI
+select col_timestamp, count() from  @data/parquet/timestamp/timestamp_ns.parquet where col_timestamp between '2023-10-13' and '2023-10-18' group by col_timestamp order by col_timestamp
+----
+2023-10-13 10:00:00.000000	300
+2023-10-14 11:00:00.000000	300
+2023-10-15 12:00:00.000000	300
+2023-10-16 12:00:00.000000	300
@@ -37,4 +37,4 @@ echo "set use_parquet2 = ${USE_PARQUET2} ; select * from @s3 (FILE_FORMAT => 'PA
 
 rm -rf ${DATADIR_PATH}
 
-done
+done
Original file line number	Diff line number	Diff line change
`@@ -37,4 +37,4 @@ echo "set use_parquet2 = ${USE_PARQUET2} ; select * from @s3 (FILE_FORMAT => 'PA`
`37`	`37`
`38`	`38`	`rm -rf ${DATADIR_PATH}`
`39`	`39`
`40`		`-done`
	`40`	`+done`