Skip to content

Commit 959e416

Browse files
authored
refactor: refine column statistic map serialization (#16728)
When deserializing the ColumnStatistics map, exclude columns whose data type is not supported for statistics.
1 parent d376d8c commit 959e416

File tree

6 files changed

+39
-27
lines changed

6 files changed

+39
-27
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/storages/common/index/src/index.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,5 @@
1515
use databend_common_expression::types::DataType;
1616

1717
pub trait Index {
18-
fn supported_type(data_type: &DataType) -> bool {
19-
// we support nullable column but Nulls are not added into the bloom filter.
20-
let inner_type = data_type.remove_nullable();
21-
matches!(
22-
inner_type,
23-
DataType::Number(_)
24-
| DataType::Date
25-
| DataType::Timestamp
26-
| DataType::String
27-
| DataType::Decimal(_)
28-
)
29-
}
18+
fn supported_type(data_type: &DataType) -> bool;
3019
}

src/query/storages/common/index/src/range_index.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,4 +247,8 @@ pub fn statistics_to_domain(mut stats: Vec<&ColumnStatistics>, data_type: &DataT
247247
}
248248
}
249249

250-
impl Index for RangeIndex {}
250+
impl Index for RangeIndex {
251+
fn supported_type(data_type: &DataType) -> bool {
252+
databend_storages_common_table_meta::meta::supported_stat_type(data_type)
253+
}
254+
}

src/query/storages/common/table_meta/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ databend-common-expression = { workspace = true }
2020
databend-common-io = { workspace = true }
2121
databend-common-storage = { path = "../../../../common/storage" }
2222
enum-as-inner = "0.5"
23+
log = { workspace = true }
2324
parquet = { workspace = true }
2425
rmp-serde = "1.1.1"
2526
serde = { workspace = true }

src/query/storages/common/table_meta/src/meta/statistics.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
use std::collections::HashMap;
1616

1717
use databend_common_base::base::uuid::Uuid;
18+
use databend_common_expression::types::DataType;
1819
use databend_common_expression::ColumnId;
1920

2021
use crate::meta::ColumnStatistics;
@@ -36,3 +37,15 @@ pub struct BlockSlotDescription {
3637
// otherwise, the block should be taken care of by other executors
3738
pub slot: u32,
3839
}
40+
41+
pub fn supported_stat_type(data_type: &DataType) -> bool {
42+
let inner_type = data_type.remove_nullable();
43+
matches!(
44+
inner_type,
45+
DataType::Number(_)
46+
| DataType::Date
47+
| DataType::Timestamp
48+
| DataType::String
49+
| DataType::Decimal(_)
50+
)
51+
}

src/query/storages/common/table_meta/src/meta/v2/statistics.rs

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
use std::collections::HashMap;
1616
use std::fmt;
17-
use std::hash::Hash;
1817
use std::marker::PhantomData;
1918

2019
use databend_common_expression::converts::datavalues::from_scalar;
@@ -24,8 +23,10 @@ use databend_common_expression::ColumnId;
2423
use databend_common_expression::Scalar;
2524
use databend_common_expression::TableDataType;
2625
use databend_common_expression::TableField;
26+
use log::info;
2727
use serde::de::Error;
2828

29+
use crate::meta::supported_stat_type;
2930
use crate::meta::v0;
3031

3132
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
@@ -367,25 +368,20 @@ pub fn deserialize_col_stats<'de, D>(
367368
where D: serde::Deserializer<'de> {
368369
deserializer.deserialize_map(ColStatsVisitor::new())
369370
}
370-
371-
struct ColStatsVisitor<K, V> {
372-
marker: PhantomData<fn() -> HashMap<K, V>>,
371+
struct ColStatsVisitor {
372+
marker: PhantomData<fn() -> HashMap<ColumnId, ColumnStatistics>>,
373373
}
374374

375-
impl<K, V> ColStatsVisitor<K, V> {
375+
impl ColStatsVisitor {
376376
fn new() -> Self {
377377
ColStatsVisitor {
378378
marker: PhantomData,
379379
}
380380
}
381381
}
382382

383-
impl<'de, K, V> serde::de::Visitor<'de> for ColStatsVisitor<K, V>
384-
where
385-
K: serde::Deserialize<'de> + Hash + Eq,
386-
V: serde::Deserialize<'de>,
387-
{
388-
type Value = HashMap<K, V>;
383+
impl<'de> serde::de::Visitor<'de> for ColStatsVisitor {
384+
type Value = HashMap<ColumnId, ColumnStatistics>;
389385

390386
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
391387
formatter.write_str("a map")
@@ -395,9 +391,17 @@ where
395391
where M: serde::de::MapAccess<'de> {
396392
let mut map = HashMap::with_capacity(access.size_hint().unwrap_or(0));
397393

398-
while let Some(key) = access.next_key()? {
399-
if let Ok(value) = access.next_value() {
400-
map.insert(key, value);
394+
while let Some(key) = access.next_key::<ColumnId>()? {
395+
if let Ok(value) = access.next_value::<ColumnStatistics>() {
396+
let data_type = value.max.as_ref().infer_data_type();
397+
if supported_stat_type(&data_type) {
398+
map.insert(key, value);
399+
} else {
400+
info!(
401+
"column of id {} is excluded from column statistics, unsupported data type {}",
402+
key, data_type
403+
);
404+
}
401405
}
402406
}
403407

0 commit comments

Comments
 (0)