From 4be8de3a45d6a4ca25230840fae73002e58798e8 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 13 Jun 2025 15:29:01 +0800 Subject: [PATCH] refactor(fuse): optimize ExprBloomFilter to use references instead of clones This change modifies the ExprBloomFilter implementation to accept a reference to BinaryFuse16 instead of taking ownership of the filter. This optimization eliminates unnecessary cloning when applying runtime bloom filters during data deserialization in data sources. Key changes: - Add lifetime parameter to ExprBloomFilter struct - Change filter field from owned BinaryFuse16 to a reference - Update constructor to accept and store references - Remove redundant clone() calls in native and parquet deserializers --- .../operations/read/native_data_source_deserializer.rs | 2 +- .../operations/read/parquet_data_source_deserializer.rs | 2 +- src/query/storages/fuse/src/pruning/expr_bloom_filter.rs | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs index d4f525c5a8981..151b26b9378db 100644 --- a/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs @@ -652,7 +652,7 @@ impl NativeDeserializeDataTransform { let mut bitmap = MutableBitmap::from_len_zeroed(probe_block.num_rows()); let probe_column = probe_block.get_last_column().clone(); // Apply the filter to the probe column. - ExprBloomFilter::new(filter.clone()).apply(probe_column, &mut bitmap)?; + ExprBloomFilter::new(filter).apply(probe_column, &mut bitmap)?; let unset_bits = bitmap.null_count(); if unset_bits == bitmap.len() { diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index 1add3998f2b4c..c7b16471bd52f 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -167,7 +167,7 @@ impl DeserializeDataTransform { .convert_to_full_column(&probe_block_entry.data_type, data_block.num_rows()); // Apply bloom filter - ExprBloomFilter::new(filter.clone()).apply(probe_column, &mut bitmap)?; + ExprBloomFilter::new(filter).apply(probe_column, &mut bitmap)?; bitmaps.push(bitmap); } if !bitmaps.is_empty() { diff --git a/src/query/storages/fuse/src/pruning/expr_bloom_filter.rs b/src/query/storages/fuse/src/pruning/expr_bloom_filter.rs index 161e52d8e1a01..c32de93de81e8 100644 --- a/src/query/storages/fuse/src/pruning/expr_bloom_filter.rs +++ b/src/query/storages/fuse/src/pruning/expr_bloom_filter.rs @@ -27,13 +27,13 @@ use xorf::BinaryFuse16; use xorf::Filter; /// Bloom filter for runtime filtering of data rows. -pub struct ExprBloomFilter { - filter: BinaryFuse16, +pub struct ExprBloomFilter<'a> { + filter: &'a BinaryFuse16, } -impl ExprBloomFilter { +impl<'a> ExprBloomFilter<'a> { /// Create a new bloom filter. - pub fn new(filter: BinaryFuse16) -> Self { + pub fn new(filter: &'a BinaryFuse16) -> ExprBloomFilter<'a> { Self { filter } }