Skip to content

Commit 84ea90b

Browse files
committed
support explain display vetor pruning, add write logs
1 parent 889a7dc commit 84ea90b

File tree

13 files changed

+150
-14
lines changed

13 files changed

+150
-14
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/metrics/src/metrics/storage.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,14 @@ static BYTES_BLOCK_INVERTED_INDEX_PRUNING_BEFORE: LazyLock<Counter> =
254254
LazyLock::new(|| register_counter("fuse_bytes_block_inverted_index_pruning_before"));
255255
static BYTES_BLOCK_INVERTED_INDEX_PRUNING_AFTER: LazyLock<Counter> =
256256
LazyLock::new(|| register_counter("fuse_bytes_block_inverted_index_pruning_after"));
257+
static BLOCKS_VECTOR_INDEX_PRUNING_BEFORE: LazyLock<Counter> =
258+
LazyLock::new(|| register_counter("fuse_blocks_vector_index_pruning_before"));
259+
static BLOCKS_VECTOR_INDEX_PRUNING_AFTER: LazyLock<Counter> =
260+
LazyLock::new(|| register_counter("fuse_blocks_vector_index_pruning_after"));
261+
static BYTES_BLOCK_VECTOR_INDEX_PRUNING_BEFORE: LazyLock<Counter> =
262+
LazyLock::new(|| register_counter("fuse_bytes_block_vector_index_pruning_before"));
263+
static BYTES_BLOCK_VECTOR_INDEX_PRUNING_AFTER: LazyLock<Counter> =
264+
LazyLock::new(|| register_counter("fuse_bytes_block_vector_index_pruning_after"));
257265
static PRUNING_PREWHERE_NUMS: LazyLock<Counter> =
258266
LazyLock::new(|| register_counter("fuse_pruning_prewhere_nums"));
259267
static PRUNING_MILLISECONDS: LazyLock<Histogram> =
@@ -716,6 +724,22 @@ pub fn metrics_inc_bytes_block_inverted_index_pruning_after(c: u64) {
716724
BYTES_BLOCK_INVERTED_INDEX_PRUNING_AFTER.inc_by(c);
717725
}
718726

727+
pub fn metrics_inc_blocks_vector_index_pruning_before(c: u64) {
728+
BLOCKS_VECTOR_INDEX_PRUNING_BEFORE.inc_by(c);
729+
}
730+
731+
pub fn metrics_inc_blocks_vector_index_pruning_after(c: u64) {
732+
BLOCKS_VECTOR_INDEX_PRUNING_AFTER.inc_by(c);
733+
}
734+
735+
pub fn metrics_inc_bytes_block_vector_index_pruning_before(c: u64) {
736+
BYTES_BLOCK_VECTOR_INDEX_PRUNING_BEFORE.inc_by(c);
737+
}
738+
739+
pub fn metrics_inc_bytes_block_vector_index_pruning_after(c: u64) {
740+
BYTES_BLOCK_VECTOR_INDEX_PRUNING_AFTER.inc_by(c);
741+
}
742+
719743
pub fn metrics_inc_pruning_prewhere_nums(c: u64) {
720744
PRUNING_PREWHERE_NUMS.inc_by(c);
721745
}

src/query/catalog/src/plan/pruning_statistics.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ pub struct PruningStatistics {
2929
/// Block inverted index filter pruning stats.
3030
pub blocks_inverted_index_pruning_before: usize,
3131
pub blocks_inverted_index_pruning_after: usize,
32+
33+
/// Block vector index filter pruning stats.
34+
pub blocks_vector_index_pruning_before: usize,
35+
pub blocks_vector_index_pruning_after: usize,
3236
}
3337

3438
impl PruningStatistics {
@@ -41,5 +45,7 @@ impl PruningStatistics {
4145
self.blocks_bloom_pruning_after += other.blocks_bloom_pruning_after;
4246
self.blocks_inverted_index_pruning_before += other.blocks_inverted_index_pruning_before;
4347
self.blocks_inverted_index_pruning_after += other.blocks_inverted_index_pruning_after;
48+
self.blocks_vector_index_pruning_before += other.blocks_vector_index_pruning_before;
49+
self.blocks_vector_index_pruning_after += other.blocks_vector_index_pruning_after;
4450
}
4551
}

src/query/expression/src/evaluator.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -921,11 +921,13 @@ impl<'a> Evaluator<'a> {
921921
}
922922
}
923923
(DataType::Array(inner_src_ty), DataType::Vector(inner_dest_ty)) => {
924-
if !matches!(&**inner_src_ty, DataType::Number(_) | DataType::Decimal(_))
925-
|| matches!(inner_dest_ty, VectorDataType::Int8(_))
924+
if !matches!(
925+
inner_src_ty.remove_nullable(),
926+
DataType::Number(_) | DataType::Decimal(_)
927+
) || matches!(inner_dest_ty, VectorDataType::Int8(_))
926928
{
927929
return Err(ErrorCode::BadArguments(format!(
928-
"unable to cast type `{src_type}` to type `{dest_type}`"
930+
"unable to cast type `{src_type}` to vector type `{dest_type}`"
929931
))
930932
.set_span(span));
931933
}
@@ -971,6 +973,7 @@ impl<'a> Evaluator<'a> {
971973
)
972974
.set_span(span));
973975
}
976+
let col = col.remove_nullable();
974977
match col {
975978
Column::Number(num_col) => {
976979
for i in 0..dimension {

src/query/expression/src/type_check.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,10 @@ fn can_cast_to(src_ty: &DataType, dest_ty: &DataType) -> bool {
639639
true
640640
}
641641
(DataType::Array(fields_src_ty), DataType::Vector(_))
642-
if matches!(&**fields_src_ty, DataType::Number(_) | DataType::Decimal(_)) =>
642+
if matches!(
643+
fields_src_ty.remove_nullable(),
644+
DataType::Number(_) | DataType::Decimal(_)
645+
) =>
643646
{
644647
true
645648
}

src/query/sql/src/executor/format.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1734,6 +1734,18 @@ fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNode<S
17341734
);
17351735
}
17361736

1737+
// vector index pruning status.
1738+
if info.pruning_stats.blocks_vector_index_pruning_before > 0 {
1739+
if !blocks_pruning_description.is_empty() {
1740+
blocks_pruning_description += ", ";
1741+
}
1742+
blocks_pruning_description += &format!(
1743+
"vector pruning: {} to {}",
1744+
info.pruning_stats.blocks_vector_index_pruning_before,
1745+
info.pruning_stats.blocks_vector_index_pruning_after
1746+
);
1747+
}
1748+
17371749
// Combine segment pruning and blocks pruning descriptions if any
17381750
if info.pruning_stats.segments_range_pruning_before > 0
17391751
|| !blocks_pruning_description.is_empty()

src/query/storages/common/index/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ databend-common-ast = { workspace = true }
1414
databend-common-exception = { workspace = true }
1515
databend-common-expression = { workspace = true }
1616
databend-common-functions = { workspace = true }
17-
databend-common-vector = { workspace = true }
1817
databend-storages-common-table-meta = { workspace = true }
1918

2019
anyerror = { workspace = true }

src/query/storages/fuse/src/io/write/vector_index_writer.rs

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ use databend_storages_common_index::DistanceType;
3636
use databend_storages_common_index::HNSWIndex;
3737
use databend_storages_common_table_meta::meta::Location;
3838
use databend_storages_common_table_meta::table::TableCompression;
39+
use log::debug;
40+
use log::info;
41+
42+
const DEFAULT_M: usize = 16;
43+
const DEFAULT_EF_CONSTRUCT: usize = 100;
3944

4045
#[derive(Debug, Clone)]
4146
pub struct VectorIndexState {
@@ -70,6 +75,11 @@ impl VectorIndexBuilder {
7075
table_indexes: &BTreeMap<String, TableIndex>,
7176
schema: TableSchemaRef,
7277
) -> Option<VectorIndexBuilder> {
78+
info!(
79+
"Starting vector index creation with {} table indexes",
80+
table_indexes.len()
81+
);
82+
7383
LicenseManagerSwitch::instance()
7484
.check_enterprise_enabled(ctx.get_license_key(), Feature::VectorIndex)
7585
.ok()?;
@@ -86,6 +96,7 @@ impl VectorIndexBuilder {
8696
continue;
8797
}
8898

99+
info!("Processing vector index: {}", index.name);
89100
let mut offsets = Vec::with_capacity(index.column_ids.len());
90101
for column_id in &index.column_ids {
91102
for (offset, field) in schema.fields.iter().enumerate() {
@@ -97,6 +108,10 @@ impl VectorIndexBuilder {
97108
}
98109
// ignore invalid index
99110
if offsets.len() != index.column_ids.len() {
111+
debug!(
112+
"Ignoring invalid vector index: {}, missing columns",
113+
index.name
114+
);
100115
continue;
101116
}
102117
for (offset, _) in &offsets {
@@ -106,13 +121,13 @@ impl VectorIndexBuilder {
106121

107122
// Parse index parameters
108123
let m = match index.options.get("m") {
109-
Some(value) => value.parse::<usize>().unwrap_or(16),
110-
None => 16,
124+
Some(value) => value.parse::<usize>().unwrap_or(DEFAULT_M),
125+
None => DEFAULT_M,
111126
};
112127

113128
let ef_construct = match index.options.get("ef_construct") {
114-
Some(value) => value.parse::<usize>().unwrap_or(64),
115-
None => 64,
129+
Some(value) => value.parse::<usize>().unwrap_or(DEFAULT_EF_CONSTRUCT),
130+
None => DEFAULT_EF_CONSTRUCT,
116131
};
117132

118133
let mut distances = Vec::new();
@@ -132,8 +147,16 @@ impl VectorIndexBuilder {
132147
None => continue,
133148
};
134149
if distances.is_empty() {
150+
debug!(
151+
"Ignoring vector index: {}, no valid distance types",
152+
index.name
153+
);
135154
continue;
136155
}
156+
info!(
157+
"Added vector index parameters for {}: m={}, ef_construct={}, distances={:?}",
158+
index.name, m, ef_construct, distances
159+
);
137160
let index_param = VectorIndexParam {
138161
index_name: index.name.clone(),
139162
index_version: index.version.clone(),
@@ -162,6 +185,11 @@ impl VectorIndexBuilder {
162185
}
163186

164187
pub fn add_block(&mut self, block: &DataBlock) -> Result<()> {
188+
info!(
189+
"Adding block with {} rows to vector index",
190+
block.num_rows()
191+
);
192+
165193
for offset in &self.field_offsets_set {
166194
let block_entry = block.get_by_offset(*offset);
167195
let column = block_entry.to_column();
@@ -178,6 +206,7 @@ impl VectorIndexBuilder {
178206
#[async_backtrace::framed]
179207
pub fn finalize(&mut self, location: &Location) -> Result<VectorIndexState> {
180208
let start = Instant::now();
209+
info!("Start build vector HNSW index for location: {}", location.0);
181210

182211
let mut columns = BTreeMap::new();
183212
for offset in &self.field_offsets_set {
@@ -196,6 +225,7 @@ impl VectorIndexBuilder {
196225
let mut metadata = BTreeMap::new();
197226

198227
for (field_offsets, index_param) in self.field_offsets.iter().zip(&self.index_params) {
228+
debug!("Building HNSW index for {}", index_param.index_name);
199229
for (offset, column_id) in field_offsets {
200230
let Some(column) = concated_columns.get(offset) else {
201231
return Err(ErrorCode::Internal("Can't find vector column"));
@@ -238,9 +268,14 @@ impl VectorIndexBuilder {
238268
};
239269

240270
// Perf.
271+
let elapsed_ms = start.elapsed().as_millis() as u64;
241272
{
242-
metrics_inc_block_vector_index_generate_milliseconds(start.elapsed().as_millis() as u64);
273+
metrics_inc_block_vector_index_generate_milliseconds(elapsed_ms);
243274
}
275+
info!(
276+
"Finish build vector HNSW index: location={}, size={} bytes in {} ms",
277+
location.0, size, elapsed_ms
278+
);
244279

245280
Ok(state)
246281
}

src/query/storages/fuse/src/pruning/fuse_pruner.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ use databend_common_expression::RemoteExpr;
2626
use databend_common_expression::TableSchemaRef;
2727
use databend_common_expression::SEGMENT_NAME_COL_NAME;
2828
use databend_common_functions::BUILTIN_FUNCTIONS;
29+
use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_after;
30+
use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_before;
31+
use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_after;
32+
use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_before;
2933
use databend_common_sql::BloomIndexColumns;
3034
use databend_common_sql::DefaultExprBinder;
3135
use databend_storages_common_cache::CacheAccessor;
@@ -570,7 +574,27 @@ impl FusePruner {
570574
sort,
571575
limit,
572576
)?;
577+
578+
// Perf.
579+
{
580+
let block_size = metas.iter().map(|(_, m)| m.block_size).sum();
581+
metrics_inc_blocks_vector_index_pruning_before(metas.len() as u64);
582+
metrics_inc_bytes_block_vector_index_pruning_before(block_size);
583+
self.pruning_ctx
584+
.pruning_stats
585+
.set_blocks_vector_index_pruning_before(metas.len() as u64);
586+
}
573587
let pruned_metas = vector_pruner.prune(metas.clone()).await?;
588+
589+
// Perf.
590+
{
591+
let block_size = pruned_metas.iter().map(|(_, m)| m.block_size).sum();
592+
metrics_inc_blocks_vector_index_pruning_after(pruned_metas.len() as u64);
593+
metrics_inc_bytes_block_vector_index_pruning_after(block_size);
594+
self.pruning_ctx
595+
.pruning_stats
596+
.set_blocks_vector_index_pruning_after(pruned_metas.len() as u64);
597+
}
574598
return Ok(pruned_metas);
575599
}
576600
Ok(metas)
@@ -594,6 +618,11 @@ impl FusePruner {
594618
let blocks_inverted_index_pruning_after =
595619
stats.get_blocks_inverted_index_pruning_after() as usize;
596620

621+
let blocks_vector_index_pruning_before =
622+
stats.get_blocks_vector_index_pruning_before() as usize;
623+
let blocks_vector_index_pruning_after =
624+
stats.get_blocks_vector_index_pruning_after() as usize;
625+
597626
databend_common_catalog::plan::PruningStatistics {
598627
segments_range_pruning_before,
599628
segments_range_pruning_after,
@@ -603,6 +632,8 @@ impl FusePruner {
603632
blocks_bloom_pruning_after,
604633
blocks_inverted_index_pruning_before,
605634
blocks_inverted_index_pruning_after,
635+
blocks_vector_index_pruning_before,
636+
blocks_vector_index_pruning_after,
606637
}
607638
}
608639

src/query/storages/fuse/src/pruning/pruning_statistics.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ pub struct FusePruningStatistics {
3232
/// Block inverted index filter pruning stats.
3333
pub blocks_inverted_index_pruning_before: AtomicU64,
3434
pub blocks_inverted_index_pruning_after: AtomicU64,
35+
36+
/// Block vector index filter pruning stats.
37+
pub blocks_vector_index_pruning_before: AtomicU64,
38+
pub blocks_vector_index_pruning_after: AtomicU64,
3539
}
3640

3741
impl FusePruningStatistics {
@@ -108,4 +112,24 @@ impl FusePruningStatistics {
108112
self.blocks_inverted_index_pruning_after
109113
.load(Ordering::Relaxed)
110114
}
115+
116+
pub fn set_blocks_vector_index_pruning_before(&self, v: u64) {
117+
self.blocks_vector_index_pruning_before
118+
.fetch_add(v, Ordering::Relaxed);
119+
}
120+
121+
pub fn get_blocks_vector_index_pruning_before(&self) -> u64 {
122+
self.blocks_vector_index_pruning_before
123+
.load(Ordering::Relaxed)
124+
}
125+
126+
pub fn set_blocks_vector_index_pruning_after(&self, v: u64) {
127+
self.blocks_vector_index_pruning_after
128+
.fetch_add(v, Ordering::Relaxed);
129+
}
130+
131+
pub fn get_blocks_vector_index_pruning_after(&self) -> u64 {
132+
self.blocks_vector_index_pruning_after
133+
.load(Ordering::Relaxed)
134+
}
111135
}

0 commit comments

Comments
 (0)