From 96d16a07b65fd319c8c9b77db6ca95a3cc9f6384 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 3 Jun 2025 17:02:18 +0800 Subject: [PATCH 1/4] feat(query): Implement Vector Index with HNSW Algorithm --- Cargo.lock | 45 +- Cargo.toml | 6 +- src/common/metrics/src/metrics/storage.rs | 50 +- src/common/vector/src/distance.rs | 80 +- src/common/vector/src/lib.rs | 2 + src/query/catalog/src/plan/internal_column.rs | 22 + src/query/catalog/src/plan/pushdown.rs | 24 + src/query/config/src/config.rs | 30 + src/query/config/src/inner.rs | 12 + src/query/ee/tests/it/main.rs | 1 + src/query/ee/tests/it/vector_index/mod.rs | 15 + src/query/ee/tests/it/vector_index/pruning.rs | 757 ++++++++++++++++++ src/query/expression/src/evaluator.rs | 83 ++ src/query/expression/src/schema.rs | 12 +- src/query/expression/src/type_check.rs | 6 +- src/query/expression/src/types/decimal.rs | 6 + src/query/expression/src/types/number.rs | 8 + src/query/formats/src/field_decoder/nested.rs | 17 +- src/query/functions/src/scalars/vector.rs | 204 ++++- .../it/scalars/testdata/function_list.txt | 7 + .../tests/it/scalars/testdata/vector.txt | 190 ++++- .../functions/tests/it/scalars/vector.rs | 61 +- .../service/src/test_kits/block_writer.rs | 2 + .../it/storages/fuse/bloom_index_meta_size.rs | 2 + .../fuse/operations/internal_column.rs | 1 + .../operations/mutation/recluster_mutator.rs | 2 + .../mutation/segments_compact_mutator.rs | 2 + .../it/storages/fuse/operations/read_plan.rs | 2 + .../tests/it/storages/fuse/statistics.rs | 2 + .../storages/testdata/configs_table_basic.txt | 3 + .../physical_plans/physical_table_scan.rs | 1 + .../sql/src/planner/binder/bind_context.rs | 5 + .../bind_mutation/mutation_expression.rs | 2 +- src/query/sql/src/planner/binder/binder.rs | 9 +- src/query/sql/src/planner/binder/ddl/index.rs | 37 + .../planner/binder/internal_column_factory.rs | 6 + src/query/sql/src/planner/binder/table.rs | 1 + .../src/planner/optimizer/ir/expr/s_expr.rs | 15 +- src/query/sql/src/planner/plans/scan.rs | 3 + .../sql/src/planner/semantic/type_check.rs | 197 +++++ .../storages/common/cache/src/cache_items.rs | 2 + src/query/storages/common/cache/src/caches.rs | 35 + .../storages/common/cache/src/manager.rs | 47 ++ src/query/storages/common/index/Cargo.toml | 14 + .../index/src/hnsw_index/common/bitpacking.rs | 407 ++++++++++ .../src/hnsw_index/common/bitpacking_links.rs | 192 +++++ .../hnsw_index/common/bitpacking_ordered.rs | 311 +++++++ .../common/fixed_length_priority_queue.rs | 112 +++ .../common/index/src/hnsw_index/common/mod.rs | 22 + .../index/src/hnsw_index/common/types.rs | 48 ++ .../index/src/hnsw_index/common/utils.rs | 31 + .../index/src/hnsw_index/common/zeros.rs | 33 + .../index/src/hnsw_index/entry_points.rs | 162 ++++ .../index/src/hnsw_index/graph_layers.rs | 291 +++++++ .../src/hnsw_index/graph_layers_builder.rs | 571 +++++++++++++ .../index/src/hnsw_index/graph_links.rs | 130 +++ .../src/hnsw_index/graph_links/header.rs | 52 ++ .../src/hnsw_index/graph_links/serializer.rs | 233 ++++++ .../index/src/hnsw_index/graph_links/view.rs | 189 +++++ .../common/index/src/hnsw_index/hnsw.rs | 374 +++++++++ .../common/index/src/hnsw_index/mod.rs | 91 +++ .../index/src/hnsw_index/point_scorer.rs | 174 ++++ .../quantization/encoded_storage.rs | 68 ++ .../quantization/encoded_vectors.rs | 96 +++ .../quantization/encoded_vectors_u8.rs | 364 +++++++++ .../index/src/hnsw_index/quantization/mod.rs | 24 + .../src/hnsw_index/quantization/quantile.rs | 97 +++ .../index/src/hnsw_index/search_context.rs | 61 ++ 
.../index/src/hnsw_index/visited_pool.rs | 180 +++++ src/query/storages/common/index/src/lib.rs | 7 + .../storages/common/pruner/src/block_meta.rs | 2 + src/query/storages/common/pruner/src/lib.rs | 2 +- .../storages/common/pruner/src/topn_pruner.rs | 6 +- .../common/table_meta/src/meta/v2/segment.rs | 10 + .../src/meta/v3/frozen/block_meta.rs | 2 + src/query/storages/fuse/src/constants.rs | 1 + src/query/storages/fuse/src/fuse_table.rs | 4 +- src/query/storages/fuse/src/io/locations.rs | 21 + src/query/storages/fuse/src/io/mod.rs | 1 + .../fuse/src/io/read/meta/meta_readers.rs | 27 + src/query/storages/fuse/src/io/read/mod.rs | 2 + .../fuse/src/io/read/vector_index/mod.rs | 18 + .../read/vector_index/vector_index_loader.rs | 224 ++++++ .../read/vector_index/vector_index_reader.rs | 112 +++ .../fuse/src/io/write/block_writer.rs | 38 + src/query/storages/fuse/src/io/write/mod.rs | 3 + .../fuse/src/io/write/stream/block_builder.rs | 30 +- .../fuse/src/io/write/vector_index_writer.rs | 247 ++++++ .../processors/transform_serialize_block.rs | 7 + .../storages/fuse/src/operations/merge.rs | 7 + .../storages/fuse/src/operations/read/util.rs | 1 + .../fuse/src/operations/read_partitions.rs | 14 +- .../storages/fuse/src/pruning/block_pruner.rs | 2 + .../storages/fuse/src/pruning/fuse_pruner.rs | 43 +- src/query/storages/fuse/src/pruning/mod.rs | 2 + .../fuse/src/pruning/vector_index_pruner.rs | 230 ++++++ .../column_oriented_block_prune.rs | 1 + .../pruning_pipeline/topn_prune_transform.rs | 6 +- .../parquet_reader/reader/row_group_reader.rs | 1 + .../09_0000_vector_index_base.test | 157 +++- .../functions/02_0063_function_vector.test | 30 + 101 files changed, 7486 insertions(+), 113 deletions(-) create mode 100644 src/query/ee/tests/it/vector_index/mod.rs create mode 100644 src/query/ee/tests/it/vector_index/pruning.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/mod.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/types.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/utils.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/zeros.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/entry_points.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_layers.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links/header.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links/view.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/hnsw.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/mod.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/point_scorer.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs create mode 100644 
src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/mod.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/search_context.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/visited_pool.rs create mode 100644 src/query/storages/fuse/src/io/read/vector_index/mod.rs create mode 100644 src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs create mode 100644 src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs create mode 100644 src/query/storages/fuse/src/io/write/vector_index_writer.rs create mode 100644 src/query/storages/fuse/src/pruning/vector_index_pruner.rs diff --git a/Cargo.lock b/Cargo.lock index 0b6e9bc040c39..551227dddbed7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5478,29 +5478,42 @@ version = "0.1.0" dependencies = [ "anyerror", "bincode 2.0.1", + "bitvec", + "bytemuck", "bytes", "cbordata", "databend-common-ast", "databend-common-exception", "databend-common-expression", "databend-common-functions", + "databend-common-vector", "databend-storages-common-table-meta", "divan", "fastrace", + "feistel-permutation-rs", "goldenfile", + "itertools 0.13.0", "jsonb", "levenshtein_automata", "log", "match-template", + "num-traits", + "num_cpus", + "ordered-float 5.0.0", + "parking_lot 0.12.3", "parquet", "rand 0.8.5", + "rayon", "roaring", + "self_cell", "serde", + "serde_json", "tantivy", "tantivy-common", "tantivy-fst", "thiserror 1.0.69", "xorfilter-rs", + "zerocopy", ] [[package]] @@ -6545,6 +6558,12 @@ dependencies = [ "semver", ] +[[package]] +name = "feistel-permutation-rs" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db6c829a796418de937efceee04e784abd1c756a82037d84a26f868bc2279f2" + [[package]] name = "ff" version = "0.13.1" @@ -8254,12 +8273,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hermit-abi" version = "0.5.1" @@ -10733,11 +10746,11 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi 0.3.9", + "hermit-abi 0.5.1", "libc", ] @@ -13567,6 +13580,12 @@ dependencies = [ "libc", ] +[[package]] +name = "self_cell" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749" + [[package]] name = "semver" version = "1.0.26" @@ -17458,18 +17477,18 @@ checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = 
"zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index f3f8a78f654c7..93d78ab729bf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -263,6 +263,7 @@ base64 = "0.22" bincode = { version = "2.0.0-rc.3", features = ["serde", "std", "alloc"] } bincode_v1 = { package = "bincode", version = "1.3.3" } bitpacking = "0.8.0" +bitvec = "1.0.1" blake3 = "1.3.1" bollard = { version = "0.17" } borsh = { version = "1.2.1", features = ["derive"] } @@ -311,6 +312,7 @@ enumflags2 = { version = "0.7.7", features = ["serde"] } ethnum = { version = "1.5.1" } faststr = "0.2" feature-set = { version = "0.1.1" } +feistel-permutation-rs = "0.1.1" flatbuffers = "25" # Must use the same version with arrow-ipc foreign_vec = "0.1.0" form_urlencoded = { version = "1" } @@ -393,7 +395,7 @@ num = "0.4.0" num-bigint = "0.4.6" num-derive = "0.4.2" num-traits = "0.2.19" -num_cpus = "1.13.1" +num_cpus = "1.17" object = "0.36.5" object_store_opendal = { version = "0.52.0" } once_cell = "1.15.0" @@ -472,6 +474,7 @@ rustls-pemfile = "2" rustls-pki-types = "1" rustyline = "14" scroll = "0.12.0" +self_cell = "1.2.0" semver = "1.0.14" serde = { version = "1.0.164", features = ["derive", "rc"] } serde_derive = "1" @@ -542,6 +545,7 @@ wiremock = "0.6" wkt = "0.11.1" xorf = { version = "0.11.0", default-features = false, features = ["binary-fuse"] } xorfilter-rs = "0.5" +zerocopy = "0.8.26" zip = "3.0.0" zstd = "0.12.3" diff --git a/src/common/metrics/src/metrics/storage.rs b/src/common/metrics/src/metrics/storage.rs index a1766d77b1bb5..b60a72acb416d 100644 --- a/src/common/metrics/src/metrics/storage.rs +++ b/src/common/metrics/src/metrics/storage.rs @@ -177,6 +177,24 @@ static BLOCK_INVERTED_INDEX_READ_MILLISECONDS: LazyLock = LazyLock::n static BLOCK_INVERTED_INDEX_SEARCH_MILLISECONDS: LazyLock = LazyLock::new(|| { register_histogram_in_milliseconds("fuse_block_inverted_index_search_milliseconds") }); +static BLOCK_VECTOR_INDEX_WRITE_NUMS: LazyLock = + LazyLock::new(|| register_counter("fuse_block_vector_index_write_nums")); +static BLOCK_VECTOR_INDEX_WRITE_BYTES: LazyLock = + LazyLock::new(|| register_counter("fuse_block_vector_index_write_bytes")); +static BLOCK_VECTOR_INDEX_WRITE_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_write_milliseconds") +}); +static BLOCK_VECTOR_INDEX_GENERATE_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_generate_milliseconds") +}); +static BLOCK_VECTOR_INDEX_READ_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_read_milliseconds") +}); +static BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_search_milliseconds") +}); +static BLOCK_VECTOR_INDEX_READ_BYTES: LazyLock = + LazyLock::new(|| register_counter("fuse_block_vector_index_read_bytes")); static COMPACT_BLOCK_READ_NUMS: LazyLock = LazyLock::new(|| register_counter("fuse_compact_block_read_nums")); static COMPACT_BLOCK_READ_BYTES: LazyLock = @@ -548,6 +566,10 @@ pub fn metrics_inc_block_index_write_bytes(c: u64) { BLOCK_INDEX_WRITE_BYTES.inc_by(c); } +pub fn 
metrics_inc_block_index_read_bytes(c: u64) { + BLOCK_INDEX_READ_BYTES.inc_by(c); +} + pub fn metrics_inc_block_index_write_milliseconds(c: u64) { BLOCK_INDEX_WRITE_MILLISECONDS.observe(c as f64); } @@ -576,8 +598,32 @@ pub fn metrics_inc_block_inverted_index_search_milliseconds(c: u64) { BLOCK_INVERTED_INDEX_SEARCH_MILLISECONDS.observe(c as f64); } -pub fn metrics_inc_block_index_read_bytes(c: u64) { - BLOCK_INDEX_READ_BYTES.inc_by(c); +pub fn metrics_inc_block_vector_index_write_nums(c: u64) { + BLOCK_VECTOR_INDEX_WRITE_NUMS.inc_by(c); +} + +pub fn metrics_inc_block_vector_index_write_bytes(c: u64) { + BLOCK_VECTOR_INDEX_WRITE_BYTES.inc_by(c); +} + +pub fn metrics_inc_block_vector_index_write_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_WRITE_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_generate_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_GENERATE_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_read_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_READ_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_search_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_read_bytes(c: u64) { + BLOCK_VECTOR_INDEX_READ_BYTES.inc_by(c); } /// Compact metrics. diff --git a/src/common/vector/src/distance.rs b/src/common/vector/src/distance.rs index 97953374d239a..aff7caa4a26ab 100644 --- a/src/common/vector/src/distance.rs +++ b/src/common/vector/src/distance.rs @@ -16,69 +16,101 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use ndarray::ArrayView; -pub fn cosine_distance(from: &[f32], to: &[f32]) -> Result<f32> { - if from.len() != to.len() { +pub fn cosine_distance(lhs: &[f32], rhs: &[f32]) -> Result<f32> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - let a = ArrayView::from(from); - let b = ArrayView::from(to); + let a = ArrayView::from(lhs); + let b = ArrayView::from(rhs); let aa_sum = (&a * &a).sum(); let bb_sum = (&b * &b).sum(); Ok(1.0 - (&a * &b).sum() / ((aa_sum).sqrt() * (bb_sum).sqrt())) } -pub fn l2_distance(from: &[f32], to: &[f32]) -> Result<f32> { - if from.len() != to.len() { +pub fn l1_distance(lhs: &[f32], rhs: &[f32]) -> Result<f32> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - Ok(from + Ok(lhs .iter() - .zip(to.iter()) + .zip(rhs.iter()) + .map(|(a, b)| (a - b).abs()) + .sum::<f32>()) +} + +pub fn l2_distance(lhs: &[f32], rhs: &[f32]) -> Result<f32> { + if lhs.len() != rhs.len() { + return Err(ErrorCode::InvalidArgument(format!( + "Vector length not equal: {:} != {:}", + lhs.len(), + rhs.len(), + ))); + } + + Ok(lhs + .iter() + .zip(rhs.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::<f32>() .sqrt()) } -pub fn cosine_distance_64(from: &[f64], to: &[f64]) -> Result<f64> { - if from.len() != to.len() { +pub fn cosine_distance_64(lhs: &[f64], rhs: &[f64]) -> Result<f64> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - let a = ArrayView::from(from); - let b = ArrayView::from(to); + let a = ArrayView::from(lhs); + let b = ArrayView::from(rhs); let aa_sum = (&a * &a).sum(); let bb_sum = (&b * &b).sum(); Ok(1.0 - (&a * &b).sum() / ((aa_sum).sqrt() * 
(bb_sum).sqrt())) } -pub fn l2_distance_64(from: &[f64], to: &[f64]) -> Result<f64> { - if from.len() != to.len() { +pub fn l1_distance_64(lhs: &[f64], rhs: &[f64]) -> Result<f64> { + if lhs.len() != rhs.len() { + return Err(ErrorCode::InvalidArgument(format!( + "Vector length not equal: {:} != {:}", + lhs.len(), + rhs.len(), + ))); + } + + Ok(lhs + .iter() + .zip(rhs.iter()) + .map(|(a, b)| (a - b).abs()) + .sum::<f64>()) +} + +pub fn l2_distance_64(lhs: &[f64], rhs: &[f64]) -> Result<f64> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - Ok(from + Ok(lhs .iter() - .zip(to.iter()) + .zip(rhs.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::<f64>() .sqrt()) diff --git a/src/common/vector/src/lib.rs b/src/common/vector/src/lib.rs index 2988f0db11730..593c9252e6c23 100644 --- a/src/common/vector/src/lib.rs +++ b/src/common/vector/src/lib.rs @@ -16,5 +16,7 @@ mod distance; pub use distance::cosine_distance; pub use distance::cosine_distance_64; +pub use distance::l1_distance; +pub use distance::l1_distance_64; pub use distance::l2_distance; pub use distance::l2_distance_64; diff --git a/src/query/catalog/src/plan/internal_column.rs b/src/query/catalog/src/plan/internal_column.rs index 412d32d29f37c..a026ffb1f516a 100644 --- a/src/query/catalog/src/plan/internal_column.rs +++ b/src/query/catalog/src/plan/internal_column.rs @@ -42,6 +42,7 @@ use databend_common_expression::SEARCH_MATCHED_COLUMN_ID; use databend_common_expression::SEARCH_SCORE_COLUMN_ID; use databend_common_expression::SEGMENT_NAME_COLUMN_ID; use databend_common_expression::SNAPSHOT_NAME_COLUMN_ID; +use databend_common_expression::VECTOR_SCORE_COLUMN_ID; use databend_storages_common_table_meta::meta::try_extract_uuid_str_from_path; use databend_storages_common_table_meta::meta::NUM_BLOCK_ID_BITS; @@ -106,6 +107,8 @@ pub struct InternalColumnMeta { pub inner: Option<BlockMetaIndex>, // The search matched rows and optional scores in the block. pub matched_rows: Option<Vec<(usize, Option<F32>)>>, + // The vector topn rows and scores in the block. + pub vector_scores: Option<Vec<(usize, F32)>>, } #[typetag::serde(name = "internal_column_meta")] @@ -142,6 +145,9 @@ pub enum InternalColumnType { SearchMatched, SearchScore, + // vector columns + VectorScore, + FileName, FileRowNumber, } @@ -176,6 +182,7 @@ impl InternalColumn { )), InternalColumnType::SearchMatched => TableDataType::Boolean, InternalColumnType::SearchScore => TableDataType::Number(NumberDataType::Float32), + InternalColumnType::VectorScore => TableDataType::Number(NumberDataType::Float32), InternalColumnType::FileName => TableDataType::String, InternalColumnType::FileRowNumber => TableDataType::Number(NumberDataType::UInt64), } @@ -200,6 +207,7 @@ impl InternalColumn { InternalColumnType::BaseBlockIds => BASE_BLOCK_IDS_COLUMN_ID, InternalColumnType::SearchMatched => SEARCH_MATCHED_COLUMN_ID, InternalColumnType::SearchScore => SEARCH_SCORE_COLUMN_ID, + InternalColumnType::VectorScore => VECTOR_SCORE_COLUMN_ID, InternalColumnType::FileName => FILENAME_COLUMN_ID, InternalColumnType::FileRowNumber => FILE_ROW_NUMBER_COLUMN_ID, } @@ -291,6 +299,20 @@ impl InternalColumn { } Float32Type::from_data(scores).into() } + InternalColumnType::VectorScore => { + assert!(meta.vector_scores.is_some()); + let vector_scores = meta.vector_scores.as_ref().unwrap(); + + // The smaller the score, the closer the distance. + // Fill other rows with the maximum value and they will be filtered out. 
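+ // Hypothetical illustration of this fill logic: with num_rows = 4 and + // vector_scores = [(1, 0.2), (3, 0.7)], the generated column is + // [f32::MAX, 0.2, f32::MAX, 0.7], so an ascending TopN over + // _vector_score keeps only the scored rows 1 and 3.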
+ let mut scores = vec![F32::from(f32::MAX); num_rows]; + for (idx, score) in vector_scores.iter() { + if let Some(val) = scores.get_mut(*idx) { + *val = *score; + } + } + Float32Type::from_data(scores).into() + } InternalColumnType::FileName | InternalColumnType::FileRowNumber => { todo!("generate_column_values not support for file related") } diff --git a/src/query/catalog/src/plan/pushdown.rs b/src/query/catalog/src/plan/pushdown.rs index cfad14da502a2..d090d4b47d59d 100644 --- a/src/query/catalog/src/plan/pushdown.rs +++ b/src/query/catalog/src/plan/pushdown.rs @@ -19,6 +19,7 @@ use std::fmt::Debug; use databend_common_ast::ast::SampleConfig; use databend_common_expression::types::DataType; use databend_common_expression::types::F32; +use databend_common_expression::ColumnId; use databend_common_expression::DataSchema; use databend_common_expression::RemoteExpr; use databend_common_expression::Scalar; @@ -131,6 +132,23 @@ pub struct InvertedIndexInfo { pub inverted_index_option: Option<InvertedIndexOption>, } +/// Information about vector index. +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct VectorIndexInfo { + /// The index name. + pub index_name: String, + /// The index version. + pub index_version: String, + /// The index options: m, ef_construct, .. + pub index_options: BTreeMap<String, String>, + /// The column id of vector column. + pub column_id: ColumnId, + /// The distance function name: l1_distance, l2_distance, cosine_distance, .. + pub func_name: String, + /// The query vector value. + pub query_values: Vec<F32>, +} + /// Extras is a wrapper for push down items. #[derive(serde::Serialize, serde::Deserialize, Clone, Default, Debug, PartialEq, Eq)] pub struct PushDownInfo { @@ -161,6 +179,8 @@ pub struct PushDownInfo { pub change_type: Option<ChangeType>, /// Optional inverted index pub inverted_index: Option<InvertedIndexInfo>, + /// Optional vector index + pub vector_index: Option<VectorIndexInfo>, /// Used by table sample pub sample: Option<SampleConfig>, } @@ -236,6 +256,10 @@ impl PushDownInfo { } } + pub fn vector_topn(&self) -> bool { + !self.order_by.is_empty() && self.limit.is_some() && self.vector_index.is_some() + } + pub fn prewhere_of_push_downs(push_downs: Option<&PushDownInfo>) -> Option<PrewhereInfo> { if let Some(PushDownInfo { prewhere, .. }) = push_downs { prewhere.clone() diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index d357d7aae491a..d69a95ad30120 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -3285,6 +3285,30 @@ pub struct CacheConfig { )] pub inverted_index_filter_memory_ratio: u64, + /// Max number of cached vector index meta objects. Set it to 0 to disable it. + #[clap( + long = "cache-vector-index-meta-count", + value_name = "VALUE", + default_value = "3000" + )] + pub vector_index_meta_count: u64, + + /// Max bytes of cached vector index filters used. Set it to 0 to disable it. + #[clap( + long = "cache-vector-index-filter-size", + value_name = "VALUE", + default_value = "2147483648" + )] + pub vector_index_filter_size: u64, + + /// Max percentage of in memory vector index filter cache relative to whole memory. By default it is 0 (disabled). 
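+ /// For example, a value of 10 would let this cache use about 10% of total memory.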
+ #[clap( + long = "cache-vector-index-filter-memory-ratio", + value_name = "VALUE", + default_value = "0" + )] + pub vector_index_filter_memory_ratio: u64, + #[clap( long = "cache-table-prune-partitions-count", value_name = "VALUE", @@ -3609,6 +3633,9 @@ mod cache_config_converters { inverted_index_meta_count: value.inverted_index_meta_count, inverted_index_filter_size: value.inverted_index_filter_size, inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio, + vector_index_meta_count: value.vector_index_meta_count, + vector_index_filter_size: value.vector_index_filter_size, + vector_index_filter_memory_ratio: value.vector_index_filter_memory_ratio, table_prune_partitions_count: value.table_prune_partitions_count, data_cache_storage: value.data_cache_storage.try_into()?, table_data_cache_population_queue_size: value @@ -3645,6 +3672,9 @@ mod cache_config_converters { inverted_index_meta_count: value.inverted_index_meta_count, inverted_index_filter_size: value.inverted_index_filter_size, inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio, + vector_index_meta_count: value.vector_index_meta_count, + vector_index_filter_size: value.vector_index_filter_size, + vector_index_filter_memory_ratio: value.vector_index_filter_memory_ratio, table_prune_partitions_count: value.table_prune_partitions_count, data_cache_storage: value.data_cache_storage.into(), data_cache_key_reload_policy: value.data_cache_key_reload_policy.into(), diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 32ee0791d0f6d..2807e1aaee817 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -616,6 +616,15 @@ pub struct CacheConfig { /// Max percentage of in memory inverted index filters cache relative to whole memory. By default it is 0 (disabled). pub inverted_index_filter_memory_ratio: u64, + /// Max number of cached vector index meta objects. Set it to 0 to disable it. + pub vector_index_meta_count: u64, + + /// Max bytes of cached vector index filters used. Set it to 0 to disable it. + pub vector_index_filter_size: u64, + + /// Max percentage of in memory vector index filters cache relative to whole memory. By default it is 0 (disabled). + pub vector_index_filter_memory_ratio: u64, + pub data_cache_storage: CacheStorageTypeConfig, /// Max size of external cache population queue length @@ -749,6 +758,9 @@ impl Default for CacheConfig { inverted_index_meta_count: 3000, inverted_index_filter_size: 2147483648, inverted_index_filter_memory_ratio: 0, + vector_index_meta_count: 3000, + vector_index_filter_size: 2147483648, + vector_index_filter_memory_ratio: 0, table_prune_partitions_count: 256, data_cache_storage: Default::default(), table_data_cache_population_queue_size: 0, diff --git a/src/query/ee/tests/it/main.rs b/src/query/ee/tests/it/main.rs index 8b5c2646c8640..4bf5e344b6e6b 100644 --- a/src/query/ee/tests/it/main.rs +++ b/src/query/ee/tests/it/main.rs @@ -19,3 +19,4 @@ mod license; mod ngram_index; mod storages; mod stream; +mod vector_index; diff --git a/src/query/ee/tests/it/vector_index/mod.rs b/src/query/ee/tests/it/vector_index/mod.rs new file mode 100644 index 0000000000000..56193a6b31dee --- /dev/null +++ b/src/query/ee/tests/it/vector_index/mod.rs @@ -0,0 +1,15 @@ +// Copyright 2023 Databend Cloud +// +// Licensed under the Elastic License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.elastic.co/licensing/elastic-license +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod pruning; diff --git a/src/query/ee/tests/it/vector_index/pruning.rs b/src/query/ee/tests/it/vector_index/pruning.rs new file mode 100644 index 0000000000000..9b862ed7071ff --- /dev/null +++ b/src/query/ee/tests/it/vector_index/pruning.rs @@ -0,0 +1,757 @@ +// Copyright 2023 Databend Cloud +// +// Licensed under the Elastic License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.elastic.co/licensing/elastic-license +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::sync::Arc; + +use databend_common_ast::ast::Engine; +use databend_common_base::base::tokio; +use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::plan::VectorIndexInfo; +use databend_common_exception::Result; +use databend_common_expression::types::number::UInt64Type; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::VectorColumn; +use databend_common_expression::types::VectorDataType; +use databend_common_expression::types::F32; +use databend_common_expression::Column; +use databend_common_expression::DataBlock; +use databend_common_expression::FromData; +use databend_common_expression::RemoteExpr; +use databend_common_expression::TableDataType; +use databend_common_expression::TableField; +use databend_common_expression::TableSchemaRef; +use databend_common_expression::TableSchemaRefExt; +use databend_common_meta_app::schema::CreateOption; +use databend_common_meta_app::schema::TableIndex; +use databend_common_meta_app::schema::TableIndexType; +use databend_common_sql::plans::CreateTablePlan; +use databend_common_sql::BloomIndexColumns; +use databend_common_storages_fuse::pruning::create_segment_location_vector; +use databend_common_storages_fuse::pruning::FusePruner; +use databend_common_storages_fuse::FuseTable; +use databend_enterprise_query::test_kits::context::EESetup; +use databend_query::interpreters::CreateTableInterpreter; +use databend_query::interpreters::Interpreter; +use databend_query::sessions::QueryContext; +use databend_query::sessions::TableContext; +use databend_query::storages::fuse::FUSE_OPT_KEY_BLOCK_PER_SEGMENT; +use databend_query::storages::fuse::FUSE_OPT_KEY_ROW_PER_BLOCK; +use databend_query::test_kits::*; +use databend_storages_common_pruner::BlockMetaIndex; +use databend_storages_common_table_meta::meta::BlockMeta; +use databend_storages_common_table_meta::meta::TableSnapshot; +use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; +use opendal::Operator; + +async fn apply_block_pruning( + table_snapshot: Arc<TableSnapshot>, + schema: TableSchemaRef, + push_down: &Option<PushDownInfo>, + ctx: Arc<QueryContext>, + dal: Operator, + bloom_index_cols: 
BloomIndexColumns, +) -> Result)>> { + let ctx: Arc = ctx; + let segment_locs = table_snapshot.segments.clone(); + let segment_locs = create_segment_location_vector(segment_locs, None); + + FusePruner::create(&ctx, dal, schema, push_down, bloom_index_cols, vec![], None)? + .read_pruning(segment_locs) + .await +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_block_pruner() -> Result<()> { + let fixture = TestFixture::setup_with_custom(EESetup::new()).await?; + + let ctx = fixture.new_query_ctx().await?; + fixture.create_default_database().await?; + + let test_tbl_name = "test_vector_index"; + let test_schema = TableSchemaRefExt::create(vec![ + TableField::new("id", TableDataType::Number(NumberDataType::UInt64)), + TableField::new( + "embedding", + TableDataType::Vector(VectorDataType::Float32(4)), + ), + ]); + + let row_per_block = 10; + let num_blocks_opt = row_per_block.to_string(); + + let index_name = "idx1".to_string(); + let index_version = "test1".to_string(); + + let mut index_options = BTreeMap::new(); + index_options.insert("m".to_string(), "10".to_string()); + index_options.insert("ef_construct".to_string(), "40".to_string()); + index_options.insert("distance".to_string(), "cosine,l1,l2".to_string()); + let index_column_id = 1; + let table_index = TableIndex { + index_type: TableIndexType::Vector, + name: index_name.clone(), + column_ids: vec![index_column_id], + sync_creation: true, + version: index_version.clone(), + options: index_options.clone(), + }; + let mut table_indexes = BTreeMap::new(); + table_indexes.insert("idx1".to_string(), table_index); + + // create test table + let create_table_plan = CreateTablePlan { + catalog: "default".to_owned(), + create_option: CreateOption::Create, + tenant: fixture.default_tenant(), + database: fixture.default_db_name(), + table: test_tbl_name.to_string(), + schema: test_schema.clone(), + engine: Engine::Fuse, + engine_options: Default::default(), + storage_params: None, + options: [ + (FUSE_OPT_KEY_ROW_PER_BLOCK.to_owned(), num_blocks_opt), + (FUSE_OPT_KEY_BLOCK_PER_SEGMENT.to_owned(), "5".to_owned()), + (OPT_KEY_DATABASE_ID.to_owned(), "1".to_owned()), + ] + .into(), + field_comments: vec![], + as_select: None, + cluster_key: None, + table_indexes: Some(table_indexes), + attached_columns: None, + table_partition: None, + table_properties: None, + }; + + let interpreter = CreateTableInterpreter::try_create(ctx.clone(), create_table_plan)?; + let _ = interpreter.execute(ctx.clone()).await?; + + // get table + let catalog = ctx.get_catalog("default").await?; + let table = catalog + .get_table( + &fixture.default_tenant(), + fixture.default_db_name().as_str(), + test_tbl_name, + ) + .await?; + + // prepare test blocks + let vals0: Vec = vec![ + -0.6886994, + 0.594091, + 0.90251666, + -0.5796461, + -0.82056284, + 0.80095357, + 0.6307791, + -0.10274009, + 0.80994654, + 0.17736527, + -0.65107286, + -0.34088722, + -0.06466371, + -0.20792475, + 0.15237674, + 0.51079565, + -0.6937013, + -0.5297969, + 0.7171806, + 0.785739, + -0.65965945, + -0.81779677, + 0.35969305, + -0.46954358, + -0.8181405, + -0.6114142, + -0.87969273, + -0.00383717, + 0.918081, + -0.08214826, + -0.2705187, + -0.39889243, + 0.6290396, + 0.9973043, + -0.3085359, + 0.8468473, + -0.32078063, + 0.67827964, + 0.9978988, + -0.30051866, + ]; + let vals0 = unsafe { std::mem::transmute::, Vec>(vals0) }; + let block0 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + Column::Vector(VectorColumn::Float32((vals0.into(), 
4))), + ]); + let vals1: Vec = vec![ + -0.8662579, + 0.21157496, + 0.04832743, + 0.01034609, + -0.8213428, + -0.07455289, + 0.79567593, + 0.22692858, + -0.13815483, + 0.04082361, + -0.04671623, + 0.07829991, + -0.4285805, + -0.83638775, + 0.16173266, + -0.6230965, + 0.48879236, + -0.8992002, + 0.6461996, + -0.6104055, + 0.7835251, + 0.6034467, + 0.12212521, + 0.49520096, + 0.5970688, + 0.45890963, + -0.05623427, + -0.49175563, + -0.8342597, + -0.5295784, + 0.6283545, + 0.08985507, + -0.60963225, + -0.9484875, + -0.40452087, + -0.87066746, + 0.48526454, + 0.03684357, + 0.63801855, + -0.49714512, + ]; + let vals1 = unsafe { std::mem::transmute::, Vec>(vals1) }; + let block1 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + Column::Vector(VectorColumn::Float32((vals1.into(), 4))), + ]); + let vals2: Vec = vec![ + -0.18905626, + 0.6927208, + 0.7869001, + 0.22925916, + -0.5255186, + 0.14997292, + -0.5750151, + 0.51772356, + -0.951746, + 0.9412492, + 0.4678889, + 0.46652728, + 0.61070764, + -0.66532606, + -0.76100147, + -0.12496163, + -0.6957283, + 0.8386284, + -0.15284961, + -0.2555948, + -0.22072262, + 0.42040154, + 0.99745035, + 0.6271642, + 0.9605643, + -0.65621495, + -0.4781119, + 0.6010602, + 0.7315234, + -0.03415851, + -0.12357767, + 0.09560691, + 0.21121186, + 0.2585377, + 0.5601369, + 0.23845962, + -0.35424188, + 0.17996286, + -0.4941602, + -0.20577724, + ]; + let vals2 = unsafe { std::mem::transmute::, Vec>(vals2) }; + let block2 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + Column::Vector(VectorColumn::Float32((vals2.into(), 4))), + ]); + let vals3: Vec = vec![ + 0.8412355, + 0.3082751, + 0.59870875, + -0.54127926, + -0.9425862, + -0.4464907, + -0.82330227, + -0.33117214, + 0.13021936, + -0.6236809, + 0.96284235, + -0.5690468, + -0.2858306, + 0.4726673, + -0.1239042, + -0.6170608, + -0.00327663, + -0.83231056, + 0.16952398, + -0.01978558, + 0.6004247, + 0.09402651, + 0.9722124, + -0.46700177, + 0.59854394, + 0.43756092, + -0.60489684, + -0.77390605, + -0.33195212, + 0.20036773, + -0.78870934, + 0.06877671, + 0.90521765, + 0.76765245, + -0.5661686, + -0.85996264, + -0.8881472, + 0.7931559, + 0.2554919, + -0.8342734, + ]; + let vals3 = unsafe { std::mem::transmute::, Vec>(vals3) }; + let block3 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![31, 32, 33, 34, 35, 36, 37, 38, 39, 40]), + Column::Vector(VectorColumn::Float32((vals3.into(), 4))), + ]); + let vals4: Vec = vec![ + -0.07214834, + -0.45140868, + 0.52644473, + -0.9244883, + -0.30683544, + -0.54323095, + -0.21925122, + -0.12423284, + -0.8629535, + 0.58288944, + 0.75837606, + 0.03510276, + -0.8564059, + -0.03417623, + -0.07238109, + 0.58050597, + 0.7454117, + -0.27445704, + 0.45540568, + -0.5408085, + -0.780661, + 0.6657731, + -0.97462314, + 0.8857822, + 0.02701622, + 0.04349842, + 0.5408021, + 0.7438895, + -0.44429415, + 0.77314705, + -0.36297366, + -0.6039303, + 0.19068193, + 0.14782214, + 0.75198305, + -0.10257443, + -0.08388132, + -0.7079838, + -0.45469823, + 0.4560124, + ]; + let vals4 = unsafe { std::mem::transmute::, Vec>(vals4) }; + let block4 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![41, 42, 43, 44, 45, 46, 47, 48, 49, 50]), + Column::Vector(VectorColumn::Float32((vals4.into(), 4))), + ]); + let vals5: Vec = vec![ + -0.1999165, + 0.52322525, + -0.337038, + -0.90144914, + -0.8406314, + -0.5335526, + -0.95726347, + 0.33673206, + -0.8691562, + 0.48139447, + 
-0.6788517, + 0.3771608, + 0.4059562, + -0.58860403, + -0.428289, + 0.32089558, + -0.3011892, + 0.60242313, + -0.87302023, + -0.25639316, + -0.9859232, + 0.29515472, + 0.55974996, + -0.8190884, + -0.08609874, + -0.50538206, + 0.0652289, + 0.7410794, + -0.59104115, + 0.8998315, + 0.31411764, + 0.5163839, + 0.25237387, + 0.02671343, + -0.8648633, + 0.95094275, + -0.6676619, + 0.62161124, + 0.6938727, + -0.10332275, + ]; + let vals5 = unsafe { std::mem::transmute::, Vec>(vals5) }; + let block5 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![51, 52, 53, 54, 55, 56, 57, 58, 59, 60]), + Column::Vector(VectorColumn::Float32((vals5.into(), 4))), + ]); + let vals6: Vec = vec![ + 0.5895334, + -0.7343663, + -0.02117946, + 0.1402015, + 0.6598045, + -0.722716, + -0.40154833, + -0.10447401, + -0.78196186, + 0.436223, + -0.8290139, + 0.22458494, + -0.01400176, + 0.3236723, + 0.17722614, + 0.9377708, + 0.09351188, + 0.8986833, + -0.8690766, + 0.10546188, + -0.2846303, + -0.454967, + -0.5632622, + 0.46904188, + -0.39408457, + -0.1404441, + -0.5426498, + -0.7066665, + 0.8154848, + 0.92514247, + -0.449755, + 0.62942183, + 0.5758866, + 0.8156669, + -0.15692636, + -0.15390746, + 0.457048, + 0.47833237, + 0.63010204, + 0.81386733, + ]; + let vals6 = unsafe { std::mem::transmute::, Vec>(vals6) }; + let block6 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![61, 62, 63, 64, 65, 66, 67, 68, 69, 70]), + Column::Vector(VectorColumn::Float32((vals6.into(), 4))), + ]); + let vals7: Vec = vec![ + 0.2055598, + -0.9889231, + 0.48384285, + 0.6735521, + 0.42140472, + -0.56612134, + -0.3547931, + 0.37290242, + -0.63698244, + 0.25703365, + -0.6497194, + -0.00122721, + 0.01125184, + -0.32437629, + -0.23926528, + -0.13202162, + -0.37527475, + -0.23734985, + 0.03072986, + -0.08610785, + 0.09782696, + -0.05098151, + -0.01559174, + -0.59764004, + -0.48390508, + 0.71857893, + -0.4476935, + 0.6353149, + -0.9063252, + 0.03339462, + -0.13207407, + 0.35822904, + 0.14378202, + -0.6895029, + -0.45171574, + 0.7036348, + -0.05764073, + -0.04511834, + -0.6025827, + 0.42203856, + ]; + let vals7 = unsafe { std::mem::transmute::, Vec>(vals7) }; + let block7 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![71, 72, 73, 74, 75, 76, 77, 78, 79, 80]), + Column::Vector(VectorColumn::Float32((vals7.into(), 4))), + ]); + let vals8: Vec = vec![ + 0.44271547, + 0.04186246, + -0.05471806, + 0.84741205, + -0.60298675, + 0.13338158, + -0.01588953, + 0.2876288, + -0.09086735, + -0.11241615, + 0.03860525, + 0.63135403, + 0.60686076, + -0.32387394, + -0.66953754, + 0.7155654, + -0.40972582, + -0.70375466, + 0.28354865, + -0.75318587, + 0.11960128, + -0.10885316, + 0.30722642, + 0.11420934, + -0.5221141, + 0.31499448, + 0.86042684, + 0.47856066, + -0.82223445, + 0.7333596, + -0.32723898, + -0.4398808, + 0.9394175, + -0.25679085, + 0.2887939, + -0.73664117, + 0.5395438, + -0.05887805, + 0.36002022, + -0.72944045, + ]; + let vals8 = unsafe { std::mem::transmute::, Vec>(vals8) }; + let block8 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![81, 82, 83, 84, 85, 86, 87, 88, 89, 90]), + Column::Vector(VectorColumn::Float32((vals8.into(), 4))), + ]); + let vals9: Vec = vec![ + 0.95527714, + -0.03856075, + -0.89367366, + 0.90464765, + 0.7934615, + -0.50674295, + 0.5941392, + -0.35010257, + 0.45648512, + -0.11480136, + 0.9441768, + 0.07530943, + 0.07846592, + -0.15600504, + -0.28246698, + 0.19841912, + 0.07780663, + 0.1556818, + -0.2927237, + 0.07868534, + 0.13883874, + -0.8788782, + 0.7045493, + 
-0.23339222, + 0.95576626, + -0.9563942, + -0.13632946, + 0.06362384, + 0.44660464, + 0.6827207, + 0.5226848, + -0.23891447, + 0.48967868, + 0.9801073, + -0.5306416, + -0.36345342, + 0.42729795, + 0.92860633, + 0.8177991, + -0.24459854, + ]; + let vals9 = unsafe { std::mem::transmute::, Vec>(vals9) }; + let block9 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![91, 92, 93, 94, 95, 96, 97, 98, 99, 100]), + Column::Vector(VectorColumn::Float32((vals9.into(), 4))), + ]); + + let blocks = vec![ + block0, block1, block2, block3, block4, block5, block6, block7, block8, block9, + ]; + + fixture + .append_commit_blocks(table.clone(), blocks, false, true) + .await?; + + // Define query vectors for testing + let query_values1 = vec![-0.6886994, 0.594091, 0.90251667, -0.5796461]; + let query_values1 = unsafe { std::mem::transmute::, Vec>(query_values1) }; + let query_values2 = vec![0.5758866, 0.8156669, -0.15692637, -0.15390747]; + let query_values2 = unsafe { std::mem::transmute::, Vec>(query_values2) }; + + let table = catalog + .get_table( + &fixture.default_tenant(), + fixture.default_db_name().as_str(), + test_tbl_name, + ) + .await?; + + let fuse_table = FuseTable::do_create(table.get_table_info().clone())?; + let snapshot = fuse_table.read_table_snapshot().await?; + assert!(snapshot.is_some()); + let snapshot = snapshot.unwrap(); + + let orderby_expr = RemoteExpr::::ColumnRef { + span: None, + id: "_vector_score".to_string(), + data_type: DataType::Number(NumberDataType::Float32), + display_name: "_vector_score".to_string(), + }; + + let vector_index = VectorIndexInfo { + index_name: index_name.clone(), + index_version: index_version.clone(), + index_options: index_options.clone(), + column_id: index_column_id, + func_name: "".to_string(), + query_values: vec![], + }; + + let query_values = vec![ + ("cosine_distance".to_string(), query_values1.clone()), + ("l1_distance".to_string(), query_values1.clone()), + ("l2_distance".to_string(), query_values1.clone()), + ("cosine_distance".to_string(), query_values2.clone()), + ("l1_distance".to_string(), query_values2.clone()), + ("l2_distance".to_string(), query_values2.clone()), + ]; + + let results = vec![ + vec![ + vec![ + (0, 0, 0, 0.005022526), + (0, 0, 9, 0.05992174), + (0, 0, 1, 0.09289217), + ], + vec![(1, 0, 9, 0.05186367), (1, 0, 5, 0.07403374)], + ], + vec![ + vec![(0, 0, 0, 0.0), (0, 0, 9, 0.84269863), (0, 0, 1, 1.0792456)], + vec![(0, 4, 2, 0.9375271)], + vec![(1, 0, 9, 0.7167929)], + ], + vec![vec![(0, 0, 0, 3.5187712), (0, 0, 9, 3.5518785)], vec![ + (1, 3, 6, 3.4702706), + (1, 3, 7, 3.5206928), + (1, 3, 1, 3.556445), + ]], + vec![ + vec![(0, 1, 6, 0.18258381)], + vec![(0, 3, 8, 0.15948296)], + vec![(1, 1, 8, 0.008677483), (1, 1, 7, 0.21170044)], + vec![(1, 4, 8, 0.0657177)], + ], + vec![ + vec![(0, 1, 6, 0.7965471)], + vec![(0, 2, 7, 1.3045802)], + vec![(1, 1, 8, 0.0)], + vec![(1, 4, 8, 0.8538904), (1, 4, 7, 1.021619)], + ], + vec![vec![(1, 1, 8, 3.4763064)], vec![ + (1, 3, 5, 3.4903116), + (1, 3, 9, 3.4926815), + (1, 3, 0, 3.527872), + (1, 3, 8, 3.560473), + ]], + ]; + + let mut extras = Vec::new(); + for ((func_name, query_values), result) in query_values.into_iter().zip(results.into_iter()) { + let mut vector_index = vector_index.clone(); + vector_index.func_name = func_name; + vector_index.query_values = query_values; + let extra = PushDownInfo { + limit: Some(5), + order_by: vec![(orderby_expr.clone(), true, false)], + vector_index: Some(vector_index), + ..Default::default() + }; + extras.push((Some(extra), 
result)); + } + + for (extra, expected_results) in extras { + let block_metas = apply_block_pruning( + snapshot.clone(), + table.get_table_info().schema(), + &extra, + ctx.clone(), + fuse_table.get_operator(), + fuse_table.bloom_index_cols(), + ) + .await?; + assert_eq!(block_metas.len(), expected_results.len()); + for ((block_meta_index, _), expected_scores) in + block_metas.iter().zip(expected_results.iter()) + { + assert!(block_meta_index.vector_scores.is_some()); + let vector_scores = block_meta_index.vector_scores.clone().unwrap(); + assert_eq!(vector_scores.len(), expected_scores.len()); + for (vector_score, expected_score) in vector_scores.iter().zip(expected_scores) { + assert_eq!(block_meta_index.segment_idx, expected_score.0); + assert_eq!(block_meta_index.block_idx, expected_score.1); + assert_eq!(vector_score.0, expected_score.2); + assert_eq!(vector_score.1, expected_score.3); + } + } + } + + Ok(()) +} diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index 9379b53b8cb8e..b4ebe56e3c48f 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -49,6 +49,10 @@ use crate::types::ReturnType; use crate::types::StringType; use crate::types::ValueType; use crate::types::VariantType; +use crate::types::VectorColumn; +use crate::types::VectorDataType; +use crate::types::VectorScalar; +use crate::types::F32; use crate::values::Column; use crate::values::ColumnBuilder; use crate::values::Scalar; @@ -916,6 +920,85 @@ impl<'a> Evaluator<'a> { other => unreachable!("source: {}", other), } } + (DataType::Array(inner_src_ty), DataType::Vector(inner_dest_ty)) => { + if !matches!(&**inner_src_ty, DataType::Number(_) | DataType::Decimal(_)) + || matches!(inner_dest_ty, VectorDataType::Int8(_)) + { + return Err(ErrorCode::BadArguments(format!( + "unable to cast type `{src_type}` to type `{dest_type}`" + )) + .set_span(span)); + } + let dimension = inner_dest_ty.dimension() as usize; + match value { + Value::Scalar(Scalar::Array(col)) => { + if col.len() != dimension { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has incorrect dimension".to_string(), + ) + .set_span(span)); + } + let mut vals = Vec::with_capacity(dimension); + match col { + Column::Number(num_col) => { + for i in 0..dimension { + let num = unsafe { num_col.index_unchecked(i) }; + vals.push(num.to_f32()); + } + } + Column::Decimal(dec_col) => { + for i in 0..dimension { + let dec = unsafe { dec_col.index_unchecked(i) }; + vals.push(F32::from(dec.to_float32())); + } + } + _ => { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has invalid value".to_string(), + ) + .set_span(span)); + } + } + Ok(Value::Scalar(Scalar::Vector(VectorScalar::Float32(vals)))) + } + Value::Column(Column::Array(array_col)) => { + let mut vals = Vec::with_capacity(dimension * array_col.len()); + for col in array_col.iter() { + if col.len() != dimension { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has incorrect dimension" + .to_string(), + ) + .set_span(span)); + } + match col { + Column::Number(num_col) => { + for i in 0..dimension { + let num = unsafe { num_col.index_unchecked(i) }; + vals.push(num.to_f32()); + } + } + Column::Decimal(dec_col) => { + for i in 0..dimension { + let dec = unsafe { dec_col.index_unchecked(i) }; + vals.push(F32::from(dec.to_float32())); + } + } + _ => { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has invalid value" + .to_string(), + ) + 
.set_span(span)); + } + } + } + let vector_col = VectorColumn::Float32((vals.into(), dimension)); + Ok(Value::Column(Column::Vector(vector_col))) + } + other => unreachable!("source: {}", other), + } + } _ => Err(ErrorCode::BadArguments(format!( "unable to cast type `{src_type}` to type `{dest_type}`" diff --git a/src/query/expression/src/schema.rs b/src/query/expression/src/schema.rs index 0cdb6cc69f331..89f088dd71f47 100644 --- a/src/query/expression/src/schema.rs +++ b/src/query/expression/src/schema.rs @@ -50,6 +50,8 @@ pub const BASE_BLOCK_IDS_COLUMN_ID: u32 = u32::MAX - 6; // internal search column id. pub const SEARCH_MATCHED_COLUMN_ID: u32 = u32::MAX - 7; pub const SEARCH_SCORE_COLUMN_ID: u32 = u32::MAX - 8; +// internal vector score column id. +pub const VECTOR_SCORE_COLUMN_ID: u32 = u32::MAX - 9; pub const VIRTUAL_COLUMN_ID_START: u32 = 3_000_000_000; pub const VIRTUAL_COLUMNS_ID_UPPER: u32 = 3_000_001_000; @@ -66,6 +68,8 @@ pub const BASE_BLOCK_IDS_COL_NAME: &str = "_base_block_ids"; // internal search column name. pub const SEARCH_MATCHED_COL_NAME: &str = "_search_matched"; pub const SEARCH_SCORE_COL_NAME: &str = "_search_score"; +// internal vector score column name. +pub const VECTOR_SCORE_COL_NAME: &str = "_vector_score"; pub const CHANGE_ACTION_COL_NAME: &str = "change$action"; pub const CHANGE_IS_UPDATE_COL_NAME: &str = "change$is_update"; @@ -100,6 +104,7 @@ pub static INTERNAL_COLUMNS: LazyLock> = LazyLock::new(|| BASE_BLOCK_IDS_COL_NAME, SEARCH_MATCHED_COL_NAME, SEARCH_SCORE_COL_NAME, + VECTOR_SCORE_COL_NAME, CHANGE_ACTION_COL_NAME, CHANGE_IS_UPDATE_COL_NAME, CHANGE_ROW_ID_COL_NAME, @@ -114,7 +119,7 @@ pub static INTERNAL_COLUMNS: LazyLock> = LazyLock::new(|| #[inline] pub fn is_internal_column_id(column_id: ColumnId) -> bool { - column_id >= SEARCH_SCORE_COLUMN_ID + column_id >= VECTOR_SCORE_COLUMN_ID || (FILE_ROW_NUMBER_COLUMN_ID..=FILENAME_COLUMN_ID).contains(&column_id) } @@ -670,7 +675,10 @@ impl TableSchema { } } ( - TableDataType::Tuple { .. } | TableDataType::Array(_) | TableDataType::Map(_), + TableDataType::Tuple { .. 
} + | TableDataType::Array(_) + | TableDataType::Map(_) + | TableDataType::Vector(_), _, ) => { // ignore leaf columns diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index df503fdd1c1f1..2b60877f13d12 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -638,7 +638,11 @@ fn can_cast_to(src_ty: &DataType, dest_ty: &DataType) -> bool { { true } - + (DataType::Array(fields_src_ty), DataType::Vector(_)) + if matches!(&**fields_src_ty, DataType::Number(_) | DataType::Decimal(_)) => + { + true + } (DataType::Nullable(box inner_src_ty), DataType::Nullable(box inner_dest_ty)) | (DataType::Nullable(box inner_src_ty), inner_dest_ty) | (inner_src_ty, DataType::Nullable(box inner_dest_ty)) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 583d77522a57d..4e9e93d8d6467 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -176,6 +176,12 @@ pub enum DecimalScalar { } impl DecimalScalar { + pub fn to_float32(&self) -> f32 { + with_decimal_type!(|DECIMAL| match self { + DecimalScalar::DECIMAL(v, size) => v.to_float32(size.scale), + }) + } + pub fn to_float64(&self) -> f64 { with_decimal_type!(|DECIMAL| match self { DecimalScalar::DECIMAL(v, size) => v.to_float64(size.scale), diff --git a/src/query/expression/src/types/number.rs b/src/query/expression/src/types/number.rs index 358d7d6c467c5..0bd92a7d526c2 100644 --- a/src/query/expression/src/types/number.rs +++ b/src/query/expression/src/types/number.rs @@ -489,6 +489,14 @@ impl NumberScalar { } } + pub fn to_f32(&self) -> F32 { + crate::with_integer_mapped_type!(|NUM_TYPE| match self { + NumberScalar::NUM_TYPE(num) => (*num as f32).into(), + NumberScalar::Float32(num) => *num, + NumberScalar::Float64(num) => (num.into_inner() as f32).into(), + }) + } + pub fn to_f64(&self) -> F64 { crate::with_integer_mapped_type!(|NUM_TYPE| match self { NumberScalar::NUM_TYPE(num) => (*num as f64).into(), diff --git a/src/query/formats/src/field_decoder/nested.rs b/src/query/formats/src/field_decoder/nested.rs index 753fe52f694b0..b9e0d1ce1e696 100644 --- a/src/query/formats/src/field_decoder/nested.rs +++ b/src/query/formats/src/field_decoder/nested.rs @@ -449,19 +449,14 @@ impl NestedValues { reader.must_ignore_byte(b'[')?; let dimension = column.dimension(); let mut values = Vec::with_capacity(dimension); - for _ in 0..dimension { + for idx in 0..dimension { let _ = reader.ignore_white_spaces_or_comments(); - reader.must_ignore_byte(b',')?; - let _ = reader.ignore_white_spaces_or_comments(); - let res: Result = reader.read_float_text(); - match res { - Ok(v) => { - values.push(v.into()); - } - Err(err) => { - return Err(err); - } + if idx != 0 { + reader.must_ignore_byte(b',')?; } + let _ = reader.ignore_white_spaces_or_comments(); + let v: f32 = reader.read_float_text()?; + values.push(v.into()); } reader.must_ignore_byte(b']')?; column.push(&VectorScalarRef::Float32(&values)); diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs index bec069210957b..5e7231b2feb07 100644 --- a/src/query/functions/src/scalars/vector.rs +++ b/src/query/functions/src/scalars/vector.rs @@ -12,20 +12,41 @@ // See the License for the specific language governing permissions and // limitations under the License. 
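+// The distance functions below are registered twice: for Array(Float32)/Array(Float64) +// arguments here, and for native Vector types via the function factories further down. +// Hand-computed sanity values (illustrative, not taken from the test suite): +// l1_distance([1.0, 2.0], [2.0, 4.0]) = |1-2| + |2-4| = 3.0 +// l2_distance([1.0, 2.0], [2.0, 4.0]) = sqrt(1 + 4) ≈ 2.236 +// cosine_distance([1.0, 2.0], [2.0, 4.0]) = 1 - 10/sqrt(5*20) = 0.0 (parallel vectors)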
+use std::sync::Arc; + +use databend_common_exception::Result; +use databend_common_expression::types::AnyType; use databend_common_expression::types::ArrayType; use databend_common_expression::types::Buffer; +use databend_common_expression::types::DataType; use databend_common_expression::types::Float32Type; use databend_common_expression::types::Float64Type; +use databend_common_expression::types::NumberColumn; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::NumberScalar; use databend_common_expression::types::StringType; +use databend_common_expression::types::VectorDataType; +use databend_common_expression::types::VectorScalarRef; use databend_common_expression::types::F32; use databend_common_expression::types::F64; use databend_common_expression::vectorize_with_builder_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; +use databend_common_expression::Column; +use databend_common_expression::EvalContext; +use databend_common_expression::Function; use databend_common_expression::FunctionDomain; +use databend_common_expression::FunctionEval; +use databend_common_expression::FunctionFactory; use databend_common_expression::FunctionRegistry; +use databend_common_expression::FunctionSignature; +use databend_common_expression::Scalar; +use databend_common_expression::ScalarRef; +use databend_common_expression::Value; use databend_common_openai::OpenAI; use databend_common_vector::cosine_distance; use databend_common_vector::cosine_distance_64; +use databend_common_vector::l1_distance; +use databend_common_vector::l1_distance_64; use databend_common_vector::l2_distance; use databend_common_vector::l2_distance_64; @@ -37,12 +58,35 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type>( |lhs, rhs, output, ctx| { - let l= + let l = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(lhs) }; let r = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(rhs) }; - match cosine_distance(l.as_slice(), r .as_slice()) { + match cosine_distance(l.as_slice(), r.as_slice()) { + Ok(dist) => { + output.push(F32::from(dist)); + } + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + output.push(F32::from(0.0)); + } + } + } + ), + ); + + registry.register_passthrough_nullable_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type, _, _>( + "l1_distance", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type>( + |lhs, rhs, output, ctx| { + let l = + unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(lhs) }; + let r = + unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(rhs) }; + + match l1_distance(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F32::from(dist)); } @@ -63,12 +107,12 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type>( |lhs, rhs, output, ctx| { - let l= + let l = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(lhs) }; let r = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(rhs) }; - match l2_distance(l.as_slice(), r .as_slice()) { + match l2_distance(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F32::from(dist)); } @@ -91,7 +135,30 @@ pub fn register(registry: &mut FunctionRegistry) { let r = unsafe { std::mem::transmute::<Buffer<F64>, Buffer<f64>>(rhs) }; - match cosine_distance_64(l.as_slice(), r .as_slice()) { + match cosine_distance_64(l.as_slice(), r.as_slice()) { + Ok(dist) => { + output.push(F64::from(dist)); + } + Err(err) => { + ctx.set_error(output.len(), 
err.to_string()); + output.push(F64::from(0.0)); + } + } + } + ), + ); + + registry.register_passthrough_nullable_2_arg::, ArrayType, Float64Type, _, _>( + "l1_distance", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::, ArrayType, Float64Type>( + |lhs, rhs, output, ctx| { + let l = + unsafe { std::mem::transmute::, Buffer>(lhs) }; + let r = + unsafe { std::mem::transmute::, Buffer>(rhs) }; + + match l1_distance_64(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F64::from(dist)); } @@ -109,12 +176,12 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::, ArrayType, Float64Type>( |lhs, rhs, output, ctx| { - let l= + let l = unsafe { std::mem::transmute::, Buffer>(lhs) }; let r = unsafe { std::mem::transmute::, Buffer>(rhs) }; - match l2_distance_64(l.as_slice(), r .as_slice()) { + match l2_distance_64(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F64::from(dist)); } @@ -226,4 +293,127 @@ pub fn register(registry: &mut FunctionRegistry) { } }), ); + + let cosine_distance_factory = + FunctionFactory::Closure(Box::new(|_, args_type: &[DataType]| { + let args_type = check_args_type(args_type)?; + Some(Arc::new(Function { + signature: FunctionSignature { + name: "cosine_distance".to_string(), + args_type: args_type.clone(), + return_type: DataType::Number(NumberDataType::Float32), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::Full), + eval: Box::new(|args, ctx| calculate_distance(args, ctx, cosine_distance)), + }, + })) + })); + registry.register_function_factory("cosine_distance", cosine_distance_factory); + + let l1_distance_factory = FunctionFactory::Closure(Box::new(|_, args_type: &[DataType]| { + let args_type = check_args_type(args_type)?; + Some(Arc::new(Function { + signature: FunctionSignature { + name: "l1_distance".to_string(), + args_type: args_type.clone(), + return_type: DataType::Number(NumberDataType::Float32), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::Full), + eval: Box::new(|args, ctx| calculate_distance(args, ctx, l1_distance)), + }, + })) + })); + registry.register_function_factory("l1_distance", l1_distance_factory); + + let l2_distance_factory = FunctionFactory::Closure(Box::new(|_, args_type: &[DataType]| { + let args_type = check_args_type(args_type)?; + Some(Arc::new(Function { + signature: FunctionSignature { + name: "l2_distance".to_string(), + args_type: args_type.clone(), + return_type: DataType::Number(NumberDataType::Float32), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::Full), + eval: Box::new(|args, ctx| calculate_distance(args, ctx, l2_distance)), + }, + })) + })); + registry.register_function_factory("l2_distance", l2_distance_factory); +} + +fn check_args_type(args_type: &[DataType]) -> Option> { + if args_type.len() != 2 { + return None; + } + let args_type0 = args_type[0].remove_nullable(); + let vector_type0 = args_type0.as_vector()?; + let args_type1 = args_type[1].remove_nullable(); + let vector_type1 = args_type1.as_vector()?; + match (vector_type0, vector_type1) { + (VectorDataType::Int8(dim0), VectorDataType::Int8(dim1)) => { + if dim0 != dim1 { + return None; + } + } + (VectorDataType::Float32(dim0), VectorDataType::Float32(dim1)) => { + if dim0 != dim1 { + return None; + } + } + (_, _) => { + return None; + } + } + Some(args_type.to_vec()) +} + +fn calculate_distance( + args: &[Value], + _ctx: &mut EvalContext, + distance_fn: F, +) 
-> Value +where + F: Fn(&[f32], &[f32]) -> Result, +{ + let len_opt = args.iter().find_map(|arg| match arg { + Value::Column(col) => Some(col.len()), + _ => None, + }); + let len = len_opt.unwrap_or(1); + let mut builder = Vec::with_capacity(len); + for i in 0..len { + let lhs = unsafe { args[0].index_unchecked(i) }; + let rhs = unsafe { args[1].index_unchecked(i) }; + match (lhs, rhs) { + ( + ScalarRef::Vector(VectorScalarRef::Int8(lhs)), + ScalarRef::Vector(VectorScalarRef::Int8(rhs)), + ) => { + let l: Vec<_> = lhs.iter().map(|v| *v as f32).collect(); + let r: Vec<_> = rhs.iter().map(|v| *v as f32).collect(); + let dist = distance_fn(l.as_slice(), r.as_slice()).unwrap(); + builder.push(F32::from(dist)); + } + ( + ScalarRef::Vector(VectorScalarRef::Float32(lhs)), + ScalarRef::Vector(VectorScalarRef::Float32(rhs)), + ) => { + let l = unsafe { std::mem::transmute::<&[F32], &[f32]>(lhs) }; + let r = unsafe { std::mem::transmute::<&[F32], &[f32]>(rhs) }; + let dist = distance_fn(l, r).unwrap(); + builder.push(F32::from(dist)); + } + (_, _) => { + builder.push(F32::from(f32::MAX)); + } + } + } + if len_opt.is_some() { + Value::Column(Column::Number(NumberColumn::Float32(Buffer::from(builder)))) + } else { + Value::Scalar(Scalar::Number(NumberScalar::Float32(builder[0]))) + } } diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index f3aa029688e9a..a691ba428c0f8 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -1345,6 +1345,7 @@ Functions overloads: 1 cosine_distance(Array(Float32) NULL, Array(Float32) NULL) :: Float32 NULL 2 cosine_distance(Array(Float64), Array(Float64)) :: Float64 3 cosine_distance(Array(Float64) NULL, Array(Float64) NULL) :: Float64 NULL +4 cosine_distance FACTORY 0 cot(Float64) :: Float64 1 cot(Float64 NULL) :: Float64 NULL 0 crc32(String) :: UInt32 @@ -2299,10 +2300,16 @@ Functions overloads: 1 json_strip_nulls(Variant NULL) :: Variant NULL 0 json_typeof(Variant) :: String 1 json_typeof(Variant NULL) :: String NULL +0 l1_distance(Array(Float32), Array(Float32)) :: Float32 +1 l1_distance(Array(Float32) NULL, Array(Float32) NULL) :: Float32 NULL +2 l1_distance(Array(Float64), Array(Float64)) :: Float64 +3 l1_distance(Array(Float64) NULL, Array(Float64) NULL) :: Float64 NULL +4 l1_distance FACTORY 0 l2_distance(Array(Float32), Array(Float32)) :: Float32 1 l2_distance(Array(Float32) NULL, Array(Float32) NULL) :: Float32 NULL 2 l2_distance(Array(Float64), Array(Float64)) :: Float64 3 l2_distance(Array(Float64) NULL, Array(Float64) NULL) :: Float64 NULL +4 l2_distance FACTORY 0 left(String, UInt64) :: String 1 left(String NULL, UInt64 NULL) :: String NULL 0 length(Variant NULL) :: UInt32 NULL diff --git a/src/query/functions/tests/it/scalars/testdata/vector.txt b/src/query/functions/tests/it/scalars/testdata/vector.txt index dc4e081423cc6..f7626f452ef77 100644 --- a/src/query/functions/tests/it/scalars/testdata/vector.txt +++ b/src/query/functions/tests/it/scalars/testdata/vector.txt @@ -1,23 +1,171 @@ -ast : cosine_distance([a], [b]) -raw expr : cosine_distance(array(a::Float32), array(b::Float32)) -checked expr : cosine_distance(array(a), array(b)) -evaluation: -+--------+---------+---------+---------+ -| | a | b | Output | -+--------+---------+---------+---------+ -| Type | Float32 | Float32 | Float32 | -| Domain | {0..=2} | {3..=5} | Unknown | -| Row 0 | 0 | 3 | NaN | -| Row 1 | 1 | 
4 | 0 | -| Row 2 | 2 | 5 | 0 | -+--------+---------+---------+---------+ -evaluation (internal): -+--------+----------------------+ -| Column | Data | -+--------+----------------------+ -| a | Float32([0, 1, 2]) | -| b | Float32([3, 4, 5]) | -| Output | Float32([NaN, 0, 0]) | -+--------+----------------------+ +ast : cosine_distance([1,0,0], [1,0,0]) +raw expr : cosine_distance(array(1, 0, 0), array(1, 0, 0)) +checked expr : cosine_distance(CAST(array(1_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 0_u8, 0_u8) AS Array(Float32))) +optimized expr : 0_f32 +output type : Float32 +output domain : {0..=0} +output : 0 + + +ast : cosine_distance([1,0,0], [-1,0,0]) +raw expr : cosine_distance(array(1, 0, 0), array(minus(1), 0, 0)) +checked expr : cosine_distance(CAST(array(1_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(minus(1_u8), CAST(0_u8 AS Int16), CAST(0_u8 AS Int16)) AS Array(Float32))) +optimized expr : 2_f32 +output type : Float32 +output domain : {2..=2} +output : 2 + + +ast : cosine_distance([1,2,3], [4,5,6]) +raw expr : cosine_distance(array(1, 2, 3), array(4, 5, 6)) +checked expr : cosine_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(4_u8, 5_u8, 6_u8) AS Array(Float32))) +optimized expr : 0.02536821_f32 +output type : Float32 +output domain : {0.02536821..=0.02536821} +output : 0.02536821 + + +ast : cosine_distance([0,0,0], [1,2,3]) +raw expr : cosine_distance(array(0, 0, 0), array(1, 2, 3)) +checked expr : cosine_distance(CAST(array(0_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : NaN_f32 +output type : Float32 +output domain : {NaN..=NaN} +output : NaN + + +ast : cosine_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3)) +raw expr : cosine_distance(CAST(array(1, minus(2), 3) AS Vector(3)), CAST(array(minus(4), 5, minus(6)) AS Vector(3))) +checked expr : cosine_distance(CAST(array(CAST(1_u8 AS Int16), minus(2_u8), CAST(3_u8 AS Int16)) AS Vector(3)), CAST(array(minus(4_u8), CAST(5_u8 AS Int16), minus(6_u8)) AS Vector(3))) +optimized expr : 1.974632_f32 +output type : Float32 +output domain : {1.974632..=1.974632} +output : 1.974632 + + +ast : cosine_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3)) +raw expr : cosine_distance(CAST(array(0.1, 0.2, 0.3) AS Vector(3)), CAST(array(0.4, 0.5, 0.6) AS Vector(3))) +checked expr : cosine_distance(CAST(array(0.1_d64(1,1), 0.2_d64(1,1), 0.3_d64(1,1)) AS Vector(3)), CAST(array(0.4_d64(1,1), 0.5_d64(1,1), 0.6_d64(1,1)) AS Vector(3))) +optimized expr : 0.02536827_f32 +output type : Float32 +output domain : {0.02536827..=0.02536827} +output : 0.02536827 + + +ast : cosine_distance([1,0]::vector(2), [0,1]::vector(2)) +raw expr : cosine_distance(CAST(array(1, 0) AS Vector(2)), CAST(array(0, 1) AS Vector(2))) +checked expr : cosine_distance(CAST(array(1_u8, 0_u8) AS Vector(2)), CAST(array(0_u8, 1_u8) AS Vector(2))) +optimized expr : 1_f32 +output type : Float32 +output domain : {1..=1} +output : 1 + + +ast : l1_distance([1,2,3], [1,2,3]) +raw expr : l1_distance(array(1, 2, 3), array(1, 2, 3)) +checked expr : l1_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 0_f32 +output type : Float32 +output domain : {0..=0} +output : 0 + + +ast : l1_distance([1,2,3], [4,5,6]) +raw expr : l1_distance(array(1, 2, 3), array(4, 5, 6)) +checked expr : l1_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(4_u8, 5_u8, 6_u8) AS Array(Float32))) +optimized expr : 9_f32 +output type : 
Float32 +output domain : {9..=9} +output : 9 + + +ast : l1_distance([0,0,0], [1,2,3]) +raw expr : l1_distance(array(0, 0, 0), array(1, 2, 3)) +checked expr : l1_distance(CAST(array(0_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 6_f32 +output type : Float32 +output domain : {6..=6} +output : 6 + + +ast : l1_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3)) +raw expr : l1_distance(CAST(array(1, minus(2), 3) AS Vector(3)), CAST(array(minus(4), 5, minus(6)) AS Vector(3))) +checked expr : l1_distance(CAST(array(CAST(1_u8 AS Int16), minus(2_u8), CAST(3_u8 AS Int16)) AS Vector(3)), CAST(array(minus(4_u8), CAST(5_u8 AS Int16), minus(6_u8)) AS Vector(3))) +optimized expr : 21_f32 +output type : Float32 +output domain : {21..=21} +output : 21 + + +ast : l1_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3)) +raw expr : l1_distance(CAST(array(0.1, 0.2, 0.3) AS Vector(3)), CAST(array(0.4, 0.5, 0.6) AS Vector(3))) +checked expr : l1_distance(CAST(array(0.1_d64(1,1), 0.2_d64(1,1), 0.3_d64(1,1)) AS Vector(3)), CAST(array(0.4_d64(1,1), 0.5_d64(1,1), 0.6_d64(1,1)) AS Vector(3))) +optimized expr : 0.9_f32 +output type : Float32 +output domain : {0.9..=0.9} +output : 0.9 + + +ast : l1_distance([1,2]::vector(2), [3,4]::vector(2)) +raw expr : l1_distance(CAST(array(1, 2) AS Vector(2)), CAST(array(3, 4) AS Vector(2))) +checked expr : l1_distance(CAST(array(1_u8, 2_u8) AS Vector(2)), CAST(array(3_u8, 4_u8) AS Vector(2))) +optimized expr : 4_f32 +output type : Float32 +output domain : {4..=4} +output : 4 + + +ast : l2_distance([1,2,3], [1,2,3]) +raw expr : l2_distance(array(1, 2, 3), array(1, 2, 3)) +checked expr : l2_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 0_f32 +output type : Float32 +output domain : {0..=0} +output : 0 + + +ast : l2_distance([1,2,3], [4,5,6]) +raw expr : l2_distance(array(1, 2, 3), array(4, 5, 6)) +checked expr : l2_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(4_u8, 5_u8, 6_u8) AS Array(Float32))) +optimized expr : 5.196152_f32 +output type : Float32 +output domain : {5.196152..=5.196152} +output : 5.196152 + + +ast : l2_distance([0,0,0], [1,2,3]) +raw expr : l2_distance(array(0, 0, 0), array(1, 2, 3)) +checked expr : l2_distance(CAST(array(0_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 3.741658_f32 +output type : Float32 +output domain : {3.741658..=3.741658} +output : 3.741658 + + +ast : l2_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3)) +raw expr : l2_distance(CAST(array(1, minus(2), 3) AS Vector(3)), CAST(array(minus(4), 5, minus(6)) AS Vector(3))) +checked expr : l2_distance(CAST(array(CAST(1_u8 AS Int16), minus(2_u8), CAST(3_u8 AS Int16)) AS Vector(3)), CAST(array(minus(4_u8), CAST(5_u8 AS Int16), minus(6_u8)) AS Vector(3))) +optimized expr : 12.4499_f32 +output type : Float32 +output domain : {12.4499..=12.4499} +output : 12.4499 + + +ast : l2_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3)) +raw expr : l2_distance(CAST(array(0.1, 0.2, 0.3) AS Vector(3)), CAST(array(0.4, 0.5, 0.6) AS Vector(3))) +checked expr : l2_distance(CAST(array(0.1_d64(1,1), 0.2_d64(1,1), 0.3_d64(1,1)) AS Vector(3)), CAST(array(0.4_d64(1,1), 0.5_d64(1,1), 0.6_d64(1,1)) AS Vector(3))) +optimized expr : 0.5196152_f32 +output type : Float32 +output domain : {0.5196152..=0.5196152} +output : 0.5196152 + + +ast : l2_distance([1,2]::vector(2), [3,4]::vector(2)) +raw expr : 
l2_distance(CAST(array(1, 2) AS Vector(2)), CAST(array(3, 4) AS Vector(2))) +checked expr : l2_distance(CAST(array(1_u8, 2_u8) AS Vector(2)), CAST(array(3_u8, 4_u8) AS Vector(2))) +optimized expr : 2.828427_f32 +output type : Float32 +output domain : {2.828427..=2.828427} +output : 2.828427 diff --git a/src/query/functions/tests/it/scalars/vector.rs b/src/query/functions/tests/it/scalars/vector.rs index 8421e5e61d590..5e5020aede4d0 100644 --- a/src/query/functions/tests/it/scalars/vector.rs +++ b/src/query/functions/tests/it/scalars/vector.rs @@ -14,8 +14,6 @@ use std::io::Write; -use databend_common_expression::types::*; -use databend_common_expression::FromData; use goldenfile::Mint; use super::run_ast; @@ -26,11 +24,62 @@ fn test_vector() { let file = &mut mint.new_goldenfile("vector.txt").unwrap(); test_vector_cosine_distance(file); + test_vector_l1_distance(file); + test_vector_l2_distance(file); } fn test_vector_cosine_distance(file: &mut impl Write) { - run_ast(file, "cosine_distance([a], [b])", &[ - ("a", Float32Type::from_data(vec![0f32, 1.0, 2.0])), - ("b", Float32Type::from_data(vec![3f32, 4.0, 5.0])), - ]); + run_ast(file, "cosine_distance([1,0,0], [1,0,0])", &[]); + run_ast(file, "cosine_distance([1,0,0], [-1,0,0])", &[]); + run_ast(file, "cosine_distance([1,2,3], [4,5,6])", &[]); + run_ast(file, "cosine_distance([0,0,0], [1,2,3])", &[]); + run_ast( + file, + "cosine_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3))", + &[], + ); + run_ast( + file, + "cosine_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3))", + &[], + ); + run_ast( + file, + "cosine_distance([1,0]::vector(2), [0,1]::vector(2))", + &[], + ); +} + +fn test_vector_l1_distance(file: &mut impl Write) { + run_ast(file, "l1_distance([1,2,3], [1,2,3])", &[]); + run_ast(file, "l1_distance([1,2,3], [4,5,6])", &[]); + run_ast(file, "l1_distance([0,0,0], [1,2,3])", &[]); + run_ast( + file, + "l1_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3))", + &[], + ); + run_ast( + file, + "l1_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3))", + &[], + ); + run_ast(file, "l1_distance([1,2]::vector(2), [3,4]::vector(2))", &[]); +} + +fn test_vector_l2_distance(file: &mut impl Write) { + run_ast(file, "l2_distance([1,2,3], [1,2,3])", &[]); + run_ast(file, "l2_distance([1,2,3], [4,5,6])", &[]); + run_ast(file, "l2_distance([0,0,0], [1,2,3])", &[]); + run_ast( + file, + "l2_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3))", + &[], + ); + run_ast( + file, + "l2_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3))", + &[], + ); + run_ast(file, "l2_distance([1,2]::vector(2), [3,4]::vector(2))", &[]); } diff --git a/src/query/service/src/test_kits/block_writer.rs b/src/query/service/src/test_kits/block_writer.rs index 7bd9b7bed0729..6f81b9a8f1dbe 100644 --- a/src/query/service/src/test_kits/block_writer.rs +++ b/src/query/service/src/test_kits/block_writer.rs @@ -110,6 +110,8 @@ impl<'a> BlockWriter<'a> { None, None, None, + None, + None, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs index d16d635d824d7..520231794d2cb 100644 --- a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs +++ b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs @@ -337,6 +337,8 @@ fn build_test_segment_info( bloom_filter_index_size: 0, inverted_index_size: None, ngram_filter_index_size: None, + vector_index_size: None, + vector_index_location: None, 
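// vector_index_size / vector_index_location are the new block-level fields for
// the HNSW index artifact; this fixture leaves them None because it builds no
// vector index.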
virtual_block_meta: None, compression: Compression::Lz4, create_on: Some(Utc::now()), diff --git a/src/query/service/tests/it/storages/fuse/operations/internal_column.rs b/src/query/service/tests/it/storages/fuse/operations/internal_column.rs index 0300a5e1b8f03..805b24432413b 100644 --- a/src/query/service/tests/it/storages/fuse/operations/internal_column.rs +++ b/src/query/service/tests/it/storages/fuse/operations/internal_column.rs @@ -65,6 +65,7 @@ fn expected_data_block( base_block_ids: None, inner: None, matched_rows: block_meta.matched_rows.clone(), + vector_scores: block_meta.vector_scores.clone(), }; for internal_column in internal_columns { let column = internal_column.generate_column_values(&internal_column_meta, num_rows); diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs index ab4ea10645464..97c3e9cf85f0d 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs @@ -79,6 +79,8 @@ async fn test_recluster_mutator_block_select() -> Result<()> { None, None, None, + None, + None, meta::Compression::Lz4Raw, Some(Utc::now()), )); diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs index 97fbcebc9e0eb..ebdcabfd13662 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs @@ -779,6 +779,8 @@ impl CompactSegmentTestFixture { None, None, None, + None, + None, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs index fd2ee9c0d6914..09dccdb11816e 100644 --- a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs +++ b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs @@ -105,6 +105,8 @@ fn test_to_partitions() -> Result<()> { None, None, None, + None, + None, meta::Compression::Lz4Raw, Some(Utc::now()), )); diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs index 64c1f89b04d3e..c87b524f9217f 100644 --- a/src/query/service/tests/it/storages/fuse/statistics.rs +++ b/src/query/service/tests/it/storages/fuse/statistics.rs @@ -635,6 +635,8 @@ fn test_reduce_block_meta() -> databend_common_exception::Result<()> { None, None, None, + None, + None, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 5b4a92ad7b69b..0cef36101265a 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -32,6 +32,9 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | 'cache' | 'table_meta_snapshot_count' | '256' | '' | | 'cache' | 'table_meta_statistic_count' | '256' | '' | | 'cache' | 'table_prune_partitions_count' | '256' | '' | +| 'cache' | 'vector_index_filter_memory_ratio' | '0' | '' | +| 'cache' | 'vector_index_filter_size' | '2147483648' | '' | +| 'cache' | 
'vector_index_meta_count' | '3000' | '' | | 'log' | 'dir' | './.databend/logs' | '' | | 'log' | 'file.dir' | './.databend/logs' | '' | | 'log' | 'file.format' | 'text' | '' | diff --git a/src/query/sql/src/executor/physical_plans/physical_table_scan.rs b/src/query/sql/src/executor/physical_plans/physical_table_scan.rs index 0541b88c9abd1..ea303927eda2b 100644 --- a/src/query/sql/src/executor/physical_plans/physical_table_scan.rs +++ b/src/query/sql/src/executor/physical_plans/physical_table_scan.rs @@ -564,6 +564,7 @@ impl PhysicalPlanBuilder { agg_index: None, change_type: scan.change_type.clone(), inverted_index: scan.inverted_index.clone(), + vector_index: scan.vector_index.clone(), sample: scan.sample.clone(), }) } diff --git a/src/query/sql/src/planner/binder/bind_context.rs b/src/query/sql/src/planner/binder/bind_context.rs index 2cf026e08983e..e3015508ce3ad 100644 --- a/src/query/sql/src/planner/binder/bind_context.rs +++ b/src/query/sql/src/planner/binder/bind_context.rs @@ -25,6 +25,7 @@ use databend_common_ast::ast::WindowSpec; use databend_common_ast::Span; use databend_common_catalog::plan::InternalColumn; use databend_common_catalog::plan::InvertedIndexInfo; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ColumnId; @@ -143,6 +144,8 @@ pub struct BindContext { pub inverted_index_map: Box>, + pub vector_index_map: Box>, + /// Whether allow rewrite as virtual column and pushdown. pub allow_virtual_column: bool, @@ -217,6 +220,7 @@ impl BindContext { have_udf_script: false, have_udf_server: false, inverted_index_map: Box::default(), + vector_index_map: Box::default(), allow_virtual_column: false, expr_context: ExprContext::default(), planning_agg_index: false, @@ -261,6 +265,7 @@ impl BindContext { have_udf_script: false, have_udf_server: false, inverted_index_map: Box::default(), + vector_index_map: Box::default(), allow_virtual_column: parent.allow_virtual_column, expr_context: ExprContext::default(), planning_agg_index: false, diff --git a/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs b/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs index 0938ea87dda2c..85ba4317ee0f5 100644 --- a/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs +++ b/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs @@ -465,7 +465,7 @@ impl Binder { let row_id_index: usize = column_binding.index; - *expr = expr.add_column_index_to_scans(table_index, row_id_index, &None); + *expr = expr.add_column_index_to_scans(table_index, row_id_index, &None, &None); self.metadata .write() diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index 51d18d059ed39..4430dddf37ce2 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -1105,13 +1105,20 @@ impl<'a> Binder { .to_string(), )); } + let mut vector_index_map = mem::take(&mut bind_context.vector_index_map); for ((table_index, _), column_index) in bound_internal_columns.iter() { let inverted_index = inverted_index_map.shift_remove(table_index).map(|mut i| { i.has_score = has_score; i }); - s_expr = s_expr.add_column_index_to_scans(*table_index, *column_index, &inverted_index); + let vector_index = vector_index_map.shift_remove(table_index); + s_expr = s_expr.add_column_index_to_scans( + *table_index, + *column_index, + &inverted_index, + &vector_index, + ); } Ok(s_expr) 
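        // Each matching Scan now carries the internal column plus, when the
        // type checker discovered one, the table's vector index info.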
} diff --git a/src/query/sql/src/planner/binder/ddl/index.rs b/src/query/sql/src/planner/binder/ddl/index.rs index dbf5c8bde93a6..b872de8bfd37f 100644 --- a/src/query/sql/src/planner/binder/ddl/index.rs +++ b/src/query/sql/src/planner/binder/ddl/index.rs @@ -45,6 +45,7 @@ use databend_common_meta_app::schema::IndexNameIdent; use databend_storages_common_table_meta::meta::Location; use derive_visitor::Drive; use derive_visitor::DriveMut; +use itertools::Itertools; use crate::binder::Binder; use crate::optimizer::optimize; @@ -104,6 +105,19 @@ fn is_valid_index_record_values>(opt_val: S) -> bool { INDEX_RECORD_VALUES.contains(opt_val.as_ref()) } +// valid values for vector index distance +static INDEX_DISTANCE_VALUES: LazyLock> = LazyLock::new(|| { + let mut r = HashSet::new(); + r.insert("cosine"); + r.insert("l1"); + r.insert("l2"); + r +}); + +fn is_valid_index_distance_values>(opt_val: S) -> bool { + INDEX_DISTANCE_VALUES.contains(opt_val.as_ref()) +} + impl Binder { #[async_backtrace::framed] pub(in crate::planner::binder) async fn bind_query_index( @@ -750,6 +764,23 @@ impl Binder { } options.insert("ef_construct".to_string(), value); } + "distance" => { + let raw_distances: Vec<&str> = value.split(',').collect(); + let mut distances = BTreeSet::new(); + for raw_distance in raw_distances { + let distance = raw_distance.trim(); + if !is_valid_index_distance_values(distance) { + return Err(ErrorCode::IndexOptionInvalid(format!( + "value `{distance}` is invalid index distance type", + ))); + } + distances.insert(distance); + } + options.insert( + "distance".to_string(), + distances.into_iter().join(",").to_string(), + ); + } _ => { return Err(ErrorCode::IndexOptionInvalid(format!( "index option `{key}` is invalid key for create vector index statement", @@ -757,6 +788,12 @@ impl Binder { } } } + if !options.contains_key("distance") { + return Err(ErrorCode::IndexOptionInvalid( + "must specify `distance` option, valid values are: `cosine`, `l1` and `l2`" + .to_string(), + )); + } Ok(options) } diff --git a/src/query/sql/src/planner/binder/internal_column_factory.rs b/src/query/sql/src/planner/binder/internal_column_factory.rs index e43f56fe01608..90fc2a2db5e58 100644 --- a/src/query/sql/src/planner/binder/internal_column_factory.rs +++ b/src/query/sql/src/planner/binder/internal_column_factory.rs @@ -27,6 +27,7 @@ use databend_common_expression::SEARCH_MATCHED_COL_NAME; use databend_common_expression::SEARCH_SCORE_COL_NAME; use databend_common_expression::SEGMENT_NAME_COL_NAME; use databend_common_expression::SNAPSHOT_NAME_COL_NAME; +use databend_common_expression::VECTOR_SCORE_COL_NAME; #[ctor] pub static INTERNAL_COLUMN_FACTORY: InternalColumnFactory = InternalColumnFactory::init(); @@ -79,6 +80,11 @@ impl InternalColumnFactory { InternalColumn::new(SEARCH_SCORE_COL_NAME, InternalColumnType::SearchScore), ); + internal_columns.insert( + VECTOR_SCORE_COL_NAME.to_string(), + InternalColumn::new(VECTOR_SCORE_COL_NAME, InternalColumnType::VectorScore), + ); + internal_columns.insert( FILENAME_COLUMN_NAME.to_string(), InternalColumn::new(FILENAME_COLUMN_NAME, InternalColumnType::FileName), diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index c659a4b12910a..b63ae6d0cf0f5 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -187,6 +187,7 @@ impl Binder { have_udf_script: false, have_udf_server: false, inverted_index_map: Box::default(), + vector_index_map: Box::default(), 
allow_virtual_column: false, expr_context: ExprContext::default(), planning_agg_index: false, diff --git a/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs b/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs index 4900fe5167077..a2fd2194e2400 100644 --- a/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs +++ b/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use std::sync::Mutex; use databend_common_catalog::plan::InvertedIndexInfo; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use educe::Educe; @@ -401,6 +402,7 @@ impl SExpr { table_index: IndexType, column_index: IndexType, inverted_index: &Option, + vector_index: &Option, ) -> SExpr { #[recursive::recursive] fn add_column_index_to_scans_recursive( @@ -408,6 +410,7 @@ impl SExpr { column_index: IndexType, table_index: IndexType, inverted_index: &Option, + vector_index: &Option, ) -> SExpr { let mut s_expr = s_expr.clone(); s_expr.plan = if let RelOperator::Scan(mut p) = (*s_expr.plan).clone() { @@ -416,6 +419,9 @@ impl SExpr { if inverted_index.is_some() { p.inverted_index = inverted_index.clone(); } + if vector_index.is_some() { + p.vector_index = vector_index.clone(); + } } Arc::new(p.into()) } else { @@ -432,6 +438,7 @@ impl SExpr { column_index, table_index, inverted_index, + vector_index, ))); } @@ -441,7 +448,13 @@ impl SExpr { } } - add_column_index_to_scans_recursive(self, column_index, table_index, inverted_index) + add_column_index_to_scans_recursive( + self, + column_index, + table_index, + inverted_index, + vector_index, + ) } // The method will clear the applied rules of current SExpr and its children. diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index 2f2227bf16a96..d7a8a0d3f136f 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use databend_common_ast::ast::SampleConfig; use databend_common_catalog::plan::InvertedIndexInfo; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_catalog::statistics::BasicColumnStatistics; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -104,6 +105,7 @@ pub struct Scan { // Whether to update stream columns. pub update_stream_columns: bool, pub inverted_index: Option, + pub vector_index: Option, // Lazy row fetch. 
pub is_lazy_table: bool, pub sample: Option, @@ -146,6 +148,7 @@ impl Scan { change_type: self.change_type.clone(), update_stream_columns: self.update_stream_columns, inverted_index: self.inverted_index.clone(), + vector_index: self.vector_index.clone(), is_lazy_table: self.is_lazy_table, sample: self.sample.clone(), scan_id: self.scan_id, diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 906bdcb56ebc6..2278bf26d9405 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -56,6 +56,7 @@ use databend_common_catalog::plan::InternalColumn; use databend_common_catalog::plan::InternalColumnType; use databend_common_catalog::plan::InvertedIndexInfo; use databend_common_catalog::plan::InvertedIndexOption; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_catalog::table_context::TableContext; use databend_common_compress::CompressAlgorithm; use databend_common_compress::DecompressDecoder; @@ -77,6 +78,7 @@ use databend_common_expression::types::Decimal; use databend_common_expression::types::NumberDataType; use databend_common_expression::types::NumberScalar; use databend_common_expression::types::F32; +use databend_common_expression::Column; use databend_common_expression::ColumnIndex; use databend_common_expression::Constant; use databend_common_expression::ConstantFolder; @@ -90,6 +92,7 @@ use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_common_expression::SEARCH_MATCHED_COL_NAME; use databend_common_expression::SEARCH_SCORE_COL_NAME; +use databend_common_expression::VECTOR_SCORE_COL_NAME; use databend_common_functions::aggregates::AggregateFunctionFactory; use databend_common_functions::is_builtin_function; use databend_common_functions::ASYNC_FUNCTIONS; @@ -99,6 +102,8 @@ use databend_common_functions::GENERAL_SEARCH_FUNCTIONS; use databend_common_functions::GENERAL_WINDOW_FUNCTIONS; use databend_common_functions::GENERAL_WITHIN_GROUP_FUNCTIONS; use databend_common_functions::RANK_WINDOW_FUNCTIONS; +use databend_common_license::license::Feature; +use databend_common_license::license_manager::LicenseManagerSwitch; use databend_common_meta_app::principal::LambdaUDF; use databend_common_meta_app::principal::UDAFScript; use databend_common_meta_app::principal::UDFDefinition; @@ -108,6 +113,7 @@ use databend_common_meta_app::schema::dictionary_name_ident::DictionaryNameIdent use databend_common_meta_app::schema::DictionaryIdentity; use databend_common_meta_app::schema::GetSequenceReq; use databend_common_meta_app::schema::SequenceIdent; +use databend_common_meta_app::schema::TableIndexType; use databend_common_storage::init_stage_operator; use databend_common_users::UserApiProvider; use derive_visitor::Drive; @@ -2705,6 +2711,9 @@ impl<'a> TypeChecker<'a> { let mut index_schema = None; let mut index_options = BTreeMap::new(); for table_index in table_indexes.values() { + if table_index.index_type != TableIndexType::Inverted { + continue; + } if column_ids .iter() .all(|id| table_index.column_ids.contains(id)) @@ -2922,6 +2931,11 @@ impl<'a> TypeChecker<'a> { { return rewritten_variant_expr; } + if let Some(rewritten_vector_expr) = + self.try_rewrite_vector_function(span, func_name, &args) + { + return rewritten_vector_expr; + } self.resolve_scalar_function_call(span, func_name, params, args) } @@ -4368,6 +4382,189 @@ impl<'a> TypeChecker<'a> { None } + fn vector_functions() -> &'static 
[Ascii<&'static str>] { + static VECTOR_FUNCTIONS: &[Ascii<&'static str>] = &[ + Ascii::new("cosine_distance"), + Ascii::new("l1_distance"), + Ascii::new("l2_distance"), + ]; + VECTOR_FUNCTIONS + } + + fn try_rewrite_vector_function( + &mut self, + span: Span, + func_name: &str, + args: &[ScalarExpr], + ) -> Option>> { + // Try rewrite vector distance function to vector score internal column, + // so that the vector index can be used to accelerate the query. + let uni_case_func_name = Ascii::new(func_name); + if Self::vector_functions().contains(&uni_case_func_name) { + match args { + [ScalarExpr::BoundColumnRef(BoundColumnRef { + column: + ColumnBinding { + table_index, + database_name, + table_name, + column_name, + data_type, + .. + }, + .. + }), ScalarExpr::CastExpr(CastExpr { + argument, + target_type, + .. + })] + | [ScalarExpr::CastExpr(CastExpr { + argument, + target_type, + .. + }), ScalarExpr::BoundColumnRef(BoundColumnRef { + column: + ColumnBinding { + table_index, + database_name, + table_name, + column_name, + data_type, + .. + }, + .. + })] => { + let col_data_type = data_type.remove_nullable(); + if table_index.is_some() + && matches!(col_data_type, DataType::Vector(_)) + && matches!(&**argument, ScalarExpr::ConstantExpr(_)) + && matches!(&**target_type, DataType::Vector(_)) + && LicenseManagerSwitch::instance() + .check_enterprise_enabled( + self.ctx.get_license_key(), + Feature::VectorIndex, + ) + .is_ok() + { + let table_index = table_index.unwrap(); + let table_entry = self.metadata.read().table(table_index).clone(); + let table = table_entry.table(); + let table_info = table.get_table_info(); + let table_schema = table_info.schema(); + let table_indexes = &table_info.meta.indexes; + if self + .bind_context + .vector_index_map + .contains_key(&table_index) + { + return None; + } + let Ok(column_id) = table_schema.column_id_of(column_name) else { + return None; + }; + for vector_index in table_indexes.values() { + if vector_index.index_type != TableIndexType::Vector { + continue; + } + let Some(distances) = vector_index.options.get("distance") else { + continue; + }; + // distance_type must match function name + let mut matched_distance = false; + let distance_types: Vec<&str> = distances.split(',').collect(); + for distance_type in distance_types { + if func_name.starts_with(distance_type) { + matched_distance = true; + break; + } + } + if !matched_distance { + continue; + } + if vector_index.column_ids.contains(&column_id) { + let internal_column = InternalColumn::new( + VECTOR_SCORE_COL_NAME, + InternalColumnType::VectorScore, + ); + let internal_column_binding = InternalColumnBinding { + database_name: database_name.clone(), + table_name: table_name.clone(), + internal_column, + }; + let Ok(column_binding) = + self.bind_context.add_internal_column_binding( + &internal_column_binding, + self.metadata.clone(), + Some(table_index), + false, + ) + else { + return None; + }; + + let new_column = ScalarExpr::BoundColumnRef(BoundColumnRef { + span, + column: column_binding, + }); + + let arg = ConstantExpr::try_from(*argument.clone()).unwrap(); + let Scalar::Array(arg_col) = arg.value else { + return None; + }; + + let col_vector_type = col_data_type.as_vector().unwrap(); + let col_dimension = col_vector_type.dimension() as usize; + let arg_vector_type = target_type.as_vector().unwrap(); + let arg_dimension = arg_vector_type.dimension() as usize; + if col_dimension != arg_dimension || arg_col.len() != col_dimension + { + return None; + } + let mut query_values = 
Vec::with_capacity(arg_col.len()); + match arg_col { + Column::Number(num_col) => { + for i in 0..num_col.len() { + let num = unsafe { num_col.index_unchecked(i) }; + query_values.push(num.to_f32()); + } + } + Column::Decimal(dec_col) => { + for i in 0..dec_col.len() { + let dec = unsafe { dec_col.index_unchecked(i) }; + query_values.push(F32::from(dec.to_float32())); + } + } + _ => { + return None; + } + } + + let index_info = VectorIndexInfo { + index_name: vector_index.name.clone(), + index_version: vector_index.version.clone(), + index_options: vector_index.options.clone(), + column_id, + func_name: func_name.to_string(), + query_values, + }; + self.bind_context + .vector_index_map + .insert(table_index, index_info); + + return Some(Ok(Box::new(( + new_column, + DataType::Number(NumberDataType::Float32), + )))); + } + } + } + } + _ => {} + } + } + None + } + fn resolve_trim_function( &mut self, span: Span, diff --git a/src/query/storages/common/cache/src/cache_items.rs b/src/query/storages/common/cache/src/cache_items.rs index 3b3df7f23f7b7..1f39fd50c10ef 100644 --- a/src/query/storages/common/cache/src/cache_items.rs +++ b/src/query/storages/common/cache/src/cache_items.rs @@ -21,6 +21,8 @@ pub use databend_storages_common_index::filters::FilterImpl; pub use databend_storages_common_index::BloomIndexMeta; pub use databend_storages_common_index::InvertedIndexFile; pub use databend_storages_common_index::InvertedIndexMeta; +pub use databend_storages_common_index::VectorIndexFile; +pub use databend_storages_common_index::VectorIndexMeta; pub use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegment; pub use databend_storages_common_table_meta::meta::BlockMeta; pub use databend_storages_common_table_meta::meta::CompactSegmentInfo; diff --git a/src/query/storages/common/cache/src/caches.rs b/src/query/storages/common/cache/src/caches.rs index cfe565a00349b..944462e5bd4bc 100644 --- a/src/query/storages/common/cache/src/caches.rs +++ b/src/query/storages/common/cache/src/caches.rs @@ -52,6 +52,9 @@ pub type BloomIndexMetaCache = HybridCache; pub type InvertedIndexMetaCache = InMemoryLruCache; pub type InvertedIndexFileCache = InMemoryLruCache; +pub type VectorIndexMetaCache = InMemoryLruCache; +pub type VectorIndexFileCache = InMemoryLruCache; + /// In memory object cache of parquet FileMetaData of external parquet rs files pub type ParquetMetaDataCache = InMemoryLruCache; @@ -151,6 +154,20 @@ impl CachedObject for InvertedIndexMeta { } } +impl CachedObject for VectorIndexFile { + type Cache = VectorIndexFileCache; + fn cache() -> Option { + CacheManager::instance().get_vector_index_file_cache() + } +} + +impl CachedObject for VectorIndexMeta { + type Cache = VectorIndexMetaCache; + fn cache() -> Option { + CacheManager::instance().get_vector_index_meta_cache() + } +} + pub struct CacheValue { inner: Arc, mem_bytes: usize, @@ -280,6 +297,24 @@ impl From for CacheValue { } } +impl From for CacheValue { + fn from(value: VectorIndexMeta) -> Self { + CacheValue { + inner: Arc::new(value), + mem_bytes: 0, + } + } +} + +impl From for CacheValue { + fn from(value: VectorIndexFile) -> Self { + CacheValue { + mem_bytes: std::mem::size_of::() + value.data.len(), + inner: Arc::new(value), + } + } +} + impl From for CacheValue { fn from(value: ParquetMetaData) -> Self { CacheValue { diff --git a/src/query/storages/common/cache/src/manager.rs b/src/query/storages/common/cache/src/manager.rs index 1f7f12e91a335..2e4aeb6b0fb6b 100644 --- 
a/src/query/storages/common/cache/src/manager.rs +++ b/src/query/storages/common/cache/src/manager.rs @@ -42,6 +42,8 @@ use crate::caches::PrunePartitionsCache; use crate::caches::SegmentBlockMetasCache; use crate::caches::TableSnapshotCache; use crate::caches::TableSnapshotStatisticCache; +use crate::caches::VectorIndexFileCache; +use crate::caches::VectorIndexMetaCache; use crate::providers::HybridCache; use crate::providers::HybridCacheExt; use crate::CacheAccessor; @@ -104,6 +106,8 @@ pub struct CacheManager { bloom_index_meta_cache: CacheSlot, inverted_index_meta_cache: CacheSlot, inverted_index_file_cache: CacheSlot, + vector_index_meta_cache: CacheSlot, + vector_index_file_cache: CacheSlot, prune_partitions_cache: CacheSlot, parquet_meta_data_cache: CacheSlot, in_memory_table_data_cache: CacheSlot, @@ -223,6 +227,8 @@ impl CacheManager { column_oriented_segment_info_cache: CacheSlot::new(None), inverted_index_meta_cache: CacheSlot::new(None), inverted_index_file_cache: CacheSlot::new(None), + vector_index_meta_cache: CacheSlot::new(None), + vector_index_file_cache: CacheSlot::new(None), prune_partitions_cache: CacheSlot::new(None), parquet_meta_data_cache: CacheSlot::new(None), table_statistic_cache: CacheSlot::new(None), @@ -302,6 +308,25 @@ impl CacheManager { MEMORY_CACHE_INVERTED_INDEX_FILE, inverted_index_file_size, ); + + let vector_index_meta_cache = Self::new_items_cache_slot( + MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA, + config.vector_index_meta_count as usize, + ); + + // setup in-memory vector index filter cache + let vector_index_file_size = if config.vector_index_filter_memory_ratio != 0 { + (*max_server_memory_usage as usize) + * config.vector_index_filter_memory_ratio as usize + / 100 + } else { + config.vector_index_filter_size as usize + }; + let vector_index_file_cache = Self::new_bytes_cache_slot( + MEMORY_CACHE_VECTOR_INDEX_FILE, + vector_index_file_size, + ); + let prune_partitions_cache = Self::new_items_cache_slot( MEMORY_CACHE_PRUNE_PARTITIONS, config.table_prune_partitions_count as usize, @@ -335,6 +360,8 @@ impl CacheManager { bloom_index_meta_cache, inverted_index_meta_cache, inverted_index_file_cache, + vector_index_meta_cache, + vector_index_file_cache, prune_partitions_cache, table_statistic_cache, in_memory_table_data_cache, @@ -417,6 +444,14 @@ impl CacheManager { let cache = &self.inverted_index_meta_cache; Self::set_items_capacity(cache, new_capacity, name); } + MEMORY_CACHE_VECTOR_INDEX_FILE => { + let cache = &self.vector_index_file_cache; + Self::set_bytes_capacity(cache, new_capacity, name); + } + MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA => { + let cache = &self.vector_index_meta_cache; + Self::set_items_capacity(cache, new_capacity, name); + } HYBRID_CACHE_BLOOM_INDEX_FILE_META_DATA | IN_MEMORY_CACHE_BLOOM_INDEX_FILE_META_DATA => { Self::set_hybrid_cache_items_capacity( @@ -593,6 +628,14 @@ impl CacheManager { self.inverted_index_file_cache.get() } + pub fn get_vector_index_meta_cache(&self) -> Option { + self.vector_index_meta_cache.get() + } + + pub fn get_vector_index_file_cache(&self) -> Option { + self.vector_index_file_cache.get() + } + pub fn get_prune_partitions_cache(&self) -> Option { self.prune_partitions_cache.get() } @@ -736,6 +779,8 @@ const MEMORY_CACHE_PRUNE_PARTITIONS: &str = "memory_cache_prune_partitions"; const MEMORY_CACHE_INVERTED_INDEX_FILE: &str = "memory_cache_inverted_index_file"; const MEMORY_CACHE_INVERTED_INDEX_FILE_META_DATA: &str = "memory_cache_inverted_index_file_meta_data"; +const 
MEMORY_CACHE_VECTOR_INDEX_FILE: &str = "memory_cache_vector_index_file"; +const MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA: &str = "memory_cache_vector_index_file_meta_data"; const HYBRID_CACHE_BLOOM_INDEX_FILE_META_DATA: &str = "cache_bloom_index_file_meta_data"; const HYBRID_CACHE_COLUMN_DATA: &str = "cache_column_data"; @@ -985,6 +1030,8 @@ mod tests { bloom_filter_index_size: 0, inverted_index_size: None, ngram_filter_index_size: None, + vector_index_location: None, + vector_index_size: None, virtual_block_meta: None, compression: Compression::Lz4, create_on: None, diff --git a/src/query/storages/common/index/Cargo.toml b/src/query/storages/common/index/Cargo.toml index 3fb76c396cca7..7f230d41573a1 100644 --- a/src/query/storages/common/index/Cargo.toml +++ b/src/query/storages/common/index/Cargo.toml @@ -14,26 +14,40 @@ databend-common-ast = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } databend-common-functions = { workspace = true } +databend-common-vector = { workspace = true } databend-storages-common-table-meta = { workspace = true } anyerror = { workspace = true } bincode = { workspace = true, features = ["serde"] } +bitvec = { workspace = true } +bytemuck = { workspace = true, features = ["derive", "extern_crate_alloc", "must_cast", "transparentwrapper_extra"] } bytes = { workspace = true } cbordata = { workspace = true } fastrace = { workspace = true } +feistel-permutation-rs = { workspace = true } goldenfile = { workspace = true } +itertools = { workspace = true } jsonb = { workspace = true } levenshtein_automata = { workspace = true } log = { workspace = true } match-template = { workspace = true } +num-traits = { workspace = true } +num_cpus = { workspace = true } +ordered-float = { workspace = true } +parking_lot = { workspace = true } parquet = { workspace = true } +rand = { workspace = true } +rayon = { workspace = true } roaring = { workspace = true } +self_cell = { workspace = true } serde = { workspace = true } +serde_json = { workspace = true } tantivy = { workspace = true } tantivy-common = { workspace = true } tantivy-fst = { workspace = true } thiserror = { workspace = true } xorfilter-rs = { workspace = true, features = ["cbordata"] } +zerocopy = { workspace = true, features = ["derive"] } [dev-dependencies] divan = { workspace = true } diff --git a/src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs b/src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs new file mode 100644 index 0000000000000..54015bbed6b3b --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs @@ -0,0 +1,407 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::num::NonZero; +use std::num::Saturating; + +use num_traits::AsPrimitive; +use num_traits::ConstOne; +use num_traits::PrimInt; +use num_traits::Unsigned; + +pub trait ConstBits { + /// The size of this integer type in bits. 
+ const BITS: u32; +} + +macro_rules! impl_const_bits { + ($($t:ty),* $(,)?) => { + $( + impl ConstBits for $t { + const BITS: u32 = Self::BITS; + } + impl ConstBits for NonZero<$t> { + const BITS: u32 = Self::BITS; + } + impl ConstBits for Saturating<$t> { + const BITS: u32 = Self::BITS; + } + )* + }; +} + +impl_const_bits!(i8, i16, i32, i64, i128, isize); +impl_const_bits!(u8, u16, u32, u64, u128, usize); + +/// The internal buffer type for [`BitWriter`] and [`BitReader`]. +/// Instead of writing/reading a single byte at a time, they write/read +/// `size_of::()` bytes at once, for a better performance. +/// This is an implementation detail and shouldn't affect the data layout. +/// Any unsigned numeric type larger than `u32` should work. +type Buf = u64; + +/// Writes bits to the `u8` vector. +/// It's like [`std::io::Write`], but for bits rather than bytes. +pub struct BitWriter<'a> { + output: &'a mut Vec, + buf: Buf, + buf_bits: u8, +} + +impl<'a> BitWriter<'a> { + /// Create a new writer that appends bits to the `output`. + #[inline] + pub fn new(output: &'a mut Vec) -> Self { + Self { + output, + buf: 0, + buf_bits: 0, + } + } + + /// Write a `value` of `bits` bits to the output. + /// + /// The `bits` must be less than or equal to 32, and the `value` must fit in + /// the `bits` bits. + #[inline] + pub fn write>(&mut self, value: T, bits: u8) { + let value = value.into(); + + #[cfg(test)] + debug_assert!(u32::from(bits) <= T::BITS && packed_bits(value) <= bits); + + self.buf |= value << self.buf_bits; + self.buf_bits += bits; + if self.buf_bits >= Buf::BITS as u8 { + // ┌──value───┐┌───initial self.buf────┐ + // rrrrrvvvvvvvbbbbbbbbbbbbbbbbbbbbbbbbb + // └[2]┘└─────────────[1]──────────────┘ + self.output.extend_from_slice(&self.buf.to_le_bytes()); // [1] + self.buf_bits -= Buf::BITS as u8; + if bits - self.buf_bits == Buf::BITS as u8 { + self.buf = 0; + } else { + self.buf = value >> (bits - self.buf_bits); // [2] + } + } + } + + /// Write the remaining bufferized bits to the output. + #[inline] + pub fn finish(self) { + self.output.extend_from_slice( + &self.buf.to_le_bytes()[..(self.buf_bits as usize).div_ceil(u8::BITS as usize)], + ); + } +} + +/// Reads bits from `u8` slice. +/// It's like [`std::io::Read`], but for bits rather than bytes. +pub struct BitReader<'a> { + input: &'a [u8], + buf: Buf, + buf_bits: u8, + mask: Buf, + bits: u8, +} + +impl<'a> BitReader<'a> { + #[inline] + pub fn new(input: &'a [u8]) -> Self { + Self { + input, + buf: 0, + buf_bits: 0, + mask: 0, + bits: 0, + } + } + + /// Configure the reader to read `bits` bits at a time. This affects + /// subsequent calls to [`read()`]. + /// + /// The `bits` must be less than or equal to 32. + /// + /// Note: it's a separate method and not a parameter of [`read()`] to + /// optimize reading a group of values with the same bit size. + /// + /// [`read()`]: Self::read + #[inline] + pub fn set_bits(&mut self, bits: u8) { + #[cfg(test)] + debug_assert!(u32::from(bits) <= Buf::BITS); + + self.bits = bits; + self.mask = make_bitmask(bits); + } + + /// Returns the number of bits set with [`set_bits()`]. + /// + /// [`set_bits()`]: Self::set_bits + #[inline] + pub fn bits(&self) -> u8 { + self.bits + } + + /// Read next `bits` bits from the input. The amount of bits must be set + /// with [`set_bits()`] before calling this method. + /// + /// If read beyond the end of the input, the result would be an unspecified + /// garbage. 
+ /// + /// [`set_bits()`]: Self::set_bits + #[inline] + pub fn read(&mut self) -> T + where + T: 'static + Copy, + Buf: AsPrimitive, + { + if self.buf_bits >= self.bits { + self.buf_bits -= self.bits; + let val = (self.buf & self.mask).as_(); + self.buf >>= self.bits; + val + } else { + // Consider a naive approach: + // + // let new_buf = read_buf_and_advance(&mut self.input); + // self.buf |= new_buf << self.buf_bits; // *overflow* + // self.buf_bits += size_of_val(&new_buf) * u8::BITS; + // ... then proceed as usual ... + // + // For performance reasons, we want `new_buf` and `self.buf` to be + // both 64-bit. But when they are the same, the naive approach would + // overflow in the commented line. So, the following code is a trick + // to let us use the same type for both. + // + // ┌───────────new_buf────────────┐┌─self.buf─┐ + // rrrrrrrrrrrrrrrrrrrrrrrrrvvvvvvvbbbbbbbbbbbb + // └──────────[3]──────────┘├─[2]─┘└───[1]────┤ + // └───────val───────┘ + let new_buf = read_buf_and_advance(&mut self.input); + let val = (( + // [1] + self.buf + ) | ( + // [2] + new_buf << self.buf_bits + ) & self.mask) + .as_(); + self.buf_bits += Buf::BITS as u8 - self.bits; + if self.buf_bits == 0 { + self.buf = 0; + } else { + self.buf = /*[3]*/ new_buf >> (Buf::BITS as u8 - self.buf_bits); + } + val + } + } +} + +/// Read a single [`Buf`] from the `input` and advance (or not) the `input`. +#[inline] +fn read_buf_and_advance(input: &mut &[u8]) -> Buf { + let mut buf = 0; + if input.len() >= size_of::() { + // This line translates to a single unaligned pointer read. + buf = Buf::from_le_bytes(input[0..size_of::()].try_into().unwrap()); + // This line translates to a single pointer advance. + *input = &input[size_of::()..]; + } else { + // We could remove this branch by explicitly using unsafe pointer + // operations in the branch above, but we are playing it safe here. + for (i, byte) in input.iter().copied().enumerate() { + buf |= Buf::from(byte) << (i * u8::BITS as usize); + } + + // The following line is commented out for performance reasons as this + // should be the last read. If the caller will try to read input again + // anyway, it will get the same values again (aka "unspecified garbage" + // as stated in the documentation). + // *input = &[]; // Not needed, see the comment above. + } + buf +} + +/// Minimum amount of bits required to store a value in the range +/// `0..=max_value`. 
+pub fn packed_bits(max_value: T) -> u8 { + (T::BITS - max_value.leading_zeros()) as u8 +} + +pub fn make_bitmask(bits: u8) -> T { + if u32::from(bits) >= T::BITS { + T::max_value() + } else { + (T::ONE << usize::from(bits)) - T::ONE + } +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + use std::iter::zip; + + use num_traits::ConstOne; + use num_traits::ConstZero; + use num_traits::PrimInt; + use num_traits::Unsigned; + use rand::distributions::uniform::SampleUniform; + use rand::rngs::StdRng; + use rand::Rng as _; + use rand::SeedableRng as _; + + use super::*; + + #[test] + fn test_simple() { + let mut packed = Vec::new(); + let mut w = BitWriter::new(&mut packed); + + w.write::(0b01010, 5); + w.write::(0b10110, 5); + w.write::(0b10100, 5); + w.write::(0b010110010, 9); + w.write::(0b101100001, 9); + w.write::(0b001001101, 9); + w.write::(0x12345678, 32); + w.finish(); + assert_eq!(packed.len(), 10); + + let mut r = BitReader::new(&packed); + r.set_bits(5); + assert_eq!(r.read::(), 0b01010); + assert_eq!(r.read::(), 0b10110); + assert_eq!(r.read::(), 0b10100); + r.set_bits(9); + assert_eq!(r.read::(), 0b010110010); + assert_eq!(r.read::(), 0b101100001); + assert_eq!(r.read::(), 0b001001101); + r.set_bits(32); + assert_eq!(r.read::(), 0x12345678); + } + + #[test] + fn test_random() { + test_random_impl::(); + test_random_impl::(); + test_random_impl::(); + test_random_impl::(); + } + + fn test_random_impl() + where + Buf: AsPrimitive, + T: ConstBits + + ConstOne + + ConstZero + + Copy + + Debug + + Into + + PrimInt + + SampleUniform + + Unsigned + + 'static, + { + let mut rng = StdRng::seed_from_u64(42); + + let mut bits_per_value = Vec::new(); + let mut values = Vec::::new(); + let mut packed = Vec::new(); + let mut unpacked = Vec::::new(); + for len in 0..40 { + for _ in 0..100 { + values.clear(); + bits_per_value.clear(); + let mut total_bits = 0; + for _ in 0..len { + let bits = rng.gen_range(0u8..=T::BITS as u8); + values.push(rng.gen_range(T::ZERO..=make_bitmask(bits))); + bits_per_value.push(bits); + total_bits += u64::from(bits); + } + + packed.clear(); + let mut w = BitWriter::new(&mut packed); + for (&x, &bits) in zip(&values, &bits_per_value) { + w.write(x, bits); + } + w.finish(); + + assert_eq!(packed.len(), total_bits.next_multiple_of(8) as usize / 8); + + unpacked.clear(); + let mut r = BitReader::new(&packed); + for &bits in &bits_per_value { + r.set_bits(bits); + unpacked.push(r.read()); + } + + assert_eq!(values, unpacked); + } + } + } + + #[test] + fn test_packed_bits_simple() { + assert_eq!(packed_bits(0_u32), 0); + + assert_eq!(packed_bits(1_u32), 1); + + assert_eq!(packed_bits(2_u32), 2); + assert_eq!(packed_bits(3_u32), 2); + + assert_eq!(packed_bits(4_u32), 3); + assert_eq!(packed_bits(7_u32), 3); + + assert_eq!(packed_bits(0x_7FFF_FFFF_u32), 31); + + assert_eq!(packed_bits(0x_8000_0000_u32), 32); + assert_eq!(packed_bits(0x_FFFF_FFFF_u32), 32); + } + + #[test] + fn test_packed_bits_extensive() { + fn check>(v: u128, expected_bits: u8) { + if let Ok(x) = v.try_into() { + assert_eq!(packed_bits::(x), expected_bits); + } + } + + for expected_bits in 0..=128_u8 { + let (min, max); + if expected_bits == 0 { + (min, max) = (0, 0); + } else { + min = 1_u128 << (expected_bits - 1); + max = (min - 1) * 2 + 1; + } + + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + + check::(max, expected_bits); + check::(max, expected_bits); + 
diff --git a/src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs
new file mode 100644
index 0000000000000..388a0753add57
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs
@@ -0,0 +1,192 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::bitpacking::packed_bits;
+use super::bitpacking::BitReader;
+use super::bitpacking::BitWriter;
+
+/// To simplify value counting, each value should be at least one byte.
+/// Otherwise the count would be ambiguous, e.g., a 2-byte slice of 5-bit
+/// values could contain either 2 or 3 values.
+pub const MIN_BITS_PER_VALUE: u8 = u8::BITS as u8;
+
+/// How many bits are required to store a value in the range
+/// `MIN_BITS_PER_VALUE..=u32::BITS`.
+const HEADER_BITS: u8 = 5;
+
+/// A specialized packer to pack HNSW graph links.
+///
+/// It assumes that the first `m` (or `m0`) values could be re-ordered for better
+/// compression.
+///
+/// Parameters:
+/// - `bits_per_unsorted` should be enough to store the maximum point ID
+///   (it should be the same for all nodes/links within a segment).
+/// - `sorted_count` is `m` (or `m0`) for this layer.
+pub fn pack_links(
+    links: &mut Vec<u8>,
+    mut raw_links: Vec<u32>,
+    bits_per_unsorted: u8,
+    sorted_count: usize,
+) {
+    if raw_links.is_empty() {
+        return;
+    }
+
+    // Sort and delta-encode the first `sorted_count` links.
+    let sorted_count = raw_links.len().min(sorted_count);
+    raw_links[..sorted_count].sort_unstable();
+    for i in (1..sorted_count).rev() {
+        raw_links[i] -= raw_links[i - 1];
+    }
+
+    let mut w = BitWriter::new(links);
+
+    if sorted_count != 0 {
+        // 1. Header.
+        let bits_per_sorted =
+            packed_bits(*raw_links[..sorted_count].iter().max().unwrap()).max(MIN_BITS_PER_VALUE);
+        w.write(u32::from(bits_per_sorted - MIN_BITS_PER_VALUE), HEADER_BITS);
+
+        // 2. First `sorted_count` values, sorted and delta-encoded.
+        //    The bit width is determined by the header.
+        for &value in &raw_links[..sorted_count] {
+            w.write(value, bits_per_sorted);
+        }
+    }
+
+    // 3. The rest of the values, unsorted.
+    for &value in &raw_links[sorted_count..] {
+        w.write(value, bits_per_unsorted);
+    }
+
+    w.finish();
+}
+
+/// Returns an iterator over packed links.
+/// See [`pack_links`] for parameter descriptions.
+#[inline]
+pub fn iterate_packed_links(
+    links: &[u8],
+    bits_per_unsorted: u8,
+    sorted_count: usize,
+) -> PackedLinksIterator {
+    let mut reader = BitReader::new(links);
+
+    let mut remaining_bits = links.len() * u8::BITS as usize;
+    let mut remaining_bits_target = remaining_bits;
+    if sorted_count != 0 && !links.is_empty() {
+        // 1. Header.
+        reader.set_bits(HEADER_BITS);
+        let bits_per_sorted = reader.read::<u8>() + MIN_BITS_PER_VALUE;
+        remaining_bits -= HEADER_BITS as usize;
+
+        // Prepare for reading sorted values.
+        reader.set_bits(bits_per_sorted);
+        let max_sorted = remaining_bits / bits_per_sorted as usize;
+        remaining_bits_target -= sorted_count.min(max_sorted) * bits_per_sorted as usize;
+    } else {
+        // Prepare for reading unsorted values.
+        reader.set_bits(bits_per_unsorted);
+    }
+
+    PackedLinksIterator {
+        reader,
+        bits_per_unsorted,
+        remaining_bits,
+        remaining_bits_target,
+        current_delta: 0,
+    }
+}
+
+/// Iterator over links packed with [`pack_links`].
+/// Created by [`iterate_packed_links`].
+pub struct PackedLinksIterator<'a> {
+    reader: BitReader<'a>,
+    bits_per_unsorted: u8,
+    remaining_bits: usize,
+    remaining_bits_target: usize,
+    current_delta: u32,
+}
+
+impl PackedLinksIterator<'_> {
+    #[inline]
+    fn next_sorted(&mut self) -> u32 {
+        self.current_delta = self.current_delta.wrapping_add(self.reader.read::<u32>());
+        self.remaining_bits -= self.reader.bits() as usize;
+        self.current_delta
+    }
+
+    #[inline]
+    fn next_unsorted(&mut self) -> Option<u32> {
+        if let Some(rb) = self.remaining_bits.checked_sub(self.reader.bits() as usize) {
+            self.remaining_bits = rb;
+            Some(self.reader.read::<u32>())
+        } else {
+            None
+        }
+    }
+}
+
+impl Iterator for PackedLinksIterator<'_> {
+    type Item = u32;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.remaining_bits > self.remaining_bits_target {
+            let value = self.next_sorted();
+            if self.remaining_bits <= self.remaining_bits_target {
+                // It was the last sorted value.
+                self.reader.set_bits(self.bits_per_unsorted);
+            }
+            return Some(value);
+        }
+
+        self.next_unsorted()
+    }
+
+    /// Optimized [`Iterator::fold()`]. Should be faster than calling
+    /// [`Iterator::next()`] in a loop.
+    ///
+    /// It is used in a hot loop during HNSW search, so performance is critical.
+    #[inline]
+    fn fold<Acc, F: FnMut(Acc, u32) -> Acc>(mut self, mut acc: Acc, mut f: F) -> Acc {
+        while self.remaining_bits > self.remaining_bits_target {
+            acc = f(acc, self.next_sorted());
+        }
+
+        self.reader.set_bits(self.bits_per_unsorted);
+        while let Some(value) = self.next_unsorted() {
+            acc = f(acc, value);
+        }
+
+        acc
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (sorted, unsorted);
+        if let Some(sorted_bits) = self.remaining_bits.checked_sub(self.remaining_bits_target) {
+            let sorted_bits = sorted_bits.next_multiple_of(self.reader.bits() as usize);
+            sorted = sorted_bits / self.reader.bits() as usize;
+            unsorted = (self.remaining_bits - sorted_bits) / self.bits_per_unsorted as usize;
+        } else {
+            sorted = 0;
+            unsorted = self.remaining_bits / self.reader.bits() as usize;
+        }
+        (sorted + unsorted, Some(sorted + unsorted))
+    }
+}
+
+impl ExactSizeIterator for PackedLinksIterator<'_> {}
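The packing format is easiest to see end to end. A minimal round-trip sketch (not part of the patch; the `use` path, crate name, and module visibility are assumptions made for illustration):

```rust
// Round-trip through the link packer: the first `sorted_count` links come
// back sorted (they are stored as deltas from each other), the rest keep
// their original order.
use databend_storages_common_index::hnsw_index::common::bitpacking_links::{
    iterate_packed_links, pack_links,
};

fn main() {
    let raw = vec![42, 7, 100, 5, 9]; // first 3 are the re-orderable links
    let bits_per_unsorted = 10; // enough for the largest point id here
    let mut packed = Vec::new();
    pack_links(&mut packed, raw, bits_per_unsorted, 3);

    let links: Vec<u32> = iterate_packed_links(&packed, bits_per_unsorted, 3).collect();
    assert_eq!(links, [7, 42, 100, 5, 9]); // sorted prefix, unsorted tail
}
```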
diff --git a/src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs
new file mode 100644
index 0000000000000..667185ebad02a
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs
@@ -0,0 +1,311 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! A compression algorithm to store medium-to-large-sized sorted arrays of
+//! `u64` values.
+//!
+//! Allows for fast random access within the compressed data.
+//!
+//! Assumptions:
+//! - The input values are sorted.
+//! - The distribution of the values is somewhat uniform, i.e. there are no
+//!   large gaps between values. A single gap might bloat the overall size, but
+//!   it shouldn't be worse than storing byte-aligned bases without deltas.
+//!
+//! # Format
+//!
+//! The compressed data consists of small, uniformly-sized chunks.
+//! The size of each chunk is determined by compression parameters.
+//! The compression parameters are determined automatically during compression.
+//!
+//! Each chunk contains `1 << chunk_len_log2` values: the first value (the base)
+//! is stored as is, and the rest are stored as deltas from the base. Or, more
+//! formally:
+//! - `chunk_value[0] = base` (assume `delta[0]` is 0)
+//! - `chunk_value[i] = base + delta[i]` for `i > 0`
+//!
+//! ```text
+//! ┌───────┬───────┬───────┬   ┬───────┬────────┐
+//! │chunk 0│chunk 1│chunk 2│ … │chunk X│7 × 0xFF│
+//! └───────┤       ├───────┴   ┴───────┴────────┘
+//! ╭───────╯       ╰────────────────╮
+//! │         bitpacked chunk        │
+//! ├────┬──┬──┬──┬──┬   ┬────┬──────┤
+//! │base│Δ₁│Δ₂│Δ₃│Δ₄│ … │Δₙ₋₁│bitpad│
+//! └────┴──┴──┴──┴──┴   ┴────┴──────┘
+//! ```
+//!
+//! In the above diagram:
+//! - `7 × 0xFF` is the 7-byte tail padding (see [`TAIL_SIZE`]).
+//! - `base` is `parameters.base_bits` wide.
+//! - `Δ₁`..`Δₙ₋₁` are delta values, each is `parameters.delta_bits` wide.
+//! - `bitpad` is a bit padding (0..7 bits) so the chunk is byte-aligned.
+
+use std::ops::RangeInclusive;
+
+use thiserror::Error;
+use zerocopy::little_endian::U64;
+use zerocopy::FromBytes;
+use zerocopy::Immutable;
+use zerocopy::IntoBytes;
+use zerocopy::KnownLayout;
+
+use super::bitpacking::make_bitmask;
+use super::bitpacking::packed_bits;
+use super::bitpacking::BitWriter;
+
+/// The size of the tail padding.
+/// These extra 7 bytes after the last chunk allow the decompressor to safely
+/// perform unchecked unaligned 8-byte reads.
+const TAIL_SIZE: usize = size_of::<u64>() - 1;
+
+/// The allowed range for the `delta_bits` parameter.
+/// Limiting it up to 7*8 = 56 bits allows the decompressor to read a single
+/// delta value in a single unaligned read.
+/// Disallowing 0 removes unlikely edge cases.
+const DELTA_BITS_RANGE: RangeInclusive<u8> = 1..=(u64::BITS - u8::BITS) as u8;
+
+/// Larger values are unlikely to produce better compression.
+const MAX_CHUNK_LEN_LOG2: u8 = 7;
+
+/// Compress the provided data using the best parameters found.
+///
+/// # Panics
+///
+/// This function may panic if the input data is not sorted.
+pub fn compress(values: &[u64]) -> (Vec<u8>, Parameters) {
+    let parameters = Parameters::find_best(values);
+    let compressed = compress_with_parameters(values, parameters);
+    (compressed, parameters)
+}
+
+/// Compress the data with given parameters.
+fn compress_with_parameters(values: &[u64], parameters: Parameters) -> Vec<u8> {
+    let expected_size = parameters.total_chunks_size_bytes().unwrap() + TAIL_SIZE;
+    let mut compressed = Vec::with_capacity(expected_size);
+
+    for chunk in values.chunks(1 << parameters.chunk_len_log2) {
+        let first = chunk[0];
+        let mut w = BitWriter::new(&mut compressed);
+        w.write(first, parameters.base_bits);
+        for &value in chunk.iter().skip(1) {
+            w.write(value - first, parameters.delta_bits);
+        }
+        // For the last (incomplete) chunk, pad it with 0b11...11, so all chunks
+        // have the same size.
+        for _ in 0..(1 << parameters.chunk_len_log2) - chunk.len() {
+            w.write(
+                make_bitmask::<u64>(parameters.delta_bits),
+                parameters.delta_bits,
+            );
+        }
+        w.finish(); // bit padding
+    }
+
+    compressed.extend_from_slice(&[0xFF; TAIL_SIZE]);
+    assert_eq!(compressed.len(), expected_size);
+
+    compressed
+}
+
+#[derive(Clone, Debug)]
+pub struct Reader<'a> {
+    base_bits: u8,
+    base_mask: u64,
+    delta_bits: u8,
+    delta_mask: u64,
+    chunk_len_log2: u8,
+    chunk_len_mask: usize,
+    chunk_size_bytes: usize,
+    compressed: &'a [u8],
+    len: usize,
+}
+
+#[derive(Error, Debug)]
+#[error("decompression error: {0}")]
+pub struct DecompressionError(String);
+
+impl<'a> Reader<'a> {
+    pub fn new(
+        parameters: Parameters,
+        bytes: &'a [u8],
+    ) -> Result<(Self, &'a [u8]), DecompressionError> {
+        // Safety checks: the `get()` method doesn't perform bounds checking,
+        // so we need to be extra cautious here, including checking for
+        // overflows.
+        if !parameters.valid() {
+            return Err(DecompressionError("invalid parameters".to_string()));
+        }
+        let total_size_bytes = parameters
+            .total_chunks_size_bytes()
+            .and_then(|size| size.checked_add(TAIL_SIZE))
+            .ok_or_else(|| DecompressionError("invalid parameters".to_string()))?;
+
+        let (compressed, bytes) = bytes.split_at_checked(total_size_bytes).ok_or_else(|| {
+            DecompressionError(format!(
+                "insufficient length (compressed data, expected {total_size_bytes} bytes, got {})",
+                bytes.len(),
+            ))
+        })?;
+
+        let result = Self {
+            base_bits: parameters.base_bits,
+            base_mask: make_bitmask(parameters.base_bits),
+            delta_bits: parameters.delta_bits,
+            delta_mask: make_bitmask(parameters.delta_bits),
+            chunk_len_log2: parameters.chunk_len_log2,
+            chunk_len_mask: make_bitmask(parameters.chunk_len_log2),
+            chunk_size_bytes: parameters.chunk_size_bytes().unwrap(),
+            compressed,
+            len: parameters.length.get() as usize,
+        };
+
+        // Safety checks: the `get()` method doesn't perform bounds checking.
+        // The assertions below ensure that the `compressed` slice holds enough
+        // bytes for any index reachable by `get()`.
+        if let Some(max_index) = result.len.checked_sub(1) {
+            let chunk_offset = (max_index >> result.chunk_len_log2) * result.chunk_size_bytes;
+            // *base*
+            assert!(chunk_offset + size_of::<u64>() <= result.compressed.len());
+
+            let max_value_index = result.chunk_len_mask;
+            if max_value_index > 0 {
+                let delta_offset_bits =
+                    result.base_bits as usize + (max_value_index - 1) * result.delta_bits as usize;
+                // *delta*
+                assert!(
+                    chunk_offset + delta_offset_bits / u8::BITS as usize + size_of::<u64>()
+                        <= result.compressed.len()
+                );
+            }
+        }
+
+        Ok((result, bytes))
+    }
+
+    /// Parameters used to compress the data.
+    #[allow(dead_code)]
+    pub fn parameters(&self) -> Parameters {
+        Parameters {
+            length: U64::new(self.len as u64),
+            base_bits: self.base_bits,
+            delta_bits: self.delta_bits,
+            chunk_len_log2: self.chunk_len_log2,
+        }
+    }
+
+    /// The number of values in the decompressed data.
+    #[inline]
+    #[allow(dead_code)]
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Get the value at the given index.
+    #[inline]
+    pub fn get(&self, index: usize) -> Option<u64> {
+        if index >= self.len {
+            return None;
+        }
+
+        let chunk_offset = (index >> self.chunk_len_log2) * self.chunk_size_bytes;
+        let value_index = index & self.chunk_len_mask;
+        let chunk_ptr = self.compressed.as_ptr().wrapping_add(chunk_offset);
+        // SAFETY: see the *base* comment in `new()`.
+        let base = unsafe { read_u64_le(chunk_ptr) } & self.base_mask;
+        if value_index == 0 {
+            return Some(base);
+        }
+        let delta_offset_bits =
+            self.base_bits as usize + (value_index - 1) * self.delta_bits as usize;
+        // SAFETY: see the *delta* comment in `new()`.
+        let delta = (unsafe { read_u64_le(chunk_ptr.add(delta_offset_bits / u8::BITS as usize)) }
+            >> (delta_offset_bits % u8::BITS as usize))
+            & self.delta_mask;
+        Some(base + delta)
+    }
+}
+
+#[inline(always)]
+unsafe fn read_u64_le(ptr: *const u8) -> u64 {
+    unsafe { u64::from_le(ptr.cast::<u64>().read_unaligned()) }
+}
+
+/// Compression parameters. Required for decompression.
+#[derive(Clone, Copy, Debug, FromBytes, Immutable, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub struct Parameters {
+    /// Amount of values in the decompressed data.
+    pub length: U64,
+    /// Amount of bits to store base values.
+    pub base_bits: u8,
+    /// Amount of bits to store delta values.
+    pub delta_bits: u8,
+    /// Log2 of the amount of values in a chunk.
+    pub chunk_len_log2: u8,
+}
+
+impl Parameters {
+    /// Check if the parameters are valid.
+    fn valid(self) -> bool {
+        u32::from(self.base_bits) <= u64::BITS
+            && DELTA_BITS_RANGE.contains(&self.delta_bits)
+            && self.chunk_len_log2 <= MAX_CHUNK_LEN_LOG2
+    }
+
+    /// Size of a single chunk in bytes.
+    /// Returns `None` on overflow: see safety comments in [`Reader::new()`].
+    #[deny(clippy::arithmetic_side_effects, reason = "extra cautious for safety")]
+    fn chunk_size_bytes(self) -> Option<usize> {
+        let bits = (self.base_bits as usize).checked_add(
+            (self.delta_bits as usize).checked_mul(make_bitmask::<usize>(self.chunk_len_log2))?,
+        )?;
+        Some(bits.div_ceil(u8::BITS as usize))
+    }
+
+    /// Size of the compressed data, without the tail.
+    /// Returns `None` on overflow: see safety comments in [`Reader::new()`].
+    #[deny(clippy::arithmetic_side_effects, reason = "extra cautious for safety")]
+    fn total_chunks_size_bytes(self) -> Option<usize> {
+        let chunks_count = (self.length.get() as usize).div_ceil(1 << self.chunk_len_log2);
+        chunks_count.checked_mul(self.chunk_size_bytes()?)
+    }
+
+    /// Find the best compression parameters for the given values.
+    fn find_best(values: &[u64]) -> Self {
+        Self::try_all(values)
+            .min_by_key(|parameters| parameters.total_chunks_size_bytes())
+            .unwrap()
+    }
+
+    /// Generate all possible compression parameters for the given values.
+    fn try_all(values: &[u64]) -> impl Iterator<Item = Parameters> + use<'_> {
+        let last_value = values.last().copied().unwrap_or(0);
+        (0..=MAX_CHUNK_LEN_LOG2)
+            .map(move |chunk_len_log2| {
+                let mut delta_bits = *DELTA_BITS_RANGE.start();
+                for chunk in values.chunks(1 << chunk_len_log2) {
+                    delta_bits = delta_bits.max(packed_bits(chunk.last().unwrap() - chunk[0]));
+                }
+                Parameters {
+                    length: U64::new(values.len() as u64),
+                    base_bits: packed_bits(last_value).max(1),
+                    delta_bits,
+                    chunk_len_log2,
+                }
+            })
+            .filter(|parameters| DELTA_BITS_RANGE.contains(&parameters.delta_bits))
+    }
+}
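Usage of this module is straightforward; a hedged sketch (not part of the patch; the `use` path and crate name are assumptions):

```rust
// Compress a sorted array, then read individual values back without
// decompressing the whole buffer.
use databend_storages_common_index::hnsw_index::common::bitpacking_ordered;

fn main() {
    // Sorted, roughly uniform values (e.g. cumulative link offsets).
    let values: Vec<u64> = (0..1000u64).map(|i| i * 3).collect();
    let (compressed, parameters) = bitpacking_ordered::compress(&values);

    // `new` returns the reader plus whatever bytes follow the compressed blob.
    let (reader, rest) = bitpacking_ordered::Reader::new(parameters, &compressed).unwrap();
    assert!(rest.is_empty());
    assert_eq!(reader.get(500), Some(1500));
    assert_eq!(reader.get(1000), None); // out of range
}
```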
diff --git a/src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs b/src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs
new file mode 100644
index 0000000000000..27f55cf46c8da
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs
@@ -0,0 +1,112 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Reverse;
+use std::collections::BinaryHeap;
+use std::num::NonZeroUsize;
+use std::vec::IntoIter as VecIntoIter;
+
+use bytemuck::TransparentWrapper as _;
+use bytemuck::TransparentWrapperAlloc as _;
+use serde::Deserialize;
+use serde::Serialize;
+
+/// To avoid excessive memory allocation, FixedLengthPriorityQueue
+/// imposes a reasonable limit on the allocation size. If the limit
+/// is extremely large, we treat it as if no limit was set and
+/// delay allocation, assuming that the results will fit within a
+/// predefined threshold.
+const LARGEST_REASONABLE_ALLOCATION_SIZE: usize = 1_048_576;
+
+/// A container that forgets all but the top N elements
+///
+/// This is a MinHeap by default - it will keep the largest elements and pop
+/// the smallest.
+#[derive(Deserialize, Serialize, Clone, Debug)]
+pub struct FixedLengthPriorityQueue<T: Ord> {
+    heap: BinaryHeap<Reverse<T>>,
+    length: NonZeroUsize,
+}
+
+impl<T: Ord> Default for FixedLengthPriorityQueue<T> {
+    fn default() -> Self {
+        Self::new(1)
+    }
+}
+
+impl<T: Ord> FixedLengthPriorityQueue<T> {
+    /// Creates a new queue with the given length
+    /// Panics if length is 0
+    pub fn new(length: usize) -> Self {
+        let heap = BinaryHeap::with_capacity(
+            length
+                .saturating_add(1)
+                .min(LARGEST_REASONABLE_ALLOCATION_SIZE),
+        );
+        let length = NonZeroUsize::new(length).expect("length must be greater than zero");
+        FixedLengthPriorityQueue::<T> { heap, length }
+    }
+
+    /// Pushes a value into the priority queue.
+    ///
+    /// If the queue is full, replaces the smallest value and returns it.
+    pub fn push(&mut self, value: T) -> Option<T> {
+        if self.heap.len() < self.length.into() {
+            self.heap.push(Reverse(value));
+            return None;
+        }
+
+        let mut x = self.heap.peek_mut().unwrap();
+        let mut value = Reverse(value);
+        if x.0 < value.0 {
+            std::mem::swap(&mut *x, &mut value);
+        }
+        Some(value.0)
+    }
+
+    /// Consumes the [`FixedLengthPriorityQueue`] and returns a vector
+    /// in sorted (descending) order.
+    pub fn into_sorted_vec(self) -> Vec<T> {
+        Reverse::peel_vec(self.heap.into_sorted_vec())
+    }
+
+    /// Returns an iterator over the elements in the queue, in arbitrary order.
+    pub fn iter_unsorted(&self) -> std::slice::Iter<'_, T> {
+        Reverse::peel_slice(self.heap.as_slice()).iter()
+    }
+
+    /// Returns an iterator over the elements in the queue
+    /// in sorted (descending) order.
+    pub fn into_iter_sorted(self) -> VecIntoIter<T> {
+        self.into_sorted_vec().into_iter()
+    }
+
+    /// Returns the smallest element of the queue,
+    /// if there is any.
+    pub fn top(&self) -> Option<&T> {
+        self.heap.peek().map(|x| &x.0)
+    }
+
+    /// Returns the actual length of the queue
+    #[allow(dead_code)]
+    pub fn len(&self) -> usize {
+        self.heap.len()
+    }
+
+    /// Checks if the queue is empty
+    #[allow(dead_code)]
+    pub fn is_empty(&self) -> bool {
+        self.heap.is_empty()
+    }
+}
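A small usage sketch of the queue (not part of the patch; the `use` path is an assumption):

```rust
// The queue keeps the N largest items; once full, `push` evicts and returns
// the smallest of the currently kept items.
use databend_storages_common_index::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;

fn main() {
    let mut top3 = FixedLengthPriorityQueue::new(3);
    for score in [5, 1, 9, 3, 7] {
        let _evicted = top3.push(score); // returns Some(evicted) when full
    }
    assert_eq!(top3.top(), Some(&5)); // smallest of the kept elements
    assert_eq!(top3.into_sorted_vec(), vec![9, 7, 5]); // descending
}
```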
diff --git a/src/query/storages/common/index/src/hnsw_index/common/mod.rs b/src/query/storages/common/index/src/hnsw_index/common/mod.rs
new file mode 100644
index 0000000000000..6ec58d62c8199
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/mod.rs
@@ -0,0 +1,22 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod bitpacking;
+pub mod bitpacking_links;
+pub mod bitpacking_ordered;
+pub mod fixed_length_priority_queue;
+pub mod types;
+pub mod utils;
+pub mod zeros;
diff --git a/src/query/storages/common/index/src/hnsw_index/common/types.rs b/src/query/storages/common/index/src/hnsw_index/common/types.rs
new file mode 100644
index 0000000000000..65e05aa4e28fd
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/types.rs
@@ -0,0 +1,48 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Ordering;
+
+use ordered_float::OrderedFloat;
+use zerocopy::FromBytes;
+use zerocopy::Immutable;
+use zerocopy::IntoBytes;
+use zerocopy::KnownLayout;
+
+/// Type of vector matching score
+pub type ScoreType = f32;
+/// Type of point index inside a segment
+pub type PointOffsetType = u32;
+
+#[derive(Copy, Clone, PartialEq, Debug, Default, FromBytes, IntoBytes, KnownLayout, Immutable)]
+#[repr(C)]
+pub struct ScoredPointOffset {
+    pub idx: PointOffsetType,
+    pub score: ScoreType,
+}
+
+impl Eq for ScoredPointOffset {}
+
+impl Ord for ScoredPointOffset {
+    fn cmp(&self, other: &Self) -> Ordering {
+        OrderedFloat(self.score).cmp(&OrderedFloat(other.score))
+    }
+}
+
+impl PartialOrd for ScoredPointOffset {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/common/utils.rs b/src/query/storages/common/index/src/hnsw_index/common/utils.rs
new file mode 100644
index 0000000000000..db1f75e1cdb48
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/utils.rs
@@ -0,0 +1,31 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::Ordering;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+
+pub fn rev_range(a: usize, b: usize) -> impl Iterator<Item = usize> {
+    (b + 1..=a).rev()
+}
+
+pub fn check_process_stopped(stopped: &AtomicBool) -> Result<()> {
+    if stopped.load(Ordering::Relaxed) {
+        return Err(ErrorCode::Internal("check process stopped error"));
+    }
+    Ok(())
+}
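Before the next file, a tiny standalone demonstration (not part of the patch) of `rev_range`, which the layer search below uses to walk HNSW levels top-down; the helper is inlined so the snippet is self-contained:

```rust
// `rev_range(a, b)` iterates from `a` down to `b + 1`, exclusive of `b`.
fn rev_range(a: usize, b: usize) -> impl Iterator<Item = usize> {
    (b + 1..=a).rev()
}

fn main() {
    // Descend from the entry level (5) towards the target level (0),
    // visiting levels 5, 4, 3, 2, 1 but stopping before 0.
    let levels: Vec<usize> = rev_range(5, 0).collect();
    assert_eq!(levels, [5, 4, 3, 2, 1]);
}
```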
diff --git a/src/query/storages/common/index/src/hnsw_index/common/zeros.rs b/src/query/storages/common/index/src/hnsw_index/common/zeros.rs
new file mode 100644
index 0000000000000..040830e06dbf9
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/zeros.rs
@@ -0,0 +1,33 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::io::Result;
+use std::io::Write;
+
+static ZEROS: [u8; 8096] = [0u8; 8096];
+
+pub trait WriteZerosExt {
+    /// Write `len` zeros to the writer.
+    fn write_zeros(&mut self, len: usize) -> Result<()>;
+}
+
+impl<W: Write> WriteZerosExt for W {
+    fn write_zeros(&mut self, mut len: usize) -> Result<()> {
+        while len > 0 {
+            len -= self.write(&ZEROS[..ZEROS.len().min(len)])?;
+        }
+        Ok(())
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/entry_points.rs b/src/query/storages/common/index/src/hnsw_index/entry_points.rs
new file mode 100644
index 0000000000000..cb9970583f867
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/entry_points.rs
@@ -0,0 +1,162 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Ordering;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+use crate::hnsw_index::common::types::PointOffsetType;
+
+#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)]
+pub struct EntryPoint {
+    pub point_id: PointOffsetType,
+    pub level: usize,
+}
+
+impl Eq for EntryPoint {}
+
+impl PartialOrd for EntryPoint {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for EntryPoint {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.level.cmp(&other.level)
+    }
+}
+
+#[derive(Deserialize, Serialize, Clone, Debug)]
+pub struct EntryPoints {
+    entry_points: Vec<EntryPoint>,
+    extra_entry_points: FixedLengthPriorityQueue<EntryPoint>,
+}
+
+impl EntryPoints {
+    pub fn new(extra_entry_points: usize) -> Self {
+        EntryPoints {
+            entry_points: vec![],
+            extra_entry_points: FixedLengthPriorityQueue::new(extra_entry_points),
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn merge_from_other(&mut self, mut other: EntryPoints) {
+        self.entry_points.append(&mut other.entry_points);
+        // Do not merge `extra_entry_points` to prevent duplications
+    }
+
+    pub fn new_point<F>(
+        &mut self,
+        new_point: PointOffsetType,
+        level: usize,
+        checker: F,
+    ) -> Option<EntryPoint>
+    where
+        F: Fn(PointOffsetType) -> bool,
+    {
+        // There are three cases:
+        // - There is a proper entry point for the new point at the same or a higher level - return it
+        // - The new point is higher than any alternative - return the next best thing
+        // - There are no points or alternatives - return None
+
+        for i in 0..self.entry_points.len() {
+            let candidate = &self.entry_points[i];
+
+            if !checker(candidate.point_id) {
+                continue; // Candidate does not fulfil the filtering condition, hence it does not "exist"
+            }
+            // Found a candidate
+            return if candidate.level >= level {
+                // A good enough entry point exists.
+                // Return it, and also try to save the given point if required
+                self.extra_entry_points.push(EntryPoint {
+                    point_id: new_point,
+                    level,
+                });
+                Some(candidate.clone())
+            } else {
+                // The new point is better than the existing one
+                let entry = self.entry_points[i].clone();
+                self.entry_points[i] = EntryPoint {
+                    point_id: new_point,
+                    level,
+                };
+                self.extra_entry_points.push(entry.clone());
+                Some(entry)
+            };
+        }
+        // No entry points found. Create a new one and return None.
+        let new_entry = EntryPoint {
+            point_id: new_point,
+            level,
+        };
+        self.entry_points.push(new_entry);
+        None
+    }
+
+    /// Find the highest `EntryPoint` which satisfies the filtering condition of `checker`
+    pub fn get_entry_point<F>(&self, checker: F) -> Option<EntryPoint>
+    where F: Fn(PointOffsetType) -> bool {
+        self.entry_points
+            .iter()
+            .find(|entry| checker(entry.point_id))
+            .cloned()
+            .or_else(|| {
+                // Searching for at least some entry point
+                self.extra_entry_points
+                    .iter_unsorted()
+                    .filter(|entry| checker(entry.point_id))
+                    .cloned()
+                    .max_by_key(|ep| ep.level)
+            })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rand::thread_rng;
+    use rand::Rng;
+
+    use super::*;
+
+    #[test]
+    fn test_entry_points() {
+        let mut points = EntryPoints::new(10);
+
+        let mut rng = thread_rng();
+
+        for i in 0..1000 {
+            let level = rng.gen_range(0..10000);
+            points.new_point(i, level, |_x| true);
+        }
+
+        assert_eq!(points.entry_points.len(), 1);
+        assert_eq!(points.extra_entry_points.len(), 10);
+
+        assert!(points.entry_points[0].level > 1);
+
+        for i in 1000..2000 {
+            let level = rng.gen_range(0..10000);
+            points.new_point(i, level, |x| x % 5 == i % 5);
+        }
+
+        assert_eq!(points.entry_points.len(), 5);
+        assert_eq!(points.extra_entry_points.len(), 10);
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_layers.rs b/src/query/storages/common/index/src/hnsw_index/graph_layers.rs
new file mode 100644
index 0000000000000..bc5e31f53a224
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_layers.rs
@@ -0,0 +1,291 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::borrow::Cow;
+use std::cmp::max;
+use std::sync::atomic::AtomicBool;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use itertools::Itertools;
+use serde::Deserialize;
+use serde::Serialize;
+
+use super::entry_points::EntryPoint;
+use super::graph_links::GraphLinks;
+use super::graph_links::GraphLinksFormat;
+use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::types::ScoredPointOffset;
+use crate::hnsw_index::common::utils::check_process_stopped;
+use crate::hnsw_index::common::utils::rev_range;
+use crate::hnsw_index::entry_points::EntryPoints;
+use crate::hnsw_index::graph_links::GraphLinksSerializer;
+use crate::hnsw_index::point_scorer::FilteredScorer;
+use crate::hnsw_index::search_context::SearchContext;
+use crate::hnsw_index::visited_pool::VisitedListHandle;
+use crate::hnsw_index::visited_pool::VisitedPool;
+
+pub type LinkContainer = Vec<PointOffsetType>;
+#[allow(dead_code)]
+pub type LayersContainer = Vec<LinkContainer>;
+
+/// Contents of the `graph.bin` file.
+#[derive(Deserialize, Serialize, Debug)]
+pub(super) struct GraphLayerData<'a> {
+    pub(super) m: usize,
+    pub(super) m0: usize,
+    pub(super) ef_construct: usize,
+    pub(super) entry_points: Cow<'a, EntryPoints>,
+}
+
+#[derive(Debug)]
+pub struct GraphLayers {
+    pub(super) m: usize,
+    pub(super) m0: usize,
+    pub(super) links: GraphLinks,
+    pub(super) entry_points: EntryPoints,
+    pub(super) visited_pool: VisitedPool,
+}
+
+pub trait GraphLayersBase {
+    fn get_visited_list_from_pool(&self) -> VisitedListHandle;
+
+    fn links_map<F>(&self, point_id: PointOffsetType, level: usize, f: F)
+    where F: FnMut(PointOffsetType);
+
+    /// Get M based on current level
+    fn get_m(&self, level: usize) -> usize;
+
+    /// Greedy search for closest points within a single graph layer
+    fn _search_on_level(
+        &self,
+        searcher: &mut SearchContext,
+        level: usize,
+        visited_list: &mut VisitedListHandle,
+        points_scorer: &mut FilteredScorer,
+        is_stopped: &AtomicBool,
+    ) -> Result<()> {
+        let limit = self.get_m(level);
+        let mut points_ids: Vec<PointOffsetType> = Vec::with_capacity(2 * limit);
+
+        while let Some(candidate) = searcher.candidates.pop() {
+            check_process_stopped(is_stopped)?;
+
+            if candidate.score < searcher.lower_bound() {
+                break;
+            }
+
+            points_ids.clear();
+            self.links_map(candidate.idx, level, |link| {
+                if !visited_list.check(link) {
+                    points_ids.push(link);
+                }
+            });
+
+            let scores = points_scorer.score_points(&mut points_ids, limit);
+            scores.iter().copied().for_each(|score_point| {
+                searcher.process_candidate(score_point);
+                visited_list.check_and_update_visited(score_point.idx);
+            });
+        }
+
+        Ok(())
+    }
+
+    fn search_on_level(
+        &self,
+        level_entry: ScoredPointOffset,
+        level: usize,
+        ef: usize,
+        points_scorer: &mut FilteredScorer,
+        is_stopped: &AtomicBool,
+    ) -> Result<FixedLengthPriorityQueue<ScoredPointOffset>> {
+        let mut visited_list = self.get_visited_list_from_pool();
+        visited_list.check_and_update_visited(level_entry.idx);
+        let mut search_context = SearchContext::new(level_entry, ef);
+
+        self._search_on_level(
+            &mut search_context,
+            level,
+            &mut visited_list,
+            points_scorer,
+            is_stopped,
+        )?;
+        Ok(search_context.nearest)
+    }
+
+    /// Greedy searches for entry point of level `target_level`.
+    /// Beam size is 1.
+    fn search_entry(
+        &self,
+        entry_point: PointOffsetType,
+        top_level: usize,
+        target_level: usize,
+        points_scorer: &mut FilteredScorer,
+        is_stopped: &AtomicBool,
+    ) -> Result<ScoredPointOffset> {
+        let mut links: Vec<PointOffsetType> = Vec::with_capacity(2 * self.get_m(0));
+
+        let mut current_point = ScoredPointOffset {
+            idx: entry_point,
+            score: points_scorer.score_point(entry_point),
+        };
+        for level in rev_range(top_level, target_level) {
+            check_process_stopped(is_stopped)?;
+
+            let limit = self.get_m(level);
+
+            let mut changed = true;
+            while changed {
+                changed = false;
+
+                links.clear();
+                self.links_map(current_point.idx, level, |link| {
+                    links.push(link);
+                });
+
+                let scores = points_scorer.score_points(&mut links, limit);
+                scores.iter().copied().for_each(|score_point| {
+                    if score_point.score > current_point.score {
+                        changed = true;
+                        current_point = score_point;
+                    }
+                });
+            }
+        }
+        Ok(current_point)
+    }
+}
+
+impl GraphLayersBase for GraphLayers {
+    fn get_visited_list_from_pool(&self) -> VisitedListHandle {
+        self.visited_pool.get(self.links.num_points())
+    }
+
+    fn links_map<F>(&self, point_id: PointOffsetType, level: usize, f: F)
+    where F: FnMut(PointOffsetType) {
+        self.links.links(point_id, level).for_each(f);
+    }
+
+    fn get_m(&self, level: usize) -> usize {
+        if level == 0 {
+            self.m0
+        } else {
+            self.m
+        }
+    }
+}
+
+/// Object contains links between nodes for HNSW search
+///
+/// Assume all scores are similarities. Larger score = closer points
+impl GraphLayers {
+    /// Returns the highest level this point is included in
+    pub fn point_level(&self, point_id: PointOffsetType) -> usize {
+        self.links.point_level(point_id)
+    }
+
+    fn get_entry_point(
+        &self,
+        points_scorer: &FilteredScorer,
+        custom_entry_points: Option<&[PointOffsetType]>,
+    ) -> Option<EntryPoint> {
+        // Try to get it from custom entry points
+        custom_entry_points
+            .and_then(|custom_entry_points| {
+                custom_entry_points
+                    .iter()
+                    .filter(|&&point_id| points_scorer.check_vector(point_id))
+                    .map(|&point_id| {
+                        let level = self.point_level(point_id);
+                        EntryPoint { point_id, level }
+                    })
+                    .max_by_key(|ep| ep.level)
+            })
+            .or_else(|| {
+                // Otherwise use normal entry points
+                self.entry_points
+                    .get_entry_point(|point_id| points_scorer.check_vector(point_id))
+            })
+    }
+
+    pub fn search(
+        &self,
+        top: usize,
+        ef: usize,
+        mut points_scorer: FilteredScorer,
+        custom_entry_points: Option<&[PointOffsetType]>,
+        is_stopped: &AtomicBool,
+    ) -> Result<Vec<ScoredPointOffset>> {
+        let Some(entry_point) = self.get_entry_point(&points_scorer, custom_entry_points) else {
+            return Ok(Vec::default());
+        };
+
+        let zero_level_entry = self.search_entry(
+            entry_point.point_id,
+            entry_point.level,
+            0,
+            &mut points_scorer,
+            is_stopped,
+        )?;
+        let nearest = self.search_on_level(
+            zero_level_entry,
+            0,
+            max(top, ef),
+            &mut points_scorer,
+            is_stopped,
+        )?;
+        Ok(nearest.into_iter_sorted().take(top).collect_vec())
+    }
+
+    #[allow(dead_code)]
+    pub fn num_points(&self) -> usize {
+        self.links.num_points()
+    }
+}
+
+impl GraphLayers {
+    pub fn open(links_slice: &[u8], data_slice: &[u8]) -> Result<Self> {
+        let (graph_data, _): (GraphLayerData, _) =
+            bincode::serde::decode_from_slice(data_slice, bincode::config::standard()).map_err(
+                |e| ErrorCode::StorageOther(format!("failed to decode graph layer data {:?}", e)),
+            )?;
+
+        let graph_links = GraphLinks::load(links_slice)?;
+        Ok(Self {
+            m: graph_data.m,
+            m0: graph_data.m0,
+            links: graph_links,
+            entry_points: graph_data.entry_points.into_owned(),
+            visited_pool: VisitedPool::new(),
+        })
+    }
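The greedy descent in `search_entry` is the heart of the upper-layer search. A toy, self-contained sketch of the same loop (not part of the patch; all names here are hypothetical):

```rust
// Beam-width-1 greedy search on one layer: keep moving to any strictly
// better-scoring neighbor until no neighbor improves on the current point.
fn greedy_step(neighbors: &[Vec<usize>], score: impl Fn(usize) -> f32, start: usize) -> usize {
    let mut current = start;
    let mut changed = true;
    while changed {
        changed = false;
        for &n in &neighbors[current] {
            if score(n) > score(current) {
                current = n; // larger score = closer, as in the patch
                changed = true;
            }
        }
    }
    current // local maximum on this layer; entry point for the layer below
}

fn main() {
    // Chain 0-1-2-3 where the query is closest to point 3.
    let neighbors = vec![vec![1], vec![0, 2], vec![1, 3], vec![2]];
    let score = |p: usize| p as f32; // stand-in for vector similarity
    assert_eq!(greedy_step(&neighbors, score, 0), 3);
}
```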
+    #[allow(dead_code)]
+    pub fn compress_ram(&mut self) {
+        assert_eq!(self.links.format(), GraphLinksFormat::Plain);
+        let dummy = GraphLinksSerializer::new(Vec::new(), GraphLinksFormat::Plain, 0, 0)
+            .to_graph_links_ram();
+        let links = std::mem::replace(&mut self.links, dummy);
+        self.links = GraphLinksSerializer::new(
+            links.into_edges(),
+            GraphLinksFormat::Compressed,
+            self.m,
+            self.m0,
+        )
+        .to_graph_links_ram();
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs b/src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs
new file mode 100644
index 0000000000000..c226dcc146746
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs
@@ -0,0 +1,571 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::borrow::Cow;
+use std::cmp::max;
+use std::cmp::min;
+use std::collections::BinaryHeap;
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::AtomicUsize;
+
+use bitvec::prelude::BitVec;
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use parking_lot::Mutex;
+use parking_lot::MutexGuard;
+use parking_lot::RwLock;
+use rand::distributions::Uniform;
+use rand::Rng;
+
+use super::graph_layers::GraphLayerData;
+use super::graph_links::GraphLinksFormat;
+use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::types::ScoreType;
+use crate::hnsw_index::common::types::ScoredPointOffset;
+use crate::hnsw_index::entry_points::EntryPoints;
+use crate::hnsw_index::graph_layers::GraphLayers;
+use crate::hnsw_index::graph_layers::GraphLayersBase;
+use crate::hnsw_index::graph_layers::LinkContainer;
+use crate::hnsw_index::graph_links::GraphLinksSerializer;
+use crate::hnsw_index::point_scorer::FilteredScorer;
+use crate::hnsw_index::search_context::SearchContext;
+use crate::hnsw_index::visited_pool::VisitedListHandle;
+use crate::hnsw_index::visited_pool::VisitedPool;
+
+pub type LockedLinkContainer = RwLock<LinkContainer>;
+pub type LockedLayersContainer = Vec<LockedLinkContainer>;
+
+/// Same as `GraphLayers`, but allows to build in parallel
+/// Convertible to `GraphLayers`
+pub struct GraphLayersBuilder {
+    max_level: AtomicUsize,
+    m: usize,
+    m0: usize,
+    ef_construct: usize,
+    // Factor of level probability
+    level_factor: f64,
+    // Exclude points according to "not closer than base" heuristic?
+    use_heuristic: bool,
+    links_layers: Vec<LockedLayersContainer>,
+    entry_points: Mutex<EntryPoints>,
+
+    // Fields used on construction phase only
+    visited_pool: VisitedPool,
+
+    // List of bool flags, which defines if the point is already indexed or not
+    ready_list: RwLock<BitVec>,
+}
+
+impl GraphLayersBase for GraphLayersBuilder {
+    fn get_visited_list_from_pool(&self) -> VisitedListHandle {
+        self.visited_pool.get(self.num_points())
+    }
+
+    fn links_map<F>(&self, point_id: PointOffsetType, level: usize, mut f: F)
+    where F: FnMut(PointOffsetType) {
+        let links = self.links_layers[point_id as usize][level].read();
+        let ready_list = self.ready_list.read();
+        for link in links.iter() {
+            if ready_list[*link as usize] {
+                f(*link);
+            }
+        }
+    }
+
+    fn get_m(&self, level: usize) -> usize {
+        if level == 0 {
+            self.m0
+        } else {
+            self.m
+        }
+    }
+}
+
+impl GraphLayersBuilder {
+    #[allow(dead_code)]
+    pub fn get_entry_points(&self) -> MutexGuard<EntryPoints> {
+        self.entry_points.lock()
+    }
+
+    pub fn into_graph_data(self, format: GraphLinksFormat) -> Result<(Vec<u8>, Vec<u8>)> {
+        let serializer =
+            Self::links_layers_to_serializer(self.links_layers, format, self.m, self.m0);
+        let mut links_buf = Vec::new();
+        serializer.serialize_to_writer(&mut links_buf)?;
+
+        let entry_points = self.entry_points.into_inner();
+        let data = GraphLayerData {
+            m: self.m,
+            m0: self.m0,
+            ef_construct: self.ef_construct,
+            entry_points: Cow::Borrowed(&entry_points),
+        };
+
+        let data_buf =
+            bincode::serde::encode_to_vec(data, bincode::config::standard()).map_err(|e| {
+                ErrorCode::StorageOther(format!("failed to encode graph layer data {:?}", e))
+            })?;
+
+        Ok((links_buf, data_buf))
+    }
+
+    #[allow(dead_code)]
+    pub fn into_graph_layers_ram(self, format: GraphLinksFormat) -> GraphLayers {
+        GraphLayers {
+            m: self.m,
+            m0: self.m0,
+            links: Self::links_layers_to_serializer(self.links_layers, format, self.m, self.m0)
+                .to_graph_links_ram(),
+            entry_points: self.entry_points.into_inner(),
+            visited_pool: self.visited_pool,
+        }
+    }
+
+    fn links_layers_to_serializer(
+        link_layers: Vec<LockedLayersContainer>,
+        format: GraphLinksFormat,
+        m: usize,
+        m0: usize,
+    ) -> GraphLinksSerializer {
+        let edges = link_layers
+            .into_iter()
+            .map(|l| l.into_iter().map(|l| l.into_inner()).collect())
+            .collect();
+        GraphLinksSerializer::new(edges, format, m, m0)
+    }
+
+    pub fn new_with_params(
+        num_vectors: usize, // Initial number of points in index
+        m: usize,           // Expected M for non-first layer
+        m0: usize,          // Expected M for first layer
+        ef_construct: usize,
+        entry_points_num: usize, // Depends on number of points
+        use_heuristic: bool,
+        reserve: bool,
+    ) -> Self {
+        let links_layers = std::iter::repeat_with(|| {
+            vec![RwLock::new(if reserve {
+                Vec::with_capacity(m0)
+            } else {
+                vec![]
+            })]
+        })
+        .take(num_vectors)
+        .collect();
+
+        let ready_list = RwLock::new(BitVec::repeat(false, num_vectors));
+
+        Self {
+            max_level: AtomicUsize::new(0),
+            m,
+            m0,
+            ef_construct,
+            level_factor: 1.0 / (max(m, 2) as f64).ln(),
+            use_heuristic,
+            links_layers,
+            entry_points: Mutex::new(EntryPoints::new(entry_points_num)),
+            visited_pool: VisitedPool::new(),
+            ready_list,
+        }
+    }
+
+    pub fn new(
+        num_vectors: usize, // Initial number of points in index
+        m: usize,           // Expected M for non-first layer
+        m0: usize,          // Expected M for first layer
+        ef_construct: usize,
+        entry_points_num: usize, // Depends on number of points
+        use_heuristic: bool,
+    ) -> Self {
+        Self::new_with_params(
+            num_vectors,
+            m,
+            m0,
+            ef_construct,
+            entry_points_num,
+            use_heuristic,
+            true,
+        )
+    }
+    #[allow(dead_code)]
+    pub fn merge_from_other(&mut self, other: GraphLayersBuilder) {
+        self.max_level = AtomicUsize::new(max(
+            self.max_level.load(std::sync::atomic::Ordering::Relaxed),
+            other.max_level.load(std::sync::atomic::Ordering::Relaxed),
+        ));
+        let mut visited_list = self.visited_pool.get(self.num_points());
+        if other.links_layers.len() > self.links_layers.len() {
+            self.links_layers
+                .resize_with(other.links_layers.len(), Vec::new);
+        }
+        for (point_id, layers) in other.links_layers.into_iter().enumerate() {
+            let current_layers = &mut self.links_layers[point_id];
+            for (level, other_links) in layers.into_iter().enumerate() {
+                if current_layers.len() <= level {
+                    current_layers.push(other_links);
+                } else {
+                    let other_links = other_links.into_inner();
+                    visited_list.next_iteration();
+                    let mut current_links = current_layers[level].write();
+                    current_links.iter().copied().for_each(|x| {
+                        visited_list.check_and_update_visited(x);
+                    });
+                    for other_link in other_links
+                        .into_iter()
+                        .filter(|x| !visited_list.check_and_update_visited(*x))
+                    {
+                        current_links.push(other_link);
+                    }
+                }
+            }
+        }
+        self.entry_points
+            .lock()
+            .merge_from_other(other.entry_points.into_inner());
+    }
+
+    fn num_points(&self) -> usize {
+        self.links_layers.len()
+    }
+
+    /// Generate a random level for a new point, according to a geometric distribution
+    pub fn get_random_layer<R>(&self, rng: &mut R) -> usize
+    where R: Rng + ?Sized {
+        // let distribution = Uniform::new(0.0, 1.0).unwrap();
+        let distribution = Uniform::new(0.0, 1.0);
+        let sample: f64 = rng.sample(distribution);
+        let picked_level = -sample.ln() * self.level_factor;
+        picked_level.round() as usize
+    }
+
+    pub(crate) fn get_point_level(&self, point_id: PointOffsetType) -> usize {
+        self.links_layers[point_id as usize].len() - 1
+    }
+
+    pub fn set_levels(&mut self, point_id: PointOffsetType, level: usize) {
+        if self.links_layers.len() <= point_id as usize {
+            while self.links_layers.len() <= point_id as usize {
+                self.links_layers.push(vec![]);
+            }
+        }
+        let point_layers = &mut self.links_layers[point_id as usize];
+        while point_layers.len() <= level {
+            let links = Vec::with_capacity(self.m);
+            point_layers.push(RwLock::new(links));
+        }
+        self.max_level
+            .fetch_max(level, std::sync::atomic::Ordering::Relaxed);
+    }
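The sampler above yields a geometric-like level distribution; a standalone sketch (not part of the patch) that reproduces the same formula and shows the fall-off empirically:

```rust
// With m = 16, level_factor = 1 / ln(16), so P(level >= k) = 16^(0.5 - k)
// for k >= 1: each level above the first is roughly 16x rarer.
use rand::Rng;

fn main() {
    let m = 16.0_f64;
    let level_factor = 1.0 / m.ln();
    let mut rng = rand::thread_rng();
    let mut histogram = [0usize; 8];
    for _ in 0..100_000 {
        let sample: f64 = rng.gen_range(0.0..1.0);
        let level = (-sample.ln() * level_factor).round() as usize;
        histogram[level.min(7)] += 1;
    }
    // Counts fall off geometrically, e.g. roughly 75k, 23k, 1.5k, 90, ...
    println!("{histogram:?}");
}
```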
+    /// Connect a new point to `links`, so that `links` contains only the closest points
+    fn connect_new_point<F>(
+        links: &mut LinkContainer,
+        new_point_id: PointOffsetType,
+        target_point_id: PointOffsetType,
+        level_m: usize,
+        mut score_internal: F,
+    ) where
+        F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
+    {
+        // ToDo: binary search here? (most likely not worth it)
+        let new_to_target = score_internal(target_point_id, new_point_id);
+
+        let mut id_to_insert = links.len();
+        for (i, &item) in links.iter().enumerate() {
+            let target_to_link = score_internal(target_point_id, item);
+            if target_to_link < new_to_target {
+                id_to_insert = i;
+                break;
+            }
+        }
+
+        if links.len() < level_m {
+            links.insert(id_to_insert, new_point_id);
+        } else if id_to_insert != links.len() {
+            links.pop();
+            links.insert(id_to_insert, new_point_id);
+        }
+    }
+
+    ///
+    fn select_candidate_with_heuristic_from_sorted<F>(
+        candidates: impl Iterator<Item = ScoredPointOffset>,
+        m: usize,
+        mut score_internal: F,
+    ) -> Vec<PointOffsetType>
+    where
+        F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
+    {
+        let mut result_list = Vec::with_capacity(m);
+        for current_closest in candidates {
+            if result_list.len() >= m {
+                break;
+            }
+            let mut is_good = true;
+            for &selected_point in &result_list {
+                let dist_to_already_selected = score_internal(current_closest.idx, selected_point);
+                if dist_to_already_selected > current_closest.score {
+                    is_good = false;
+                    break;
+                }
+            }
+            if is_good {
+                result_list.push(current_closest.idx);
+            }
+        }
+
+        result_list
+    }
+
+    ///
+    pub(crate) fn select_candidates_with_heuristic<F>(
+        candidates: FixedLengthPriorityQueue<ScoredPointOffset>,
+        m: usize,
+        score_internal: F,
+    ) -> Vec<PointOffsetType>
+    where
+        F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
+    {
+        let closest_iter = candidates.into_iter_sorted();
+        Self::select_candidate_with_heuristic_from_sorted(closest_iter, m, score_internal)
+    }
+
+    pub fn link_new_point(&self, point_id: PointOffsetType, mut points_scorer: FilteredScorer) {
+        // Check if there is a suitable entry point
+        // - its level is higher or equal
+        // - it satisfies filters
+
+        let level = self.get_point_level(point_id);
+
+        let entry_point_opt = self
+            .entry_points
+            .lock()
+            .get_entry_point(|point_id| points_scorer.check_vector(point_id));
+        if let Some(entry_point) = entry_point_opt {
+            let mut level_entry = if entry_point.level > level {
+                // The entry point is higher than the new point.
+                // Let's find the closest one on the same level.
+
+                // greedy search for a single closest point
+                self.search_entry(
+                    entry_point.point_id,
+                    entry_point.level,
+                    level,
+                    &mut points_scorer,
+                    &AtomicBool::new(false),
+                )
+                .unwrap()
+            } else {
+                ScoredPointOffset {
+                    idx: entry_point.point_id,
+                    score: points_scorer.score_internal(point_id, entry_point.point_id),
+                }
+            };
+            // minimal common level for entry points
+            let linking_level = min(level, entry_point.level);
+
+            for curr_level in (0..=linking_level).rev() {
+                level_entry = self.link_new_point_on_level(
+                    point_id,
+                    curr_level,
+                    &mut points_scorer,
+                    level_entry,
+                );
+            }
+        } else {
+            // New point is a new empty entry (for this filter, at least)
+            // We can't do much here, so just quit
+        }
+        let was_ready = self.ready_list.write().replace(point_id as usize, true);
+        debug_assert!(!was_ready, "Point {point_id} was already marked as ready");
+        self.entry_points
+            .lock()
+            .new_point(point_id, level, |point_id| {
+                points_scorer.check_vector(point_id)
+            });
+    }
+
+    /// Add a new point using pre-existing links.
+    /// Mutually exclusive with [`Self::link_new_point`].
+    #[allow(dead_code)]
+    pub fn add_new_point(&self, point_id: PointOffsetType, levels: Vec<Vec<PointOffsetType>>) {
+        let level = self.get_point_level(point_id);
+        debug_assert_eq!(levels.len(), level + 1);
+
+        for (level, neighbours) in levels.iter().enumerate() {
+            let mut links = self.links_layers[point_id as usize][level].write();
+            links.clear();
+            links.extend_from_slice(neighbours);
+        }
+
+        let was_ready = self.ready_list.write().replace(point_id as usize, true);
+        debug_assert!(!was_ready);
+        self.entry_points
+            .lock()
+            .new_point(point_id, level, |_| true);
+    }
+
+    /// Link a new point on a specific level.
+    /// Returns an entry point for the level below.
+    fn link_new_point_on_level(
+        &self,
+        point_id: PointOffsetType,
+        curr_level: usize,
+        points_scorer: &mut FilteredScorer,
+        mut level_entry: ScoredPointOffset,
+    ) -> ScoredPointOffset {
+        let mut visited_list = self.get_visited_list_from_pool();
+
+        visited_list.check_and_update_visited(level_entry.idx);
+
+        let mut search_context = SearchContext::new(level_entry, self.ef_construct);
+
+        self._search_on_level(
+            &mut search_context,
+            curr_level,
+            &mut visited_list,
+            points_scorer,
+            &AtomicBool::new(false),
+        )
+        .unwrap();
+
+        if let Some(the_nearest) = search_context.nearest.iter_unsorted().max() {
+            level_entry = *the_nearest;
+        }
+
+        if self.use_heuristic {
+            self.link_with_heuristic(
+                point_id,
+                curr_level,
+                &visited_list,
+                points_scorer,
+                search_context,
+            );
+        } else {
+            self.link_without_heuristic(point_id, curr_level, points_scorer, search_context);
+        }
+
+        level_entry
+    }
+
+    fn link_with_heuristic(
+        &self,
+        point_id: PointOffsetType,
+        curr_level: usize,
+        visited_list: &VisitedListHandle,
+        points_scorer: &FilteredScorer,
+        mut search_context: SearchContext,
+    ) {
+        let level_m = self.get_m(curr_level);
+        let scorer = |a, b| points_scorer.score_internal(a, b);
+
+        let selected_nearest = {
+            let mut existing_links = self.links_layers[point_id as usize][curr_level].write();
+            {
+                let ready_list = self.ready_list.read();
+                for &existing_link in existing_links.iter() {
+                    if !visited_list.check(existing_link) && ready_list[existing_link as usize] {
+                        search_context.process_candidate(ScoredPointOffset {
+                            idx: existing_link,
+                            score: points_scorer.score_point(existing_link),
+                        });
+                    }
+                }
+            }
+
+            let selected_nearest =
+                Self::select_candidates_with_heuristic(search_context.nearest, level_m, scorer);
+            existing_links.clone_from(&selected_nearest);
+            selected_nearest
+        };
+
+        for &other_point in &selected_nearest {
+            let mut other_point_links = self.links_layers[other_point as usize][curr_level].write();
+            if other_point_links.len() < level_m {
+                // If the linked point still lacks neighbours
+                other_point_links.push(point_id);
+            } else {
+                let mut candidates = BinaryHeap::with_capacity(level_m + 1);
+                candidates.push(ScoredPointOffset {
+                    idx: point_id,
+                    score: points_scorer.score_internal(point_id, other_point),
+                });
+                for other_point_link in other_point_links.iter().take(level_m).copied() {
+                    candidates.push(ScoredPointOffset {
+                        idx: other_point_link,
+                        score: points_scorer.score_internal(other_point_link, other_point),
+                    });
+                }
+                let selected_candidates = Self::select_candidate_with_heuristic_from_sorted(
+                    candidates.into_sorted_vec().into_iter().rev(),
+                    level_m,
+                    scorer,
+                );
+                other_point_links.clear(); // this does not free memory, which is good
+                for selected in selected_candidates.iter().copied() {
+                    other_point_links.push(selected);
+                }
+            }
+        }
+    }
+
+    fn link_without_heuristic(
+        &self,
+        point_id: PointOffsetType,
curr_level: usize, + points_scorer: &FilteredScorer, + search_context: SearchContext, + ) { + let level_m = self.get_m(curr_level); + let scorer = |a, b| points_scorer.score_internal(a, b); + for nearest_point in search_context.nearest.iter_unsorted() { + { + let mut links = self.links_layers[point_id as usize][curr_level].write(); + Self::connect_new_point(&mut links, nearest_point.idx, point_id, level_m, scorer); + } + + { + let mut links = self.links_layers[nearest_point.idx as usize][curr_level].write(); + Self::connect_new_point(&mut links, point_id, nearest_point.idx, level_m, scorer); + } + } + } + + /// This function returns average number of links per node in HNSW graph + /// on specified level. + /// + /// Useful for: + /// - estimating memory consumption + /// - percolation threshold estimation + /// - debugging + #[allow(dead_code)] + pub fn get_average_connectivity_on_level(&self, level: usize) -> f32 { + let mut sum = 0; + let mut count = 0; + for links in self.links_layers.iter() { + if links.len() > level { + sum += links[level].read().len(); + count += 1; + } + } + if count == 0 { + 0.0 + } else { + sum as f32 / count as f32 + } + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links.rs b/src/query/storages/common/index/src/hnsw_index/graph_links.rs new file mode 100644 index 0000000000000..7574de694a412 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/graph_links.rs @@ -0,0 +1,130 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_exception::Result; + +use crate::hnsw_index::common::types::PointOffsetType; + +mod header; +mod serializer; +mod view; + +pub use serializer::GraphLinksSerializer; +use view::CompressionInfo; +use view::GraphLinksView; +pub use view::LinksIterator; + +// Links data for whole graph layers. +// +// sorted +// points: points: +// points to lvl 012345 142350 +// 0 -> 0 +// 1 -> 4 lvl4: 7 lvl4: 7 +// 2 -> 2 lvl3: Z Y lvl3: ZY +// 3 -> 2 lvl2: abcd lvl2: adbc +// 4 -> 3 lvl1: ABCDE lvl1: ADBCE +// 5 -> 1 lvl0: 123456 lvl0: 123456 <- lvl 0 is not sorted +// +// +// lvl offset: 6 11 15 17 +// │ │ │ │ +// │ │ │ │ +// ▼ ▼ ▼ ▼ +// indexes: 012345 6789A BCDE FG H +// +// flatten: 123456 ADBCE adbc ZY 7 +// ▲ ▲ ▲ ▲ ▲ ▲ ▲ +// │ │ │ │ │ │ │ +// │ │ │ │ │ │ │ +// │ │ │ │ │ │ │ +// reindex: 142350 142350 142350 142350 (same for each level) +// +// +// for lvl > 0: +// links offset = level_offsets[level] + offsets[reindex[point_id]] + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum GraphLinksFormat { + #[allow(dead_code)] + Plain, + Compressed, +} + +self_cell::self_cell! 
{
+    pub struct GraphLinks {
+        owner: Vec<u8>,
+        #[covariant]
+        dependent: GraphLinksView,
+    }
+
+    impl {Debug}
+}
+
+impl GraphLinks {
+    pub fn load(data: &[u8]) -> Result<Self> {
+        let format = GraphLinksFormat::Compressed;
+        Self::try_new(data.to_vec(), |x| GraphLinksView::load(x, format))
+    }
+
+    fn view(&self) -> &GraphLinksView {
+        self.borrow_dependent()
+    }
+
+    pub fn format(&self) -> GraphLinksFormat {
+        match self.view().compression {
+            CompressionInfo::Uncompressed { .. } => GraphLinksFormat::Plain,
+            CompressionInfo::Compressed { .. } => GraphLinksFormat::Compressed,
+        }
+    }
+
+    pub fn num_points(&self) -> usize {
+        self.view().reindex.len()
+    }
+
+    #[allow(dead_code)]
+    pub fn for_each_link(
+        &self,
+        point_id: PointOffsetType,
+        level: usize,
+        f: impl FnMut(PointOffsetType),
+    ) {
+        self.links(point_id, level).for_each(f);
+    }
+
+    #[inline]
+    pub fn links(&self, point_id: PointOffsetType, level: usize) -> LinksIterator {
+        self.view().links(point_id, level)
+    }
+
+    pub fn point_level(&self, point_id: PointOffsetType) -> usize {
+        self.view().point_level(point_id)
+    }
+
+    /// Convert the graph links to a vector of edges, suitable for passing into
+    /// [`GraphLinksSerializer::new`] or using in tests.
+    pub fn into_edges(self) -> Vec<Vec<Vec<PointOffsetType>>> {
+        let mut edges = Vec::with_capacity(self.num_points());
+        for point_id in 0..self.num_points() {
+            let num_levels = self.point_level(point_id as PointOffsetType) + 1;
+            let mut levels = Vec::with_capacity(num_levels);
+            for level in 0..num_levels {
+                levels.push(self.links(point_id as PointOffsetType, level).collect());
+            }
+            edges.push(levels);
+        }
+        edges
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links/header.rs b/src/query/storages/common/index/src/hnsw_index/graph_links/header.rs
new file mode 100644
index 0000000000000..3a538a75e9543
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_links/header.rs
@@ -0,0 +1,52 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use zerocopy::little_endian::U64 as LittleU64;
+use zerocopy::FromBytes;
+use zerocopy::Immutable;
+use zerocopy::IntoBytes;
+use zerocopy::KnownLayout;
+
+use crate::hnsw_index::common::bitpacking_ordered;
+
+/// File header for the plain format.
+#[derive(FromBytes, Immutable, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub(super) struct HeaderPlain {
+    pub(super) point_count: u64,
+    pub(super) levels_count: u64,
+    pub(super) total_links_count: u64,
+    pub(super) total_offset_count: u64,
+    /// Either 0 or 4.
+    pub(super) offsets_padding_bytes: u64,
+    pub(super) zero_padding: [u8; 24],
+}
+
+/// File header for the compressed format.
+#[derive(FromBytes, Immutable, IntoBytes, KnownLayout)]
+#[repr(C, align(8))]
+pub(super) struct HeaderCompressed {
+    pub(super) point_count: LittleU64,
+    /// Should be [`HEADER_VERSION_COMPRESSED`].
+    pub(super) version: LittleU64,
+    pub(super) levels_count: LittleU64,
+    pub(super) total_links_bytes: LittleU64,
+    pub(super) offsets_parameters: bitpacking_ordered::Parameters,
+    pub(super) m: LittleU64,
+    pub(super) m0: LittleU64,
+    pub(super) zero_padding: [u8; 5],
+}
+
+pub(super) const HEADER_VERSION_COMPRESSED: u64 = 0xFFFF_FFFF_FFFF_FF01;
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs b/src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs
new file mode 100644
index 0000000000000..0942eeeab942a
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs
@@ -0,0 +1,233 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Reverse;
+use std::io::Write;
+use std::mem::size_of;
+use std::mem::take;
+
+use itertools::Either;
+use zerocopy::little_endian::U64 as LittleU64;
+use zerocopy::IntoBytes as AsBytes;
+
+use super::header::HeaderCompressed;
+use super::header::HeaderPlain;
+use super::header::HEADER_VERSION_COMPRESSED;
+use super::GraphLinks;
+use super::GraphLinksFormat;
+use crate::hnsw_index::common::bitpacking::packed_bits;
+use crate::hnsw_index::common::bitpacking_links::pack_links;
+use crate::hnsw_index::common::bitpacking_links::MIN_BITS_PER_VALUE;
+use crate::hnsw_index::common::bitpacking_ordered;
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::zeros::WriteZerosExt;
+use crate::hnsw_index::graph_links::GraphLinksView;
+
+pub struct GraphLinksSerializer {
+    m: usize,
+    m0: usize,
+    links: Vec<u8>,
+    kind: Kind,
+    reindex: Vec<PointOffsetType>,
+    level_offsets: Vec<u64>,
+}
+
+enum Kind {
+    Uncompressed {
+        offsets_padding: usize,
+        offsets: Vec<u64>,
+    },
+    Compressed {
+        compressed_offsets: Vec<u8>,
+        offsets_parameters: bitpacking_ordered::Parameters,
+    },
+}
+
+impl GraphLinksSerializer {
+    pub fn new(
+        mut edges: Vec<Vec<Vec<PointOffsetType>>>,
+        format: GraphLinksFormat,
+        m: usize,
+        m0: usize,
+    ) -> Self {
+        // create map from index in `offsets` to point_id
+        let mut back_index: Vec<usize> = (0..edges.len()).collect();
+        // sort by max layer and use this map to build `Self.reindex`
+        back_index.sort_unstable_by_key(|&i| Reverse(edges[i].len()));
+
+        // `reindex` is map from point id to index in `Self.offsets`
+        let mut reindex = vec![0; back_index.len()];
+        for i in 0..back_index.len() {
+            reindex[back_index[i]] = i as PointOffsetType;
+        }
+
+        let levels_count = back_index
+            .first()
+            .map_or(0, |&point_id| edges[point_id].len());
+        let mut point_count_by_level = vec![0; levels_count];
+        for point in &edges {
+            point_count_by_level[point.len() - 1] += 1;
+        }
+
+        let mut total_offsets_len = 0;
+        let mut level_offsets = Vec::with_capacity(levels_count);
+        let mut suffix_sum = point_count_by_level.iter().sum::<u64>();
+        for &value in point_count_by_level.iter() {
+            level_offsets.push(total_offsets_len);
+            total_offsets_len += suffix_sum;
+            suffix_sum -= value;
+        }
+        total_offsets_len += 1;
+
+        let mut links = Vec::new();
+        let mut
offsets = Vec::with_capacity(total_offsets_len as usize);
+        offsets.push(0);
+        let bits_per_unsorted = packed_bits(u32::try_from(edges.len().saturating_sub(1)).unwrap())
+            .max(MIN_BITS_PER_VALUE);
+
+        for level in 0..levels_count {
+            let count = point_count_by_level.iter().skip(level).sum::<u64>() as usize;
+            let (sorted_count, iter) = match level {
+                0 => (m0, Either::Left(0..count)),
+                _ => (m, Either::Right(back_index[..count].iter().copied())),
+            };
+            iter.for_each(|id| {
+                let raw_links = take(&mut edges[id][level]);
+                match format {
+                    GraphLinksFormat::Compressed => {
+                        pack_links(&mut links, raw_links, bits_per_unsorted, sorted_count);
+                        offsets.push(links.len() as u64);
+                    }
+                    GraphLinksFormat::Plain => {
+                        links.extend_from_slice(raw_links.as_bytes());
+                        offsets.push((links.len() as u64) / size_of::<PointOffsetType>() as u64);
+                    }
+                }
+            });
+        }
+
+        let kind = match format {
+            GraphLinksFormat::Compressed => {
+                let (compressed_offsets, offsets_parameters) =
+                    bitpacking_ordered::compress(&offsets);
+                Kind::Compressed {
+                    compressed_offsets,
+                    offsets_parameters,
+                }
+            }
+            GraphLinksFormat::Plain => {
+                let len = links.len() + reindex.as_bytes().len();
+                Kind::Uncompressed {
+                    offsets_padding: len.next_multiple_of(size_of::<u64>()) - len,
+                    offsets,
+                }
+            }
+        };
+
+        Self {
+            m,
+            m0,
+            links,
+            kind,
+            reindex,
+            level_offsets,
+        }
+    }
+
+    pub fn to_graph_links_ram(&self) -> GraphLinks {
+        let format = match &self.kind {
+            Kind::Uncompressed { .. } => GraphLinksFormat::Plain,
+            Kind::Compressed { .. } => GraphLinksFormat::Compressed,
+        };
+
+        let size = self.level_offsets.as_bytes().len()
+            + self.reindex.as_bytes().len()
+            + self.links.len()
+            + (match &self.kind {
+                Kind::Uncompressed {
+                    offsets_padding: padding,
+                    offsets,
+                } => size_of::<HeaderPlain>() + padding + offsets.as_bytes().len(),
+                Kind::Compressed {
+                    compressed_offsets,
+                    offsets_parameters: _,
+                } => size_of::<HeaderCompressed>() + compressed_offsets.len(),
+            });
+
+        let mut data = Vec::with_capacity(size);
+        // Unwrap should be safe, as `impl Write` for `Vec<u8>` never fails.
+        self.serialize_to_writer(&mut data).unwrap();
+        debug_assert_eq!(data.len(), size);
+        // Unwrap should be safe as we just created the data.
+ GraphLinks::try_new(data, |x| GraphLinksView::load(x, format)).unwrap() + } + + pub(crate) fn serialize_to_writer(&self, writer: &mut impl Write) -> std::io::Result<()> { + match &self.kind { + Kind::Uncompressed { + offsets_padding, + offsets, + } => { + let header = HeaderPlain { + point_count: self.reindex.len() as u64, + levels_count: self.level_offsets.len() as u64, + total_links_count: self.links.len() as u64 + / size_of::() as u64, + total_offset_count: offsets.len() as u64, + offsets_padding_bytes: *offsets_padding as u64, + zero_padding: [0; 24], + }; + writer.write_all(header.as_bytes())?; + } + Kind::Compressed { + compressed_offsets: _, + offsets_parameters, + } => { + let header = HeaderCompressed { + version: HEADER_VERSION_COMPRESSED.into(), + point_count: LittleU64::new(self.reindex.len() as u64), + total_links_bytes: LittleU64::new(self.links.len() as u64), + offsets_parameters: *offsets_parameters, + levels_count: LittleU64::new(self.level_offsets.len() as u64), + m: LittleU64::new(self.m as u64), + m0: LittleU64::new(self.m0 as u64), + zero_padding: [0; 5], + }; + writer.write_all(header.as_bytes())?; + } + } + + writer.write_all(self.level_offsets.as_bytes())?; + writer.write_all(self.reindex.as_bytes())?; + writer.write_all(&self.links)?; + match &self.kind { + Kind::Uncompressed { + offsets_padding: padding, + offsets, + } => { + writer.write_zeros(*padding)?; + writer.write_all(offsets.as_bytes())?; + } + Kind::Compressed { + compressed_offsets, + offsets_parameters: _, + } => { + writer.write_all(compressed_offsets)?; + } + } + + Ok(()) + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links/view.rs b/src/query/storages/common/index/src/hnsw_index/graph_links/view.rs new file mode 100644 index 0000000000000..e466c18d54fb8 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/graph_links/view.rs @@ -0,0 +1,189 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::iter::Copied; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use itertools::Either; +use itertools::Itertools as _; +use zerocopy::native_endian::U64 as NativeU64; +use zerocopy::FromBytes; +use zerocopy::Immutable; + +use super::header::HeaderCompressed; +use super::header::HeaderPlain; +use super::header::HEADER_VERSION_COMPRESSED; +use super::GraphLinksFormat; +use crate::hnsw_index::common::bitpacking::packed_bits; +use crate::hnsw_index::common::bitpacking_links::iterate_packed_links; +use crate::hnsw_index::common::bitpacking_links::PackedLinksIterator; +use crate::hnsw_index::common::bitpacking_links::MIN_BITS_PER_VALUE; +use crate::hnsw_index::common::bitpacking_ordered; +use crate::hnsw_index::common::types::PointOffsetType; + +/// An (almost) zero-copy, non-owning view into serialized graph links stored +/// as a `&[u8]` slice. 
+#[derive(Debug)]
+pub(super) struct GraphLinksView<'a> {
+    pub(super) reindex: &'a [PointOffsetType],
+    pub(super) compression: CompressionInfo<'a>,
+    /// Level offsets, copied into RAM for faster access.
+    /// Has at least two elements:
+    /// - [`super::GraphLinksSerializer`] always writes `0` as the first element.
+    /// - Additional element is added during deserialization.
+    pub(super) level_offsets: Vec<u64>,
+}
+
+/// An iterator type returned by [`GraphLinksView::links`].
+pub type LinksIterator<'a> =
+    Either<Copied<std::slice::Iter<'a, PointOffsetType>>, PackedLinksIterator<'a>>;
+
+#[derive(Debug)]
+pub(super) enum CompressionInfo<'a> {
+    Uncompressed {
+        links: &'a [u32],
+        offsets: &'a [NativeU64],
+    },
+    Compressed {
+        compressed_links: &'a [u8],
+        offsets: bitpacking_ordered::Reader<'a>,
+        m: usize,
+        m0: usize,
+        bits_per_unsorted: u8,
+    },
+}
+
+impl GraphLinksView<'_> {
+    pub(super) fn load(data: &[u8], format: GraphLinksFormat) -> Result<GraphLinksView> {
+        match format {
+            GraphLinksFormat::Compressed => Self::load_compressed(data),
+            GraphLinksFormat::Plain => Self::load_plain(data),
+        }
+    }
+
+    fn load_plain(data: &[u8]) -> Result<GraphLinksView> {
+        let (header, data) =
+            HeaderPlain::ref_from_prefix(data).map_err(|_| error_insufficient_size())?;
+        let (level_offsets, data) =
+            read_level_offsets(data, header.levels_count, header.total_offset_count)?;
+        let (reindex, data) = get_slice::<PointOffsetType>(data, header.point_count)?;
+        let (links, data) = get_slice::<u32>(data, header.total_links_count)?;
+        let (_, data) = get_slice::<u8>(data, header.offsets_padding_bytes)?;
+        let (offsets, _bytes) = get_slice::<NativeU64>(data, header.total_offset_count)?;
+        Ok(GraphLinksView {
+            reindex,
+            compression: CompressionInfo::Uncompressed { links, offsets },
+            level_offsets,
+        })
+    }
+
+    fn load_compressed(data: &[u8]) -> Result<GraphLinksView> {
+        let (header, data) =
+            HeaderCompressed::ref_from_prefix(data).map_err(|_| error_insufficient_size())?;
+        debug_assert_eq!(header.version.get(), HEADER_VERSION_COMPRESSED);
+        let (level_offsets, data) = read_level_offsets(
+            data,
+            header.levels_count.get(),
+            header.offsets_parameters.length.get(),
+        )?;
+        let (reindex, data) = get_slice::<PointOffsetType>(data, header.point_count.get())?;
+        let (compressed_links, data) = get_slice::<u8>(data, header.total_links_bytes.get())?;
+        let (offsets, _bytes) = bitpacking_ordered::Reader::new(header.offsets_parameters, data)
+            .map_err(|e| ErrorCode::Internal(format!("Can't create decompressor: {e}")))?;
+        Ok(GraphLinksView {
+            reindex,
+            compression: CompressionInfo::Compressed {
+                compressed_links,
+                offsets,
+                m: header.m.get() as usize,
+                m0: header.m0.get() as usize,
+                bits_per_unsorted: MIN_BITS_PER_VALUE.max(packed_bits(
+                    u32::try_from(header.point_count.get().saturating_sub(1))
+                        .map_err(|_| ErrorCode::Internal("Too many points in GraphLinks file"))?,
+                )),
+            },
+            level_offsets,
+        })
+    }
+
+    pub(super) fn links(&self, point_id: PointOffsetType, level: usize) -> LinksIterator {
+        let idx = if level == 0 {
+            point_id as usize
+        } else {
+            self.level_offsets[level] as usize + self.reindex[point_id as usize] as usize
+        };
+
+        match self.compression {
+            CompressionInfo::Uncompressed { links, offsets } => {
+                let links_range = offsets[idx].get() as usize..offsets[idx + 1].get() as usize;
+                Either::Left(links[links_range].iter().copied())
+            }
+            CompressionInfo::Compressed {
+                compressed_links,
+                ref offsets,
+                m,
+                m0,
+                bits_per_unsorted,
+            } => {
+                let links_range =
+                    offsets.get(idx).unwrap() as usize..offsets.get(idx + 1).unwrap() as usize;
+                Either::Right(iterate_packed_links(
+                    &compressed_links[links_range],
+                    bits_per_unsorted,
+                    if
level == 0 { m0 } else { m },
+                ))
+            }
+        }
+    }
+
+    pub(super) fn point_level(&self, point_id: PointOffsetType) -> usize {
+        let reindexed_point_id = u64::from(self.reindex[point_id as usize]);
+        for (level, (&a, &b)) in self
+            .level_offsets
+            .iter()
+            .skip(1)
+            .tuple_windows()
+            .enumerate()
+        {
+            if reindexed_point_id >= b - a {
+                return level;
+            }
+        }
+        // See the doc comment on `level_offsets`.
+        self.level_offsets.len() - 2
+    }
+}
+
+fn read_level_offsets(
+    bytes: &[u8],
+    levels_count: u64,
+    total_offset_count: u64,
+) -> Result<(Vec<u64>, &[u8])> {
+    let (level_offsets, bytes) = get_slice::<u64>(bytes, levels_count)?;
+    let mut result = Vec::with_capacity(level_offsets.len() + 1);
+    result.extend_from_slice(level_offsets);
+    result.push(total_offset_count.checked_sub(1).ok_or_else(|| {
+        ErrorCode::Internal("Total offset count should be at least 1 in GraphLinks file")
+    })?);
+    Ok((result, bytes))
+}
+
+fn get_slice<T: FromBytes + Immutable>(data: &[u8], length: u64) -> Result<(&[T], &[u8])> {
+    <[T]>::ref_from_prefix_with_elems(data, length as usize).map_err(|_| error_insufficient_size())
+}
+
+fn error_insufficient_size() -> ErrorCode {
+    ErrorCode::Internal("Insufficient file size for GraphLinks file")
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/hnsw.rs b/src/query/storages/common/index/src/hnsw_index/hnsw.rs
new file mode 100644
index 0000000000000..801809d981c1b
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/hnsw.rs
@@ -0,0 +1,374 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
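Taken together, `GraphLinksSerializer` packs the per-point, per-level adjacency lists into the byte layout sketched in the diagram near the top of `graph_links.rs`, and `GraphLinks`/`GraphLinksView` parse it back. A minimal round-trip sketch (not part of the patch; it relies on the crate-internal `serialize_to_writer` and on the compressed format that `GraphLinks::load` assumes):

    use databend_common_exception::Result;

    fn graph_links_round_trip() -> Result<()> {
        // Three points, one level each; neighbours are PointOffsetType (u32) ids.
        let edges: Vec<Vec<Vec<u32>>> = vec![
            vec![vec![1, 2]], // point 0, level 0
            vec![vec![0]],    // point 1, level 0
            vec![vec![0]],    // point 2, level 0
        ];
        // m = 8 and m0 = 16 are arbitrary example parameters.
        let serializer = GraphLinksSerializer::new(edges, GraphLinksFormat::Compressed, 8, 16);
        let mut bytes = Vec::new();
        serializer
            .serialize_to_writer(&mut bytes)
            .expect("writing to a Vec<u8> never fails");

        let links = GraphLinks::load(&bytes)?;
        assert_eq!(links.num_points(), 3);
        assert_eq!(links.point_level(0), 0);
        // Links may come back reordered, since they are re-packed during
        // serialization; only the set of neighbours is preserved.
        let neighbours: Vec<u32> = links.links(0, 0).collect();
        assert_eq!(neighbours.len(), 2);
        Ok(())
    }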
+
+use std::sync::atomic::AtomicBool;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use databend_common_expression::types::Buffer;
+use databend_common_expression::types::DataType;
+use databend_common_expression::types::VectorColumn;
+use databend_common_expression::types::VectorColumnBuilder;
+use databend_common_expression::types::VectorScalar;
+use databend_common_expression::types::F32;
+use databend_common_expression::BlockEntry;
+use databend_common_expression::Column;
+use databend_common_expression::ColumnId;
+use databend_common_expression::Scalar;
+use databend_common_expression::TableDataType;
+use databend_common_expression::TableField;
+use log::error;
+use rand::thread_rng;
+use rayon::iter::IntoParallelIterator;
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::types::ScoredPointOffset;
+use crate::hnsw_index::common::utils::check_process_stopped;
+use crate::hnsw_index::graph_layers::GraphLayers;
+use crate::hnsw_index::graph_layers_builder::GraphLayersBuilder;
+use crate::hnsw_index::graph_links::GraphLinksFormat;
+use crate::hnsw_index::point_scorer::FilteredScorer;
+use crate::hnsw_index::point_scorer::OriginalRawScorer;
+use crate::hnsw_index::point_scorer::QuantizedRawScorer;
+use crate::hnsw_index::point_scorer::RawScorer;
+use crate::hnsw_index::quantization::encoded_vectors::EncodedVectors;
+use crate::hnsw_index::quantization::DistanceType;
+use crate::hnsw_index::quantization::EncodedVectorsU8;
+use crate::hnsw_index::quantization::VectorParameters;
+
+pub const SINGLE_THREADED_HNSW_BUILD_THRESHOLD: usize = 256;
+
+pub struct HNSWIndex {
+    graph_layers: GraphLayers,
+    distance_type: DistanceType,
+    encoded_vectors: EncodedVectorsU8<Vec<u8>>,
+}
+
+impl HNSWIndex {
+    pub fn open(
+        distance_type: DistanceType,
+        dim: usize,
+        count: usize,
+        binary_columns: Vec<Column>,
+    ) -> Result<Self> {
+        let graph_links = unsafe { binary_columns[0].as_binary().unwrap().index_unchecked(0) };
+        let graph_data = unsafe { binary_columns[1].as_binary().unwrap().index_unchecked(0) };
+        let encoded_meta = unsafe { binary_columns[2].as_binary().unwrap().index_unchecked(0) };
+        let encoded_data = unsafe { binary_columns[3].as_binary().unwrap().index_unchecked(0) };
+
+        let graph_layers = GraphLayers::open(graph_links, graph_data)?;
+
+        let invert = match distance_type {
+            DistanceType::Dot => false,
+            DistanceType::L1 | DistanceType::L2 => true,
+        };
+
+        let params = VectorParameters {
+            dim,
+            count,
+            distance_type,
+            invert,
+        };
+
+        let encoded_vectors: EncodedVectorsU8<Vec<u8>> =
+            EncodedVectorsU8::load(encoded_data, encoded_meta, &params)?;
+
+        Ok(Self {
+            graph_layers,
+            distance_type,
+            encoded_vectors,
+        })
+    }
+
+    pub fn search(&self, limit: usize, query_values: &[f32]) -> Result<Vec<ScoredPointOffset>> {
+        let query_encode = self.encoded_vectors.encode_query(query_values);
+
+        let raw_scorer = RawScorer::Quantized(QuantizedRawScorer {
+            query: query_encode,
+            vector: &self.encoded_vectors,
+        });
+
+        // `ef` bounds the candidate set maintained during the search: larger
+        // values give higher precision, smaller values give faster searches.
+        // Four times the limit is a reasonably balanced choice.
+ let ef = limit * 4; + let query_filter_scorer = FilteredScorer::new(&raw_scorer); + + let is_stopped = AtomicBool::new(false); + let values = self + .graph_layers + .search(limit, ef, query_filter_scorer, None, &is_stopped)?; + + let values = Self::postprocess_score(self.distance_type, values); + Ok(values) + } + + pub fn generate_scores( + &self, + row_nums: u32, + query_values: &[f32], + ) -> Result> { + let query_encode = self.encoded_vectors.encode_query(query_values); + + let raw_scorer = RawScorer::Quantized(QuantizedRawScorer { + query: query_encode, + vector: &self.encoded_vectors, + }); + + let mut values = Vec::with_capacity(row_nums as usize); + for idx in 0..row_nums { + let score = raw_scorer.score_point(idx); + values.push(ScoredPointOffset { idx, score }); + } + + let values = Self::postprocess_score(self.distance_type, values); + Ok(values) + } + + pub fn build( + m: usize, + ef_construct: usize, + column_id: ColumnId, + column: Column, + distance_type: DistanceType, + ) -> Result<(Vec, Vec)> { + let m0 = m * 2; + let entry_points_num = 2; + let use_heuristic = true; + let num_vectors = column.len(); + + let column = column.remove_nullable(); + let vector_column = column.as_vector().unwrap(); + let vector_column = preprocess(distance_type, vector_column.clone()); + + let mut rng = thread_rng(); + let mut graph_layers_builder = GraphLayersBuilder::new( + num_vectors, + m, + m0, + ef_construct, + entry_points_num, + use_heuristic, + ); + + for i in 0..column.len() { + let vector_id = i as PointOffsetType; + let level = graph_layers_builder.get_random_layer(&mut rng); + graph_layers_builder.set_levels(vector_id, level); + } + + let parallelism = match std::thread::available_parallelism() { + Ok(degree) => degree.get(), + Err(e) => { + error!( + "failed to detect the number of parallelism: {}, fallback to 8", + e + ); + 8 + } + }; + + let pool = ThreadPoolBuilder::new() + .thread_name(|index| format!("hnsw-build-{}", index)) + .num_threads(parallelism) + .build() + .expect("failed to build hnsw build thread pool"); + + let first_few_num = std::cmp::min(SINGLE_THREADED_HNSW_BUILD_THRESHOLD, column.len()); + let left_num = if column.len() > first_few_num { + column.len() - first_few_num + } else { + 0 + }; + + let mut first_few_ids = Vec::with_capacity(first_few_num); + let mut ids = Vec::with_capacity(left_num); + for i in 0..first_few_num { + first_few_ids.push(i); + } + for i in first_few_num..column.len() { + ids.push(i); + } + + let (data, dim) = vector_column.as_float32().unwrap(); + let data = unsafe { std::mem::transmute::, Buffer>(data.clone()) }; + + let stopped = AtomicBool::new(false); + + let mut index_fields = Vec::with_capacity(4); + let mut index_columns = Vec::with_capacity(4); + + let insert_point = |vector_id| { + check_process_stopped(&stopped)?; + + let raw_scorer = RawScorer::Original(OriginalRawScorer { + distance_type, + index: vector_id, + column: &column, + }); + let points_scorer = FilteredScorer::new(&raw_scorer); + graph_layers_builder.link_new_point(vector_id as PointOffsetType, points_scorer); + + Ok::<_, ErrorCode>(()) + }; + + for vector_id in first_few_ids { + insert_point(vector_id)?; + } + if !ids.is_empty() { + pool.install(|| ids.into_par_iter().try_for_each(insert_point))?; + } + + // let graph_layers = graph_layers_builder.into_graph_layers_ram(GraphLinksFormat::Compressed); + let (graph_links, graph_data) = + graph_layers_builder.into_graph_data(GraphLinksFormat::Compressed)?; + + index_columns.push(BlockEntry::new_const_column( + 
DataType::Binary, + Scalar::Binary(graph_links), + 1, + )); + index_columns.push(BlockEntry::new_const_column( + DataType::Binary, + Scalar::Binary(graph_data), + 1, + )); + + index_fields.push(TableField::new( + &format!("{}-{}_graph_links", column_id, distance_type), + TableDataType::Binary, + )); + index_fields.push(TableField::new( + &format!("{}-{}_graph_data", column_id, distance_type), + TableDataType::Binary, + )); + + // Set invert parameter to query the closest vector (the most similar vector). + // For Dot distances: invert = false (because a larger dot product means more similar) + // For L1 distances: invert = true (because a smaller Manhattan distance means more similar) + // For L2 distances: invert = true (because a smaller Euclidean distance means more similar) + let invert = match distance_type { + DistanceType::Dot => false, + DistanceType::L1 | DistanceType::L2 => true, + }; + + let params = VectorParameters { + dim: *dim, + count: column.len(), + distance_type, + invert, + }; + + let builder = Vec::new(); + let encoded_vectors = EncodedVectorsU8::encode( + (0..params.count).map(|i| &data.as_ref()[i * params.dim..(i + 1) * params.dim]), + builder, + ¶ms, + None, + &stopped, + )?; + + let encoded_meta = encoded_vectors.build_meta()?; + let encoded_data = encoded_vectors.build_data()?; + + index_columns.push(BlockEntry::new_const_column( + DataType::Binary, + Scalar::Binary(encoded_meta), + 1, + )); + index_columns.push(BlockEntry::new_const_column( + DataType::Binary, + Scalar::Binary(encoded_data), + 1, + )); + index_fields.push(TableField::new( + &format!("{}-{}_encoded_u8_meta", column_id, distance_type), + TableDataType::Binary, + )); + index_fields.push(TableField::new( + &format!("{}-{}_encoded_u8_data", column_id, distance_type), + TableDataType::Binary, + )); + + Ok((index_fields, index_columns)) + } + + pub fn preprocess_query(distance_type: DistanceType, query_values: Vec) -> Vec { + match distance_type { + DistanceType::Dot => cosine_preprocess(query_values), + DistanceType::L1 | DistanceType::L2 => query_values, + } + } + + fn postprocess_score( + distance_type: DistanceType, + mut values: Vec, + ) -> Vec { + match distance_type { + DistanceType::L1 => { + for value in &mut values { + value.score = value.score.abs(); + } + } + DistanceType::L2 => { + for value in &mut values { + value.score = value.score.abs().sqrt(); + } + } + DistanceType::Dot => { + for value in &mut values { + value.score = (1.0_f32 - value.score).abs(); + } + } + } + values + } +} + +fn preprocess(distance_type: DistanceType, column: VectorColumn) -> VectorColumn { + match distance_type { + DistanceType::Dot => { + let ty = column.data_type(); + let len = column.len(); + let mut builder = VectorColumnBuilder::with_capacity(&ty, len); + for scalar in column.iter() { + let val = scalar.as_float32().unwrap(); + let val = unsafe { std::mem::transmute::, Vec>(val.to_vec()) }; + + let new_val = cosine_preprocess(val); + let new_val = unsafe { std::mem::transmute::, Vec>(new_val) }; + let new_scalar = VectorScalar::Float32(new_val); + builder.push(&new_scalar.as_ref()); + } + builder.build() + } + DistanceType::L1 | DistanceType::L2 => column, + } +} + +fn is_length_zero_or_normalized(length: f32) -> bool { + length < f32::EPSILON || (length - 1.0).abs() <= 1.0e-6 +} + +fn cosine_preprocess(vector: Vec) -> Vec { + let mut length: f32 = vector.iter().map(|x| x * x).sum(); + if is_length_zero_or_normalized(length) { + return vector; + } + length = length.sqrt(); + vector.iter().map(|x| x / 
length).collect()
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/mod.rs b/src/query/storages/common/index/src/hnsw_index/mod.rs
new file mode 100644
index 0000000000000..243e824d566a0
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/mod.rs
@@ -0,0 +1,91 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod common;
+mod entry_points;
+mod graph_layers;
+mod graph_layers_builder;
+mod graph_links;
+mod hnsw;
+mod point_scorer;
+mod quantization;
+mod search_context;
+mod visited_pool;
+
+pub use common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+pub use common::types::ScoredPointOffset;
+use databend_common_exception::ErrorCode;
+use databend_storages_common_table_meta::meta::SingleColumnMeta;
+pub use hnsw::HNSWIndex;
+use parquet::format::FileMetaData;
+pub use quantization::DistanceType;
+
+#[derive(Clone)]
+pub struct VectorIndexMeta {
+    pub columns: Vec<(String, SingleColumnMeta)>,
+}
+
+#[derive(Clone, Debug)]
+pub struct VectorIndexFile {
+    pub name: String,
+    pub data: Vec<u8>,
+}
+
+impl VectorIndexFile {
+    pub fn create(name: String, data: Vec<u8>) -> Self {
+        Self { name, data }
+    }
+}
+
+impl TryFrom<FileMetaData> for VectorIndexMeta {
+    type Error = ErrorCode;
+
+    fn try_from(mut meta: FileMetaData) -> std::result::Result<Self, Self::Error> {
+        let rg = meta.row_groups.remove(0);
+        let mut col_metas = Vec::with_capacity(rg.columns.len());
+        for x in &rg.columns {
+            match &x.meta_data {
+                Some(chunk_meta) => {
+                    let col_start =
+                        if let Some(dict_page_offset) = chunk_meta.dictionary_page_offset {
+                            dict_page_offset
+                        } else {
+                            chunk_meta.data_page_offset
+                        };
+                    let col_len = chunk_meta.total_compressed_size;
+                    assert!(
+                        col_start >= 0 && col_len >= 0,
+                        "column start and length should not be negative"
+                    );
+                    let num_values = chunk_meta.num_values as u64;
+                    let res = SingleColumnMeta {
+                        offset: col_start as u64,
+                        len: col_len as u64,
+                        num_values,
+                    };
+                    let column_name = chunk_meta.path_in_schema[0].to_owned();
+                    col_metas.push((column_name, res));
+                }
+                None => {
+                    panic!(
+                        "expecting chunk meta data while converting ThriftFileMetaData to VectorIndexMeta"
+                    )
+                }
+            }
+        }
+        col_metas.shrink_to_fit();
+        Ok(Self { columns: col_metas })
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/point_scorer.rs b/src/query/storages/common/index/src/hnsw_index/point_scorer.rs
new file mode 100644
index 0000000000000..36ecc2e96a360
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/point_scorer.rs
@@ -0,0 +1,174 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_expression::types::VectorScalarRef; +use databend_common_expression::types::F32; +use databend_common_expression::Column; +use databend_common_expression::ScalarRef; + +use crate::hnsw_index::common::types::PointOffsetType; +use crate::hnsw_index::common::types::ScoreType; +use crate::hnsw_index::common::types::ScoredPointOffset; +use crate::hnsw_index::quantization::encoded_vectors::EncodedVectors; +use crate::hnsw_index::quantization::EncodedQueryU8; +use crate::hnsw_index::quantization::EncodedVectorsU8; +use crate::DistanceType; + +pub enum RawScorer<'a> { + Original(OriginalRawScorer<'a>), + Quantized(QuantizedRawScorer<'a>), +} + +pub struct OriginalRawScorer<'a> { + pub distance_type: DistanceType, + pub index: usize, + pub column: &'a Column, +} + +pub struct QuantizedRawScorer<'a> { + pub query: EncodedQueryU8, + pub vector: &'a EncodedVectorsU8>, +} + +impl RawScorer<'_> { + pub fn score_point(&self, point_id: PointOffsetType) -> ScoreType { + match self { + RawScorer::Original(original) => { + let self_val = unsafe { original.column.index_unchecked(original.index) }; + let point_val = unsafe { original.column.index_unchecked(point_id as usize) }; + calculate_score(original.distance_type, self_val, point_val) + } + RawScorer::Quantized(quantized) => { + quantized.vector.score_point(&quantized.query, point_id) + } + } + } + + pub fn score_internal(&self, point_a: PointOffsetType, point_b: PointOffsetType) -> ScoreType { + match self { + RawScorer::Original(original) => { + let point_a_val = unsafe { original.column.index_unchecked(point_a as usize) }; + let point_b_val = unsafe { original.column.index_unchecked(point_b as usize) }; + calculate_score(original.distance_type, point_a_val, point_b_val) + } + RawScorer::Quantized(quantized) => quantized.vector.score_internal(point_a, point_b), + } + } +} + +pub struct FilteredScorer<'a> { + raw_scorer: &'a RawScorer<'a>, + points_buffer: Vec, +} + +impl<'a> FilteredScorer<'a> { + pub fn new(raw_scorer: &'a RawScorer) -> Self { + FilteredScorer { + raw_scorer, + points_buffer: Vec::new(), + } + } + + pub fn check_vector(&self, _point_id: PointOffsetType) -> bool { + true + } + + /// Method filters and calculates scores for the given slice of points IDs + /// + /// For performance reasons this function mutates input values. + /// For result slice allocation this function mutates self. + /// + /// # Arguments + /// + /// * `point_ids` - list of points to score. *Warn*: This input will be wrecked during the execution. + /// * `limit` - limits the number of points to process after filtering. 
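+    /// If `limit` is 0, no truncation is applied and the whole `point_ids`
+    /// slice is scored.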
+ pub fn score_points( + &mut self, + point_ids: &mut [PointOffsetType], + limit: usize, + ) -> &[ScoredPointOffset] { + if limit == 0 { + self.points_buffer + .resize_with(point_ids.len(), ScoredPointOffset::default); + } else { + self.points_buffer + .resize_with(limit, ScoredPointOffset::default); + } + let mut size: usize = 0; + for point_id in point_ids.iter().copied() { + let score = self.score_point(point_id); + self.points_buffer[size] = ScoredPointOffset { + idx: point_id, + score, + }; + + size += 1; + if size == self.points_buffer.len() { + break; + } + } + &self.points_buffer[0..size] + } + + pub fn score_point(&self, point_id: PointOffsetType) -> ScoreType { + self.raw_scorer.score_point(point_id) + } + + pub fn score_internal(&self, point_a: PointOffsetType, point_b: PointOffsetType) -> ScoreType { + self.raw_scorer.score_internal(point_a, point_b) + } +} + +fn calculate_score(distance_type: DistanceType, lhs: ScalarRef, rhs: ScalarRef) -> f32 { + match (lhs, rhs) { + ( + ScalarRef::Vector(VectorScalarRef::Int8(lhs)), + ScalarRef::Vector(VectorScalarRef::Int8(rhs)), + ) => { + let l: Vec<_> = lhs.iter().map(|v| *v as f32).collect(); + let r: Vec<_> = rhs.iter().map(|v| *v as f32).collect(); + match distance_type { + DistanceType::Dot => dot_similarity(&l, &r), + DistanceType::L1 => manhattan_similarity(&l, &r), + DistanceType::L2 => euclid_similarity(&l, &r), + } + } + ( + ScalarRef::Vector(VectorScalarRef::Float32(lhs)), + ScalarRef::Vector(VectorScalarRef::Float32(rhs)), + ) => { + let l = unsafe { std::mem::transmute::<&[F32], &[f32]>(lhs) }; + let r = unsafe { std::mem::transmute::<&[F32], &[f32]>(rhs) }; + match distance_type { + DistanceType::Dot => dot_similarity(l, r), + DistanceType::L1 => manhattan_similarity(l, r), + DistanceType::L2 => euclid_similarity(l, r), + } + } + (_, _) => 0.0, + } +} + +pub fn dot_similarity(v1: &[f32], v2: &[f32]) -> f32 { + v1.iter().zip(v2).map(|(a, b)| a * b).sum() +} + +fn euclid_similarity(v1: &[f32], v2: &[f32]) -> f32 { + -v1.iter().zip(v2).map(|(a, b)| (a - b).powi(2)).sum::() +} + +fn manhattan_similarity(v1: &[f32], v2: &[f32]) -> f32 { + -v1.iter().zip(v2).map(|(a, b)| (a - b).abs()).sum::() +} diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs new file mode 100644 index 0000000000000..31d7caeef61f1 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs @@ -0,0 +1,68 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
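One detail of `point_scorer.rs` worth calling out: the L1 and L2 "similarities" are negated distances, so a single larger-is-better ordering works for all three metrics, and `HNSWIndex::postprocess_score` later maps the scores back to plain distances. A standalone sketch of that convention (the helper is renamed so it does not clash with the patch):

    // Mirrors euclid_similarity above: negating the squared distance makes
    // "closer" compare as "greater", which is what the heap-based search expects.
    fn l2_similarity(a: &[f32], b: &[f32]) -> f32 {
        -a.iter().zip(b).map(|(x, y)| (x - y).powi(2)).sum::<f32>()
    }

    fn main() {
        let q = [0.0_f32, 0.0];
        let near = [1.0_f32, 0.0];
        let far = [3.0_f32, 4.0];
        let (s_near, s_far) = (l2_similarity(&q, &near), l2_similarity(&q, &far));
        // The nearer point gets the larger score: -1.0 > -25.0.
        assert!(s_near > s_far);
        // Recovering the Euclidean distance, as postprocess_score does for L2:
        assert_eq!(s_far.abs().sqrt(), 5.0);
    }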
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+
+pub trait EncodedStorage {
+    fn get_vector_data(&self, index: usize, vector_size: usize) -> &[u8];
+
+    fn from_slice(slice: &[u8], quantized_vector_size: usize, vectors_count: usize) -> Result<Self>
+    where Self: Sized;
+
+    fn to_vec(&self) -> Result<Vec<u8>>;
+}
+
+pub trait EncodedStorageBuilder<TStorage: EncodedStorage> {
+    fn build(self) -> TStorage;
+
+    fn push_vector_data(&mut self, other: &[u8]);
+}
+
+impl EncodedStorage for Vec<u8> {
+    fn get_vector_data(&self, index: usize, vector_size: usize) -> &[u8] {
+        &self[vector_size * index..vector_size * (index + 1)]
+    }
+
+    fn from_slice(
+        slice: &[u8],
+        quantized_vector_size: usize,
+        vectors_count: usize,
+    ) -> Result<Self> {
+        let expected_size = quantized_vector_size * vectors_count;
+        if slice.len() == expected_size {
+            Ok(slice.to_vec())
+        } else {
+            Err(ErrorCode::Internal(format!(
+                "Loaded storage size {} is not equal to expected size {expected_size}",
+                slice.len()
+            )))
+        }
+    }
+
+    fn to_vec(&self) -> Result<Vec<u8>> {
+        Ok(self.clone())
+    }
+}
+
+impl EncodedStorageBuilder<Vec<u8>> for Vec<u8> {
+    fn build(self) -> Vec<u8> {
+        self
+    }
+
+    fn push_vector_data(&mut self, other: &[u8]) {
+        self.extend_from_slice(other);
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs
new file mode 100644
index 0000000000000..4620866d928fb
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs
@@ -0,0 +1,96 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
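To make the trait pair above concrete: `Vec<u8>` serves as both the builder and the storage, so encoded vectors are simply concatenated fixed-size byte slices. A small usage sketch (it assumes both traits are in scope):

    fn storage_sketch() {
        // Vec<u8> implements both EncodedStorageBuilder<Vec<u8>> and EncodedStorage.
        let mut builder: Vec<u8> = Vec::new();
        builder.push_vector_data(&[1, 2, 3, 4]); // vector 0 (4 bytes per vector)
        builder.push_vector_data(&[5, 6, 7, 8]); // vector 1
        let storage: Vec<u8> = builder.build();

        // get_vector_data(index, vector_size) slices out one encoded vector.
        assert_eq!(storage.get_vector_data(1, 4), &[5, 6, 7, 8]);

        // from_slice validates that the byte length matches count * size.
        let reloaded = Vec::<u8>::from_slice(&storage, 4, 2).unwrap();
        assert_eq!(reloaded.len(), 8);
    }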
+
+use std::fmt::Display;
+use std::fmt::Formatter;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use serde::Deserialize;
+use serde::Serialize;
+
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DistanceType {
+    Dot,
+    L1,
+    L2,
+}
+
+impl Display for DistanceType {
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        match self {
+            DistanceType::Dot => write!(f, "dot"),
+            DistanceType::L1 => write!(f, "l1"),
+            DistanceType::L2 => write!(f, "l2"),
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Clone)]
+pub struct VectorParameters {
+    pub dim: usize,
+    pub count: usize,
+    pub distance_type: DistanceType,
+    pub invert: bool,
+}
+
+pub trait EncodedVectors<TEncodedQuery>: Sized {
+    fn build_data(&self) -> Result<Vec<u8>>;
+
+    fn build_meta(&self) -> Result<Vec<u8>>;
+
+    fn load(data: &[u8], meta: &[u8], vector_parameters: &VectorParameters) -> Result<Self>;
+
+    fn encode_query(&self, query: &[f32]) -> TEncodedQuery;
+
+    fn score_point(&self, query: &TEncodedQuery, i: u32) -> f32;
+
+    fn score_internal(&self, i: u32, j: u32) -> f32;
+}
+
+impl DistanceType {
+    pub fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
+        match self {
+            DistanceType::Dot => a.iter().zip(b).map(|(a, b)| a * b).sum(),
+            DistanceType::L1 => a.iter().zip(b).map(|(a, b)| (a - b).abs()).sum(),
+            DistanceType::L2 => a.iter().zip(b).map(|(a, b)| (a - b) * (a - b)).sum(),
+        }
+    }
+}
+
+pub(crate) fn validate_vector_parameters<'a>(
+    data: impl Iterator<Item = impl AsRef<[f32]> + 'a> + Clone,
+    vector_parameters: &VectorParameters,
+) -> Result<()> {
+    let mut count = 0;
+    for vector in data {
+        let vector = vector.as_ref();
+        if vector.len() != vector_parameters.dim {
+            return Err(ErrorCode::BadArguments(format!(
+                "Vector length {} does not match vector parameters dim {}",
+                vector.len(),
+                vector_parameters.dim
+            )));
+        }
+        count += 1;
+    }
+    if count != vector_parameters.count {
+        return Err(ErrorCode::BadArguments(format!(
+            "Vector count {} does not match vector parameters count {}",
+            count, vector_parameters.count
+        )));
+    }
+    Ok(())
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs
new file mode 100644
index 0000000000000..652d689571462
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs
@@ -0,0 +1,364 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
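A quick numeric illustration of `DistanceType::distance` and the `invert` flag recorded in `VectorParameters` (dot products grow with similarity, while L1/L2 shrink, hence the inversion applied elsewhere in the index):

    fn distance_sketch() {
        let a = [1.0_f32, 2.0, 3.0];
        let b = [1.0_f32, 2.0, 4.0];

        // Dot product: larger means more similar, so invert = false.
        assert_eq!(DistanceType::Dot.distance(&a, &b), 17.0);
        // L1 (Manhattan): sum of absolute differences, invert = true.
        assert_eq!(DistanceType::L1.distance(&a, &b), 1.0);
        // L2 here is the *squared* Euclidean distance; the final sqrt is
        // applied later, in HNSWIndex::postprocess_score.
        assert_eq!(DistanceType::L2.distance(&a, &b), 1.0);
    }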
+ +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use serde::Deserialize; +use serde::Serialize; + +use crate::hnsw_index::quantization::encoded_storage::EncodedStorage; +use crate::hnsw_index::quantization::encoded_storage::EncodedStorageBuilder; +use crate::hnsw_index::quantization::encoded_vectors::validate_vector_parameters; +use crate::hnsw_index::quantization::encoded_vectors::DistanceType; +use crate::hnsw_index::quantization::encoded_vectors::EncodedVectors; +use crate::hnsw_index::quantization::encoded_vectors::VectorParameters; +use crate::hnsw_index::quantization::quantile::find_min_max_from_iter; +use crate::hnsw_index::quantization::quantile::find_quantile_interval; + +pub const ALIGNMENT: usize = 16; + +pub struct EncodedVectorsU8 { + encoded_vectors: TStorage, + metadata: Metadata, +} + +pub struct EncodedQueryU8 { + offset: f32, + encoded_query: Vec, +} + +#[derive(Serialize, Deserialize)] +struct Metadata { + actual_dim: usize, + alpha: f32, + offset: f32, + multiplier: f32, + vector_parameters: VectorParameters, +} + +impl EncodedVectorsU8 { + pub fn encode<'a>( + orig_data: impl Iterator + 'a> + Clone, + mut storage_builder: impl EncodedStorageBuilder, + vector_parameters: &VectorParameters, + quantile: Option, + stopped: &AtomicBool, + ) -> Result { + let actual_dim = Self::get_actual_dim(vector_parameters); + + if vector_parameters.count == 0 { + return Ok(EncodedVectorsU8 { + encoded_vectors: storage_builder.build(), + metadata: Metadata { + actual_dim, + alpha: 0.0, + offset: 0.0, + multiplier: 0.0, + vector_parameters: vector_parameters.clone(), + }, + }); + } + + debug_assert!(validate_vector_parameters(orig_data.clone(), vector_parameters).is_ok()); + let (alpha, offset) = Self::find_alpha_offset_size_dim(orig_data.clone()); + let (alpha, offset) = if let Some(quantile) = quantile { + if let Some((min, max)) = find_quantile_interval( + orig_data.clone(), + vector_parameters.dim, + vector_parameters.count, + quantile, + ) { + Self::alpha_offset_from_min_max(min, max) + } else { + (alpha, offset) + } + } else { + (alpha, offset) + }; + + for vector in orig_data { + if stopped.load(Ordering::Relaxed) { + return Err(ErrorCode::Internal("check process stopped error")); + } + + let mut encoded_vector = Vec::with_capacity(actual_dim + std::mem::size_of::()); + encoded_vector.extend_from_slice(&f32::default().to_ne_bytes()); + for &value in vector.as_ref() { + let encoded = Self::f32_to_u8(value, alpha, offset); + encoded_vector.push(encoded); + } + if vector_parameters.dim % ALIGNMENT != 0 { + for _ in 0..(ALIGNMENT - vector_parameters.dim % ALIGNMENT) { + let placeholder = match vector_parameters.distance_type { + DistanceType::Dot => 0.0, + DistanceType::L1 | DistanceType::L2 => offset, + }; + let encoded = Self::f32_to_u8(placeholder, alpha, offset); + encoded_vector.push(encoded); + } + } + let vector_offset = match vector_parameters.distance_type { + DistanceType::Dot => { + actual_dim as f32 * offset * offset + + encoded_vector.iter().map(|&x| f32::from(x)).sum::() * alpha * offset + } + DistanceType::L1 => 0.0, + DistanceType::L2 => { + actual_dim as f32 * offset * offset + + encoded_vector + .iter() + .map(|&x| f32::from(x) * f32::from(x)) + .sum::() + * alpha + * alpha + } + }; + let vector_offset = if vector_parameters.invert { + -vector_offset + } else { + vector_offset + }; + encoded_vector[0..std::mem::size_of::()] + 
.copy_from_slice(&vector_offset.to_ne_bytes()); + storage_builder.push_vector_data(&encoded_vector); + } + let multiplier = match vector_parameters.distance_type { + DistanceType::Dot => alpha * alpha, + DistanceType::L1 => alpha, + DistanceType::L2 => -2.0 * alpha * alpha, + }; + let multiplier = if vector_parameters.invert { + -multiplier + } else { + multiplier + }; + + Ok(EncodedVectorsU8 { + encoded_vectors: storage_builder.build(), + metadata: Metadata { + actual_dim, + alpha, + offset, + multiplier, + vector_parameters: vector_parameters.clone(), + }, + }) + } + + pub fn score_point_simple(&self, query: &EncodedQueryU8, i: u32) -> f32 { + let (vector_offset, v_ptr) = self.get_vec_ptr(i); + + let score = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot | DistanceType::L2 => impl_score_dot( + query.encoded_query.as_ptr(), + v_ptr, + self.metadata.actual_dim, + ), + DistanceType::L1 => impl_score_l1( + query.encoded_query.as_ptr(), + v_ptr, + self.metadata.actual_dim, + ), + }; + + self.metadata.multiplier * score as f32 + query.offset + vector_offset + } + + fn find_alpha_offset_size_dim<'a>( + orig_data: impl Iterator + 'a> + Clone, + ) -> (f32, f32) { + let (min, max) = find_min_max_from_iter(orig_data); + Self::alpha_offset_from_min_max(min, max) + } + + fn alpha_offset_from_min_max(min: f32, max: f32) -> (f32, f32) { + let alpha = (max - min) / 127.0; + let offset = min; + (alpha, offset) + } + + fn f32_to_u8(i: f32, alpha: f32, offset: f32) -> u8 { + let i = (i - offset) / alpha; + i.clamp(0.0, 127.0) as u8 + } + + #[inline] + fn get_vec_ptr(&self, i: u32) -> (f32, *const u8) { + unsafe { + let vector_data_size = self.metadata.actual_dim + std::mem::size_of::(); + let v_ptr = self + .encoded_vectors + .get_vector_data(i as usize, vector_data_size) + .as_ptr(); + let vector_offset = *v_ptr.cast::(); + (vector_offset, v_ptr.add(std::mem::size_of::())) + } + } + + #[allow(dead_code)] + pub fn get_quantized_vector(&self, i: u32) -> (f32, &[u8]) { + let (offset, v_ptr) = self.get_vec_ptr(i); + let vector_data_size = self.metadata.actual_dim; + (offset, unsafe { + std::slice::from_raw_parts(v_ptr, vector_data_size) + }) + } + + pub fn get_quantized_vector_size(vector_parameters: &VectorParameters) -> usize { + let actual_dim = Self::get_actual_dim(vector_parameters); + actual_dim + std::mem::size_of::() + } + + #[allow(dead_code)] + pub fn get_multiplier(&self) -> f32 { + self.metadata.multiplier + } + + #[allow(dead_code)] + pub fn get_diff(&self) -> f32 { + let diff = self.metadata.actual_dim as f32 * self.metadata.offset * self.metadata.offset; + if self.metadata.vector_parameters.invert { + -diff + } else { + diff + } + } + + pub fn get_actual_dim(vector_parameters: &VectorParameters) -> usize { + vector_parameters.dim + (ALIGNMENT - vector_parameters.dim % ALIGNMENT) % ALIGNMENT + } + + #[allow(dead_code)] + pub fn vectors_count(&self) -> usize { + self.metadata.vector_parameters.count + } +} + +impl EncodedVectors for EncodedVectorsU8 { + fn build_data(&self) -> Result> { + self.encoded_vectors.to_vec() + } + + fn build_meta(&self) -> Result> { + let mut buf = Vec::new(); + serde_json::to_writer(&mut buf, &self.metadata)?; + Ok(buf) + } + + fn load(data: &[u8], meta: &[u8], vector_parameters: &VectorParameters) -> Result { + let contents = std::str::from_utf8(meta)?; + let metadata: Metadata = serde_json::from_str(contents)?; + let quantized_vector_size = Self::get_quantized_vector_size(vector_parameters); + let encoded_vectors = + 
TStorage::from_slice(data, quantized_vector_size, vector_parameters.count)?; + let result = Self { + encoded_vectors, + metadata, + }; + Ok(result) + } + + fn encode_query(&self, query: &[f32]) -> EncodedQueryU8 { + let dim = query.len(); + let mut query: Vec<_> = query + .iter() + .map(|&v| Self::f32_to_u8(v, self.metadata.alpha, self.metadata.offset)) + .collect(); + if dim % ALIGNMENT != 0 { + for _ in 0..(ALIGNMENT - dim % ALIGNMENT) { + let placeholder = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot => 0.0, + DistanceType::L1 | DistanceType::L2 => self.metadata.offset, + }; + let encoded = + Self::f32_to_u8(placeholder, self.metadata.alpha, self.metadata.offset); + query.push(encoded); + } + } + let offset = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot => { + query.iter().map(|&x| f32::from(x)).sum::() + * self.metadata.alpha + * self.metadata.offset + } + DistanceType::L1 => 0.0, + DistanceType::L2 => { + query + .iter() + .map(|&x| f32::from(x) * f32::from(x)) + .sum::() + * self.metadata.alpha + * self.metadata.alpha + } + }; + let offset = if self.metadata.vector_parameters.invert { + -offset + } else { + offset + }; + EncodedQueryU8 { + offset, + encoded_query: query, + } + } + + fn score_point(&self, query: &EncodedQueryU8, i: u32) -> f32 { + self.score_point_simple(query, i) + } + + fn score_internal(&self, i: u32, j: u32) -> f32 { + let (query_offset, q_ptr) = self.get_vec_ptr(i); + let (vector_offset, v_ptr) = self.get_vec_ptr(j); + let diff = self.metadata.actual_dim as f32 * self.metadata.offset * self.metadata.offset; + let diff = if self.metadata.vector_parameters.invert { + -diff + } else { + diff + }; + let offset = query_offset + vector_offset - diff; + + let score = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot | DistanceType::L2 => { + impl_score_dot(q_ptr, v_ptr, self.metadata.actual_dim) + } + DistanceType::L1 => impl_score_l1(q_ptr, v_ptr, self.metadata.actual_dim), + }; + + self.metadata.multiplier * score as f32 + offset + } +} + +fn impl_score_dot(q_ptr: *const u8, v_ptr: *const u8, actual_dim: usize) -> i32 { + unsafe { + let mut score = 0i32; + for i in 0..actual_dim { + score += i32::from(*q_ptr.add(i)) * i32::from(*v_ptr.add(i)); + } + score + } +} + +fn impl_score_l1(q_ptr: *const u8, v_ptr: *const u8, actual_dim: usize) -> i32 { + unsafe { + let mut score = 0i32; + for i in 0..actual_dim { + score += i32::from(*q_ptr.add(i)).abs_diff(i32::from(*v_ptr.add(i))) as i32; + } + score + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/mod.rs b/src/query/storages/common/index/src/hnsw_index/quantization/mod.rs new file mode 100644 index 0000000000000..eca88c10d520e --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/quantization/mod.rs @@ -0,0 +1,24 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
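The u8 quantization above reduces to an affine code: with `alpha = (max - min) / 127` and `offset = min`, each component is stored as `(x - offset) / alpha`, truncated and clamped to `0..=127`, and scores computed over the codes are corrected back with the precomputed `multiplier` and per-vector offsets. A sketch of just the encode/decode arithmetic (standalone helpers, not the patch's API):

    // Standalone re-implementation of the f32 -> u8 mapping used by
    // EncodedVectorsU8::f32_to_u8, plus the inverse approximation.
    fn f32_to_u8(x: f32, alpha: f32, offset: f32) -> u8 {
        ((x - offset) / alpha).clamp(0.0, 127.0) as u8
    }

    fn u8_to_f32(code: u8, alpha: f32, offset: f32) -> f32 {
        code as f32 * alpha + offset
    }

    fn main() {
        let (min, max) = (-1.0_f32, 1.0_f32);
        let alpha = (max - min) / 127.0;
        let offset = min;

        let x = 0.5_f32;
        let code = f32_to_u8(x, alpha, offset);
        let x_approx = u8_to_f32(code, alpha, offset);
        // The round trip is lossy but stays within one quantization step.
        assert!((x - x_approx).abs() <= alpha);
    }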
+ +pub mod encoded_storage; +pub mod encoded_vectors; +pub mod encoded_vectors_u8; +pub mod quantile; + +pub use encoded_vectors::DistanceType; +pub use encoded_vectors::VectorParameters; +pub use encoded_vectors_u8::EncodedQueryU8; +pub use encoded_vectors_u8::EncodedVectorsU8; diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs b/src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs new file mode 100644 index 0000000000000..fca620aec2b11 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs @@ -0,0 +1,97 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use feistel_permutation_rs::DefaultBuildHasher; +use feistel_permutation_rs::Permutation; +use rand::rngs::SmallRng; +use rand::Rng; +use rand::SeedableRng; + +pub const QUANTILE_SAMPLE_SIZE: usize = 100_000; + +pub(crate) fn find_min_max_from_iter<'a>( + iter: impl Iterator + 'a> + Clone, +) -> (f32, f32) { + iter.fold((f32::MAX, f32::MIN), |(mut min, mut max), vector| { + for &value in vector.as_ref() { + if value < min { + min = value; + } + if value > max { + max = value; + } + } + (min, max) + }) +} + +pub(crate) fn find_quantile_interval<'a>( + vector_data: impl Iterator + 'a> + Clone, + dim: usize, + count: usize, + quantile: f32, +) -> Option<(f32, f32)> { + if count < 127 || quantile >= 1.0 { + return None; + } + + let slice_size = std::cmp::min(count, QUANTILE_SAMPLE_SIZE); + let mut rng = SmallRng::from_entropy(); + let seed: u64 = rng.gen(); + let permutor = Permutation::new(count as u64, seed, DefaultBuildHasher::new()); + let mut selected_vectors: Vec = permutor + .iter() + .map(|i| i as usize) + .take(slice_size) + .collect(); + + selected_vectors.sort_unstable(); + + let mut data_slice = Vec::with_capacity(slice_size * dim); + let mut selected_index: usize = 0; + for (vector_index, vector_data) in vector_data.into_iter().enumerate() { + if vector_index == selected_vectors[selected_index] { + data_slice.extend_from_slice(vector_data.as_ref()); + selected_index += 1; + if selected_index == slice_size { + break; + } + } + } + + let data_slice_len = data_slice.len(); + if data_slice_len < 4 { + return None; + } + + let cut_index = std::cmp::min( + (data_slice_len - 1) / 2, + (slice_size as f32 * (1.0 - quantile) / 2.0) as usize, + ); + let cut_index = std::cmp::max(cut_index, 1); + let comparator = |a: &f32, b: &f32| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal); + let (selected_values, _, _) = + data_slice.select_nth_unstable_by(data_slice_len - cut_index, comparator); + let (_, _, selected_values) = selected_values.select_nth_unstable_by(cut_index, comparator); + + if selected_values.len() < 2 { + return None; + } + + let selected_values = [selected_values]; + Some(find_min_max_from_iter( + selected_values.iter().map(|v| &v[..]), + )) +} diff --git a/src/query/storages/common/index/src/hnsw_index/search_context.rs 
b/src/query/storages/common/index/src/hnsw_index/search_context.rs new file mode 100644 index 0000000000000..b78637ef3ea47 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/search_context.rs @@ -0,0 +1,61 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BinaryHeap; +use std::iter::FromIterator; + +use num_traits::float::FloatCore; + +use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue; +use crate::hnsw_index::common::types::ScoreType; +use crate::hnsw_index::common::types::ScoredPointOffset; + +/// Structure that holds the context of the search +pub struct SearchContext { + /// Overall nearest points found so far + pub nearest: FixedLengthPriorityQueue<ScoredPointOffset>, + /// Current candidates to process + pub candidates: BinaryHeap<ScoredPointOffset>, +} + +impl SearchContext { + pub fn new(entry_point: ScoredPointOffset, ef: usize) -> Self { + let mut nearest = FixedLengthPriorityQueue::new(ef); + nearest.push(entry_point); + SearchContext { + nearest, + candidates: BinaryHeap::from_iter([entry_point]), + } + } + + pub fn lower_bound(&self) -> ScoreType { + match self.nearest.top() { + None => ScoreType::min_value(), + Some(worst_of_the_best) => worst_of_the_best.score, + } + } + + /// Updates the search context with a new scored point. + /// If it is closer than the existing ones, it is also added to the candidates for further search. + pub fn process_candidate(&mut self, score_point: ScoredPointOffset) { + let was_added = match self.nearest.push(score_point) { + None => true, + Some(removed) => removed.idx != score_point.idx, + }; + if was_added { + self.candidates.push(score_point); + } + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/visited_pool.rs b/src/query/storages/common/index/src/hnsw_index/visited_pool.rs new file mode 100644 index 0000000000000..57b5dd9097fba --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/visited_pool.rs @@ -0,0 +1,180 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Structures for a fast and thread-safe way to check if some points were visited or not + +use std::sync::LazyLock; + +use parking_lot::RwLock; + +use crate::hnsw_index::common::types::PointOffsetType; +
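Together, SearchContext above and the visited pool below implement the inner loop of HNSW's ef-search. An illustrative single-layer sketch, using only this module's API (the `score` and `links` closures stand in for the real scorer and adjacency lookup; higher score means closer, matching ScoredPointOffset's ordering):

    fn search_layer(
        entry: ScoredPointOffset,
        ef: usize,
        score: impl Fn(u32) -> f32,
        links: impl Fn(u32) -> Vec<u32>,
        visited: &mut VisitedListHandle,
    ) -> Vec<ScoredPointOffset> {
        visited.check_and_update_visited(entry.idx);
        let mut ctx = SearchContext::new(entry, ef);
        while let Some(candidate) = ctx.candidates.pop() {
            // No unexplored candidate can improve the current top-ef set.
            if candidate.score < ctx.lower_bound() {
                break;
            }
            for idx in links(candidate.idx) {
                // Skip neighbors already seen in this query iteration.
                if !visited.check_and_update_visited(idx) {
                    ctx.process_candidate(ScoredPointOffset { idx, score: score(idx) });
                }
            }
        }
        ctx.nearest.into_sorted_vec()
    }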
+/// Max number of pooled elements to preserve in memory. +/// Scaled according to the number of logical CPU cores to account for concurrent operations. +pub static POOL_KEEP_LIMIT: LazyLock<usize> = LazyLock::new(|| num_cpus::get().clamp(16, 128)); + +/// Visited list handle is the owner of a `VisitedList`, which is handed out by `VisitedPool` and returned back to it on drop +#[derive(Debug)] +pub struct VisitedListHandle<'a> { + pool: &'a VisitedPool, + visited_list: VisitedList, +} + +/// Visited list reuses the same memory to keep track of visited point ids across multiple consecutive queries +/// +/// It stores the sequence number of the last processed operation next to the point ID, which avoids reallocating memory +/// and lets the same counters be reused for multiple queries. +#[derive(Debug)] +struct VisitedList { + current_iter: u8, + visit_counters: Vec<u8>, +} + +impl Default for VisitedList { + fn default() -> Self { + VisitedList { + current_iter: 1, + visit_counters: vec![], + } + } +} + +impl VisitedList { + fn new(num_points: usize) -> Self { + VisitedList { + current_iter: 1, + visit_counters: vec![0; num_points], + } + } +} + +impl Drop for VisitedListHandle<'_> { + fn drop(&mut self) { + self.pool + .return_back(std::mem::take(&mut self.visited_list)); + } +} + +impl<'a> VisitedListHandle<'a> { + fn new(pool: &'a VisitedPool, data: VisitedList) -> Self { + VisitedListHandle { + pool, + visited_list: data, + } + } + + /// Return `true` if visited + pub fn check(&self, point_id: PointOffsetType) -> bool { + self.visited_list + .visit_counters + .get(point_id as usize) + .is_some_and(|x| *x == self.visited_list.current_iter) + } + + /// Updates the visited list. + /// Returns `true` if the point was visited before. + pub fn check_and_update_visited(&mut self, point_id: PointOffsetType) -> bool { + let idx = point_id as usize; + if idx >= self.visited_list.visit_counters.len() { + self.visited_list.visit_counters.resize(idx + 1, 0); + } + std::mem::replace( + &mut self.visited_list.visit_counters[idx], + self.visited_list.current_iter, + ) == self.visited_list.current_iter + } + + pub fn next_iteration(&mut self) { + self.visited_list.current_iter = self.visited_list.current_iter.wrapping_add(1); + if self.visited_list.current_iter == 0 { + self.visited_list.current_iter = 1; + self.visited_list.visit_counters.fill(0); + } + } + + fn resize(&mut self, num_points: usize) { + // `self.current_iter` is never 0, so it's safe to use 0 as a default + // value. + self.visited_list.visit_counters.resize(num_points, 0); + } +} + +/// Keeps a list of `VisitedList` which can be requested and released from multiple threads +/// +/// If there are more requests than lists, a new list is created, but only the defined maximum amount is kept. +#[derive(Debug)] +pub struct VisitedPool { + pool: RwLock<Vec<VisitedList>>, +} + +impl VisitedPool { + pub fn new() -> Self { + VisitedPool { + pool: RwLock::new(Vec::with_capacity(*POOL_KEEP_LIMIT)), + } + } + + pub fn get(&self, num_points: usize) -> VisitedListHandle { + // If there are more concurrent requests, a new temporary list is created dynamically. + // This limit is implemented to prevent memory leakage.
+ match self.pool.write().pop() { + None => VisitedListHandle::new(self, VisitedList::new(num_points)), + Some(data) => { + let mut visited_list = VisitedListHandle::new(self, data); + visited_list.resize(num_points); + visited_list.next_iteration(); + visited_list + } + } + } + + fn return_back(&self, data: VisitedList) { + let mut pool = self.pool.write(); + if pool.len() < *POOL_KEEP_LIMIT { + pool.push(data); + } + } +} + +impl Default for VisitedPool { + fn default() -> Self { + VisitedPool::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_visited_list() { + let pool = VisitedPool::new(); + let mut visited_list = pool.get(10); + + for _ in 0..2 { + assert!(!visited_list.check(0)); + assert!(!visited_list.check_and_update_visited(0)); + assert!(visited_list.check(0)); + + assert!(visited_list.check_and_update_visited(0)); + assert!(visited_list.check(0)); + + for _ in 0..(u8::MAX as usize * 2 + 10) { + visited_list.next_iteration(); + assert!(!visited_list.check(0)); + } + } + } +} diff --git a/src/query/storages/common/index/src/lib.rs b/src/query/storages/common/index/src/lib.rs index 50f57df6908f8..cd6cbb69a581f 100644 --- a/src/query/storages/common/index/src/lib.rs +++ b/src/query/storages/common/index/src/lib.rs @@ -19,6 +19,7 @@ mod bloom_index; mod eliminate_cast; pub mod filters; +mod hnsw_index; mod index; mod inverted_index; mod page_index; @@ -31,6 +32,12 @@ pub use bloom_index::BloomIndexResult; pub use bloom_index::FilterEvalResult; pub use bloom_index::NgramArgs; pub use eliminate_cast::eliminate_cast; +pub use hnsw_index::DistanceType; +pub use hnsw_index::FixedLengthPriorityQueue; +pub use hnsw_index::HNSWIndex; +pub use hnsw_index::ScoredPointOffset; +pub use hnsw_index::VectorIndexFile; +pub use hnsw_index::VectorIndexMeta; pub use index::Index; pub use inverted_index::extract_component_fields; pub use inverted_index::extract_fsts; diff --git a/src/query/storages/common/pruner/src/block_meta.rs b/src/query/storages/common/pruner/src/block_meta.rs index b95280edf1284..febc97e5bdaa4 100644 --- a/src/query/storages/common/pruner/src/block_meta.rs +++ b/src/query/storages/common/pruner/src/block_meta.rs @@ -43,6 +43,8 @@ pub struct BlockMetaIndex { pub snapshot_location: Option<String>, // The search matched rows and optional scores in the block. pub matched_rows: Option<Vec<(usize, Option<F32>)>>, + // The vector topn rows and scores in the block. + pub vector_scores: Option<Vec<(usize, F32)>>, // The optional meta of virtual columns. pub virtual_block_meta: Option<VirtualBlockMetaIndex>, } diff --git a/src/query/storages/common/pruner/src/lib.rs b/src/query/storages/common/pruner/src/lib.rs index 9ada3141465aa..4a8414caf935b 100644 --- a/src/query/storages/common/pruner/src/lib.rs +++ b/src/query/storages/common/pruner/src/lib.rs @@ -32,4 +32,4 @@ pub use page_pruner::PagePruner; pub use page_pruner::PagePrunerCreator; pub use range_pruner::RangePruner; pub use range_pruner::RangePrunerCreator; -pub use topn_pruner::TopNPrunner; +pub use topn_pruner::TopNPruner; diff --git a/src/query/storages/common/pruner/src/topn_pruner.rs b/src/query/storages/common/pruner/src/topn_pruner.rs index 93cf9cdb075bc..fcba2ebe7e344 100644 --- a/src/query/storages/common/pruner/src/topn_pruner.rs +++ b/src/query/storages/common/pruner/src/topn_pruner.rs @@ -26,13 +26,13 @@ use crate::BlockMetaIndex; /// TopN pruner. /// Pruning for order by x limit N.
#[derive(Clone)] -pub struct TopNPrunner { +pub struct TopNPruner { schema: TableSchemaRef, sort: Vec<(RemoteExpr, bool, bool)>, limit: usize, } -impl TopNPrunner { +impl TopNPruner { pub fn create( schema: TableSchemaRef, sort: Vec<(RemoteExpr, bool, bool)>, @@ -46,7 +46,7 @@ impl TopNPrunner { } } -impl TopNPrunner { +impl TopNPruner { pub fn prune( &self, metas: Vec<(BlockMetaIndex, Arc)>, diff --git a/src/query/storages/common/table_meta/src/meta/v2/segment.rs b/src/query/storages/common/table_meta/src/meta/v2/segment.rs index bd31c3bf80396..47c2127145d60 100644 --- a/src/query/storages/common/table_meta/src/meta/v2/segment.rs +++ b/src/query/storages/common/table_meta/src/meta/v2/segment.rs @@ -170,6 +170,8 @@ pub struct BlockMeta { pub bloom_filter_index_size: u64, pub inverted_index_size: Option, pub ngram_filter_index_size: Option, + pub vector_index_size: Option, + pub vector_index_location: Option, /// The block meta of virtual columns. pub virtual_block_meta: Option, pub compression: Compression, @@ -192,6 +194,8 @@ impl BlockMeta { bloom_filter_index_size: u64, inverted_index_size: Option, ngram_filter_index_size: Option, + vector_index_size: Option, + vector_index_location: Option, virtual_block_meta: Option, compression: Compression, create_on: Option>, @@ -208,6 +212,8 @@ impl BlockMeta { bloom_filter_index_size, inverted_index_size, ngram_filter_index_size, + vector_index_size, + vector_index_location, virtual_block_meta, compression, create_on, @@ -368,6 +374,8 @@ impl BlockMeta { bloom_filter_index_size: 0, compression: Compression::Lz4, inverted_index_size: None, + vector_index_size: None, + vector_index_location: None, virtual_block_meta: None, create_on: None, ngram_filter_index_size: None, @@ -394,6 +402,8 @@ impl BlockMeta { bloom_filter_index_size: s.bloom_filter_index_size, compression: s.compression, inverted_index_size: None, + vector_index_size: None, + vector_index_location: None, virtual_block_meta: None, create_on: None, ngram_filter_index_size: None, diff --git a/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs b/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs index 41207315d2d19..ccc22fdbdb600 100644 --- a/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs +++ b/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs @@ -63,6 +63,8 @@ impl From for crate::meta::BlockMeta { bloom_filter_index_size: value.bloom_filter_index_size, inverted_index_size: None, ngram_filter_index_size: None, + vector_index_size: None, + vector_index_location: None, virtual_block_meta: None, compression: value.compression.into(), create_on: None, diff --git a/src/query/storages/fuse/src/constants.rs b/src/query/storages/fuse/src/constants.rs index 890153cb5f15b..f16b4975939ad 100644 --- a/src/query/storages/fuse/src/constants.rs +++ b/src/query/storages/fuse/src/constants.rs @@ -36,6 +36,7 @@ pub const FUSE_TBL_LAST_SNAPSHOT_HINT_V2: &str = "last_snapshot_location_hint_v2 pub const FUSE_TBL_VIRTUAL_BLOCK_PREFIX: &str = "_vb"; pub const FUSE_TBL_AGG_INDEX_PREFIX: &str = "_i_a"; pub const FUSE_TBL_INVERTED_INDEX_PREFIX: &str = "_i_i"; +pub const FUSE_TBL_VECTOR_INDEX_PREFIX: &str = "_i_v"; pub const DEFAULT_ROW_PER_PAGE: usize = 8192; pub const DEFAULT_ROW_PER_INDEX: usize = 100000; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 96c17992f4141..f832b97be638c 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ 
b/src/query/storages/fuse/src/fuse_table.rs @@ -54,7 +54,7 @@ use databend_common_expression::TableSchema; use databend_common_expression::ORIGIN_BLOCK_ID_COL_NAME; use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COL_NAME; use databend_common_expression::ORIGIN_VERSION_COL_NAME; -use databend_common_expression::SEARCH_SCORE_COLUMN_ID; +use databend_common_expression::VECTOR_SCORE_COLUMN_ID; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_COMPRESSED_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_PER_SEGMENT; @@ -766,7 +766,7 @@ impl Table for FuseTable { } fn supported_internal_column(&self, column_id: ColumnId) -> bool { - column_id >= SEARCH_SCORE_COLUMN_ID + column_id >= VECTOR_SCORE_COLUMN_ID } fn support_column_projection(&self) -> bool { diff --git a/src/query/storages/fuse/src/io/locations.rs b/src/query/storages/fuse/src/io/locations.rs index faf266d9016fd..13272ee77706c 100644 --- a/src/query/storages/fuse/src/io/locations.rs +++ b/src/query/storages/fuse/src/io/locations.rs @@ -38,6 +38,7 @@ use crate::index::InvertedIndexFile; use crate::FUSE_TBL_AGG_INDEX_PREFIX; use crate::FUSE_TBL_INVERTED_INDEX_PREFIX; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT_V2; +use crate::FUSE_TBL_VECTOR_INDEX_PREFIX; use crate::FUSE_TBL_XOR_BLOOM_INDEX_PREFIX; static SNAPSHOT_V0: SnapshotVersion = SnapshotVersion::V0(PhantomData); static SNAPSHOT_V1: SnapshotVersion = SnapshotVersion::V1(PhantomData); @@ -63,6 +64,7 @@ pub struct TableMetaLocationGenerator { snapshot_location_prefix: String, agg_index_location_prefix: String, inverted_index_location_prefix: String, + vector_index_location_prefix: String, } impl TableMetaLocationGenerator { @@ -75,6 +77,7 @@ impl TableMetaLocationGenerator { let agg_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_AGG_INDEX_PREFIX); let inverted_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_INVERTED_INDEX_PREFIX); + let vector_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_VECTOR_INDEX_PREFIX); Self { prefix, block_location_prefix, @@ -83,6 +86,7 @@ impl TableMetaLocationGenerator { snapshot_location_prefix, agg_index_location_prefix, inverted_index_location_prefix, + vector_index_location_prefix, } } @@ -98,6 +102,10 @@ impl TableMetaLocationGenerator { &self.bloom_index_location_prefix } + pub fn block_vector_index_prefix(&self) -> &str { + &self.vector_index_location_prefix + } + pub fn segment_location_prefix(&self) -> &str { &self.segment_info_location_prefix } @@ -134,6 +142,19 @@ impl TableMetaLocationGenerator { ) } + pub fn block_vector_index_location(&self) -> Location { + let uuid = Uuid::now_v7(); + ( + format!( + "{}{}_v{}.parquet", + self.block_vector_index_prefix(), + uuid.as_simple(), + BlockFilter::VERSION, + ), + BlockFilter::VERSION, + ) + } + pub fn gen_segment_info_location( &self, table_meta_timestamps: TableMetaTimestamps, diff --git a/src/query/storages/fuse/src/io/mod.rs b/src/query/storages/fuse/src/io/mod.rs index 63b43a9ff785f..94d22a40bbb90 100644 --- a/src/query/storages/fuse/src/io/mod.rs +++ b/src/query/storages/fuse/src/io/mod.rs @@ -52,6 +52,7 @@ pub use write::InvertedIndexWriter; pub use write::MetaWriter; pub(crate) use write::StreamBlockBuilder; pub(crate) use write::StreamBlockProperties; +pub use write::VectorIndexBuilder; pub use write::VirtualColumnBuilder; pub use write::WriteSettings; pub use write::MAX_BLOCK_UNCOMPRESSED_SIZE; diff --git a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs 
b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs index f0ce5c5c789db..a07e089a42cca 100644 --- a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs +++ b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs @@ -27,6 +27,7 @@ use databend_storages_common_cache::LoadParams; use databend_storages_common_cache::Loader; use databend_storages_common_index::BloomIndexMeta; use databend_storages_common_index::InvertedIndexMeta; +use databend_storages_common_index::VectorIndexMeta; use databend_storages_common_table_meta::meta::CompactSegmentInfo; use databend_storages_common_table_meta::meta::SegmentInfoVersion; use databend_storages_common_table_meta::meta::SingleColumnMeta; @@ -51,6 +52,7 @@ pub type TableSnapshotReader = InMemoryCacheReader>; pub type InvertedIndexMetaReader = InMemoryCacheReader>; +pub type VectorIndexMetaReader = InMemoryCacheReader>; pub struct MetaReaders; @@ -100,6 +102,13 @@ impl MetaReaders { LoaderWrapper(dal), ) } + + pub fn vector_index_meta_reader(dal: Operator) -> VectorIndexMetaReader { + VectorIndexMetaReader::new( + CacheManager::instance().get_vector_index_meta_cache(), + LoaderWrapper(dal), + ) + } } // workaround for the orphan rules @@ -254,6 +263,24 @@ impl Loader for LoaderWrapper { } } +#[async_trait::async_trait] +impl Loader for LoaderWrapper { + #[async_backtrace::framed] + async fn load(&self, params: &LoadParams) -> Result { + // read the ThriftFileMetaData, omit unnecessary conversions + let meta = read_thrift_file_metadata(self.0.clone(), ¶ms.location, params.len_hint) + .await + .map_err(|err| { + ErrorCode::StorageOther(format!( + "read file meta failed, {}, {:?}", + params.location, err + )) + })?; + + VectorIndexMeta::try_from(meta) + } +} + pub async fn bytes_reader(op: &Operator, path: &str, len_hint: Option) -> Result { let reader = if let Some(len) = len_hint { op.read_with(path).range(0..len).await? diff --git a/src/query/storages/fuse/src/io/read/mod.rs b/src/query/storages/fuse/src/io/read/mod.rs index 4dbac4c46e853..391f23e33c6e9 100644 --- a/src/query/storages/fuse/src/io/read/mod.rs +++ b/src/query/storages/fuse/src/io/read/mod.rs @@ -20,6 +20,7 @@ pub mod meta; mod segment_reader; mod snapshot_history_reader; mod utils; +mod vector_index; mod virtual_column; pub use agg_index::AggIndexReader; @@ -38,5 +39,6 @@ pub use segment_reader::RowOrientedSegmentReader; pub use segment_reader::SegmentReader; pub use snapshot_history_reader::SnapshotHistoryReader; pub use utils::build_columns_meta; +pub use vector_index::VectorIndexReader; pub use virtual_column::VirtualBlockReadResult; pub use virtual_column::VirtualColumnReader; diff --git a/src/query/storages/fuse/src/io/read/vector_index/mod.rs b/src/query/storages/fuse/src/io/read/vector_index/mod.rs new file mode 100644 index 0000000000000..f62d5686f187c --- /dev/null +++ b/src/query/storages/fuse/src/io/read/vector_index/mod.rs @@ -0,0 +1,18 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
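The loader and reader below follow the same cached-reader pattern as the bloom and inverted indexes. A hedged sketch of driving the meta reader added above (imports elided; the commented return type is inferred from the loader code that follows):

    // Load (and cache) the footer metadata of one vector index parquet file.
    // `dal` is the table's opendal Operator, `location` the index file path.
    async fn read_vector_index_meta(
        dal: opendal::Operator,
        location: &str,
    ) -> databend_common_exception::Result<()> {
        let reader = MetaReaders::vector_index_meta_reader(dal);
        let params = LoadParams {
            location: location.to_string(),
            len_hint: None,  // no cached file length; the loader probes it
            ver: 0,          // vector index meta is not versioned yet
            put_cache: true, // populate the in-memory meta cache on a miss
        };
        let _meta /* : Arc<VectorIndexMeta> */ = reader.read(&params).await?;
        Ok(())
    }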
+ +mod vector_index_loader; +mod vector_index_reader; + +pub use vector_index_reader::VectorIndexReader; diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs new file mode 100644 index 0000000000000..53a220b622c80 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs @@ -0,0 +1,224 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::future::Future; +use std::sync::Arc; +use std::time::Instant; + +use arrow::datatypes::Field; +use arrow::datatypes::Fields; +use arrow::datatypes::Schema; +use bytes::Bytes; +use databend_common_base::runtime::GlobalIORuntime; +use databend_common_base::runtime::Runtime; +use databend_common_base::runtime::TrySpawn; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::Column; +use databend_common_metrics::storage::metrics_inc_block_vector_index_read_bytes; +use databend_common_metrics::storage::metrics_inc_block_vector_index_read_milliseconds; +use databend_storages_common_cache::CacheAccessor; +use databend_storages_common_cache::CacheManager; +use databend_storages_common_cache::LoadParams; +use databend_storages_common_index::VectorIndexMeta; +use databend_storages_common_io::MergeIOReader; +use databend_storages_common_io::ReadSettings; +use databend_storages_common_table_meta::table::TableCompression; +use opendal::Operator; +use parquet::arrow::arrow_reader::ParquetRecordBatchReader; +use parquet::arrow::parquet_to_arrow_field_levels; +use parquet::arrow::ArrowSchemaConverter; +use parquet::arrow::ProjectionMask; + +use crate::index::VectorIndexFile; +use crate::io::read::block::parquet::RowGroupImplBuilder; +use crate::io::MetaReaders; + +#[async_trait::async_trait] +trait InRuntime +where Self: Future +{ + async fn execute_in_runtime(self, runtime: &Runtime) -> Result; +} + +#[async_trait::async_trait] +impl InRuntime for T +where + T: Future + Send + 'static, + T::Output: Send + 'static, +{ + #[async_backtrace::framed] + async fn execute_in_runtime(self, runtime: &Runtime) -> Result { + runtime + .try_spawn(self, None)? + .await + .map_err(|e| ErrorCode::TokioError(format!("runtime join error. {}", e))) + } +} + +/// Loads vector index meta data +/// read data from cache, or populate cache items if possible +#[fastrace::trace] +pub(crate) async fn load_vector_index_meta( + dal: Operator, + path: &str, +) -> Result> { + let path_owned = path.to_owned(); + async move { + let reader = MetaReaders::vector_index_meta_reader(dal); + let version = 0; + + let load_params = LoadParams { + location: path_owned, + len_hint: None, + ver: version, + put_cache: true, + }; + + reader.read(&load_params).await + } + .execute_in_runtime(&GlobalIORuntime::instance()) + .await? 
+} + +/// load index column data +#[fastrace::trace] +pub(crate) async fn load_vector_index_files<'a>( + operator: Operator, + settings: &ReadSettings, + column_names: &'a [String], + location: &'a str, +) -> Result> { + let start = Instant::now(); + + // 1. load index meta + let vector_index_meta = load_vector_index_meta(operator.clone(), location).await?; + + // 2. build index schema + let vector_index_fields: Vec<_> = vector_index_meta + .columns + .iter() + .map(|col| Field::new(col.0.clone(), arrow::datatypes::DataType::Binary, false)) + .collect(); + let vector_index_schema = Schema::new(Fields::from(vector_index_fields)); + + let vector_index_schema_desc = + Arc::new(ArrowSchemaConverter::new().convert(&vector_index_schema)?); + + // 3. collect column metas that needed to build vector index + let vector_column_chunk_metas = &vector_index_meta.columns; + + let mut column_indices = Vec::with_capacity(column_names.len()); + for column_name in column_names { + let column_index = vector_index_schema.index_of(column_name)?; + column_indices.push(column_index); + } + + let projected_column_metas: Vec<_> = vector_column_chunk_metas + .iter() + .enumerate() + .filter(|(i, _)| column_indices.contains(i)) + .map(|(_, meta)| meta) + .collect(); + + // 4. read column data, first try to read from cache, + // if not exists, fetch from object storage + let mut ranges = Vec::new(); + let mut names_map = HashMap::new(); + let mut column_data = HashMap::new(); + let vector_index_file_cache = CacheManager::instance().get_vector_index_file_cache(); + for (i, (name, col_meta)) in column_indices + .iter() + .zip(projected_column_metas.into_iter()) + { + let cache_key = cache_key_of_column(location, name); + if let Some(cache_file) = vector_index_file_cache.get_sized(&cache_key, col_meta.len) { + column_data.insert(*i, cache_file); + continue; + } + + // if cache missed, prepare the ranges to be read + let col_range = col_meta.offset..(col_meta.offset + col_meta.len); + + ranges.push((*i as u32, col_range)); + names_map.insert(*i as u32, (name, cache_key)); + } + + if !ranges.is_empty() { + let merge_io_result = + MergeIOReader::merge_io_read(settings, operator.clone(), location, &ranges).await?; + + // merge column data fetched from object storage + for (i, (chunk_idx, range)) in &merge_io_result.columns_chunk_offsets { + let chunk = merge_io_result + .owner_memory + .get_chunk(*chunk_idx, &merge_io_result.block_path)?; + let data = chunk.slice(range.clone()); + + let (name, cache_key) = names_map.remove(i).unwrap(); + let file = VectorIndexFile::create(name.clone(), data.into()); + + // add index file to cache + vector_index_file_cache.insert(cache_key, file.clone()); + column_data.insert(*i as usize, Arc::new(file)); + } + } + + // 5. 
deserialize raw data to vector index data + let mut builder = + RowGroupImplBuilder::new(1, &vector_index_schema_desc, TableCompression::Zstd.into()); + + for (i, column_data) in column_data { + builder.add_column_chunk(i, Bytes::copy_from_slice(&column_data.data)); + } + let row_group = Box::new(builder.build()); + let field_levels = parquet_to_arrow_field_levels( + vector_index_schema_desc.as_ref(), + ProjectionMask::leaves(&vector_index_schema_desc, column_indices), + None, + )?; + let mut record_reader = ParquetRecordBatchReader::try_new_with_row_groups( + &field_levels, + row_group.as_ref(), + 1, + None, + )?; + let record = record_reader.next().unwrap()?; + assert!(record_reader.next().is_none()); + + let mut vector_bytes_len = 0; + let mut vector_columns = Vec::with_capacity(4); + for i in 0..record.num_columns() { + let vector_binary = record.column(i).clone(); + let column = Column::from_arrow_rs( + vector_binary, + &databend_common_expression::types::DataType::Binary, + )?; + vector_bytes_len += column.memory_size(); + vector_columns.push(column); + } + + // Perf. + { + metrics_inc_block_vector_index_read_bytes(vector_bytes_len as u64); + metrics_inc_block_vector_index_read_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(vector_columns) +} + +fn cache_key_of_column(index_path: &str, index_column_name: &str) -> String { + format!("{index_path}-{index_column_name}") +} diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs new file mode 100644 index 0000000000000..bfb084787a126 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs @@ -0,0 +1,112 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
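Next comes the reader that the pruner drives. As a usage sketch (names mirror the code below; `prune` runs a real HNSW search capped at `limit`, while `generate_scores` scores every row of the block):

    // Score one block's vector index file and keep its best `limit` rows.
    async fn topn_for_block(
        reader: &VectorIndexReader,
        limit: usize,
        row_count: usize,
        index_location: &str,
    ) -> databend_common_exception::Result<Vec<ScoredPointOffset>> {
        // Internally this loads the four index columns (graph links, graph
        // data, quantization meta and data), rebuilds the HNSW graph, and
        // searches it with the preprocessed query vector.
        reader.prune(limit, row_count, index_location).await
    }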
+ +use std::time::Instant; + +use databend_common_exception::Result; +use databend_common_metrics::storage::metrics_inc_block_vector_index_search_milliseconds; +use databend_storages_common_index::DistanceType; +use databend_storages_common_index::HNSWIndex; +use databend_storages_common_index::ScoredPointOffset; +use databend_storages_common_io::ReadSettings; +use opendal::Operator; + +use crate::io::read::vector_index::vector_index_loader::load_vector_index_files; + +#[derive(Clone)] +pub struct VectorIndexReader { + operator: Operator, + settings: ReadSettings, + dim: usize, + distance_type: DistanceType, + columns: Vec<String>, + query_values: Vec<f32>, +} + +impl VectorIndexReader { + pub fn create( + operator: Operator, + settings: ReadSettings, + distance_type: DistanceType, + columns: Vec<String>, + query_values: Vec<f32>, + ) -> Self { + let dim = query_values.len(); + let processed_query_values = HNSWIndex::preprocess_query(distance_type, query_values); + + Self { + operator, + settings, + dim, + distance_type, + columns, + query_values: processed_query_values, + } + } + + pub async fn prune( + &self, + limit: usize, + row_count: usize, + location: &str, + ) -> Result<Vec<ScoredPointOffset>> { + let start = Instant::now(); + + let binary_columns = load_vector_index_files( + self.operator.clone(), + &self.settings, + &self.columns, + location, + ) + .await?; + + let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; + + let res = hnsw_index.search(limit, &self.query_values)?; + + // Perf. + { + metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(res) + } + + pub async fn generate_scores( + &self, + row_count: usize, + location: &str, + ) -> Result<Vec<ScoredPointOffset>> { + let start = Instant::now(); + + let binary_columns = load_vector_index_files( + self.operator.clone(), + &self.settings, + &self.columns, + location, + ) + .await?; + + let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; + + let res = hnsw_index.generate_scores(row_count as u32, &self.query_values)?; + + // Perf.
+ { + metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(res) + } +} diff --git a/src/query/storages/fuse/src/io/write/block_writer.rs b/src/query/storages/fuse/src/io/write/block_writer.rs index c6593a248c8af..1f1b6a5579e25 100644 --- a/src/query/storages/fuse/src/io/write/block_writer.rs +++ b/src/query/storages/fuse/src/io/write/block_writer.rs @@ -32,6 +32,9 @@ use databend_common_metrics::storage::metrics_inc_block_index_write_nums; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_bytes; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_nums; +use databend_common_metrics::storage::metrics_inc_block_vector_index_write_bytes; +use databend_common_metrics::storage::metrics_inc_block_vector_index_write_milliseconds; +use databend_common_metrics::storage::metrics_inc_block_vector_index_write_nums; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_bytes; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_nums; @@ -52,6 +55,8 @@ use crate::io::write::virtual_column_builder::VirtualColumnBuilder; use crate::io::write::virtual_column_builder::VirtualColumnState; use crate::io::write::InvertedIndexBuilder; use crate::io::write::InvertedIndexState; +use crate::io::write::VectorIndexBuilder; +use crate::io::write::VectorIndexState; use crate::io::write::WriteSettings; use crate::io::BloomIndexState; use crate::io::TableMetaLocationGenerator; @@ -130,6 +135,7 @@ pub struct BlockSerialization { pub bloom_index_state: Option, pub inverted_index_states: Vec, pub virtual_column_state: Option, + pub vector_index_state: Option, } #[derive(Clone)] @@ -143,6 +149,7 @@ pub struct BlockBuilder { pub ngram_args: Vec, pub inverted_index_builders: Vec, pub virtual_column_builder: Option, + pub vector_index_builder: Option, pub table_meta_timestamps: TableMetaTimestamps, } @@ -177,6 +184,15 @@ impl BlockBuilder { )?; inverted_index_states.push(inverted_index_state); } + let vector_index_state = if let Some(ref vector_index_builder) = self.vector_index_builder { + let vector_index_location = self.meta_locations.block_vector_index_location(); + let mut vector_index_builder = vector_index_builder.clone(); + vector_index_builder.add_block(&data_block)?; + let vector_index_state = vector_index_builder.finalize(&vector_index_location)?; + Some(vector_index_state) + } else { + None + }; let virtual_column_state = if let Some(ref virtual_column_builder) = self.virtual_column_builder { @@ -225,6 +241,8 @@ impl BlockBuilder { .as_ref() .map(|v| v.ngram_size) .unwrap_or_default(), + vector_index_size: vector_index_state.as_ref().map(|v| v.size), + vector_index_location: vector_index_state.as_ref().map(|v| v.location.clone()), compression: self.write_settings.table_compression.into(), inverted_index_size, virtual_block_meta: None, @@ -237,6 +255,7 @@ impl BlockBuilder { bloom_index_state, inverted_index_states, virtual_column_state, + vector_index_state, }; Ok(serialized) } @@ -268,6 +287,7 @@ impl BlockWriter { Self::write_down_data_block(dal, serialized.block_raw_data, &block_meta.location.0).await?; Self::write_down_bloom_index_state(dal, serialized.bloom_index_state).await?; + Self::write_down_vector_index_state(dal, serialized.vector_index_state).await?; 
Self::write_down_inverted_index_state(dal, serialized.inverted_index_states).await?; Self::write_down_virtual_column_state(dal, serialized.virtual_column_state).await?; @@ -308,6 +328,24 @@ impl BlockWriter { Ok(()) } + pub async fn write_down_vector_index_state( + dal: &Operator, + vector_index_state: Option, + ) -> Result<()> { + if let Some(vector_index_state) = vector_index_state { + let start = Instant::now(); + + let location = &vector_index_state.location.0; + let index_size = vector_index_state.size; + write_data(vector_index_state.data, dal, location).await?; + + metrics_inc_block_vector_index_write_nums(1); + metrics_inc_block_vector_index_write_bytes(index_size); + metrics_inc_block_vector_index_write_milliseconds(start.elapsed().as_millis() as u64); + } + Ok(()) + } + pub async fn write_down_inverted_index_state( dal: &Operator, inverted_index_states: Vec, diff --git a/src/query/storages/fuse/src/io/write/mod.rs b/src/query/storages/fuse/src/io/write/mod.rs index b0af3633055dc..e7f3bfbe82c2f 100644 --- a/src/query/storages/fuse/src/io/write/mod.rs +++ b/src/query/storages/fuse/src/io/write/mod.rs @@ -17,6 +17,7 @@ mod bloom_index_writer; mod inverted_index_writer; mod meta_writer; mod stream; +mod vector_index_writer; mod virtual_column_builder; mod write_settings; @@ -37,6 +38,8 @@ pub use meta_writer::CachedMetaWriter; pub use meta_writer::MetaWriter; pub(crate) use stream::StreamBlockBuilder; pub(crate) use stream::StreamBlockProperties; +pub use vector_index_writer::VectorIndexBuilder; +pub(crate) use vector_index_writer::VectorIndexState; pub use virtual_column_builder::VirtualColumnBuilder; pub use write_settings::WriteSettings; pub use write_settings::MAX_BLOCK_UNCOMPRESSED_SIZE; diff --git a/src/query/storages/fuse/src/io/write/stream/block_builder.rs b/src/query/storages/fuse/src/io/write/stream/block_builder.rs index 7193d988952e8..73ee3c1ded29e 100644 --- a/src/query/storages/fuse/src/io/write/stream/block_builder.rs +++ b/src/query/storages/fuse/src/io/write/stream/block_builder.rs @@ -34,6 +34,7 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COLUMN_ID; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; +use databend_common_meta_app::schema::TableIndex; use databend_common_native::write::NativeWriter; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; @@ -59,6 +60,7 @@ use crate::io::BloomIndexState; use crate::io::InvertedIndexBuilder; use crate::io::InvertedIndexWriter; use crate::io::TableMetaLocationGenerator; +use crate::io::VectorIndexBuilder; use crate::io::VirtualColumnBuilder; use crate::io::WriteSettings; use crate::operations::column_parquet_metas; @@ -150,6 +152,7 @@ pub struct StreamBlockBuilder { inverted_index_writers: Vec, bloom_index_builder: BloomIndexBuilder, virtual_column_builder: Option, + vector_index_builder: Option, cluster_stats_state: ClusterStatisticsState, column_stats_state: ColumnStatisticsState, @@ -228,6 +231,11 @@ impl StreamBlockBuilder { } else { None }; + let vector_index_builder = VectorIndexBuilder::try_create( + properties.ctx.clone(), + &properties.table_indexes, + properties.source_schema.clone(), + ); let cluster_stats_state = ClusterStatisticsState::new(properties.cluster_stats_builder.clone()); @@ -240,6 +248,7 @@ impl StreamBlockBuilder { inverted_index_writers, bloom_index_builder, virtual_column_builder, + vector_index_builder, row_count: 0, block_size: 0, 
column_stats_state, @@ -278,7 +287,9 @@ impl StreamBlockBuilder { if let Some(ref mut virtual_column_builder) = self.virtual_column_builder { virtual_column_builder.add_block(&block)?; } - + if let Some(ref mut vector_index_builder) = self.vector_index_builder { + vector_index_builder.add_block(&block)?; + } self.row_count += block.num_rows(); self.block_size += block.estimate_block_size(); self.block_writer @@ -331,6 +342,17 @@ impl StreamBlockBuilder { } else { None }; + let vector_index_state = + if let Some(ref mut vector_index_builder) = self.vector_index_builder { + let vector_index_location = + self.properties.meta_locations.block_vector_index_location(); + let vector_index_state = vector_index_builder.finalize(&vector_index_location)?; + Some(vector_index_state) + } else { + None + }; + let vector_index_size = vector_index_state.as_ref().map(|v| v.size); + let vector_index_location = vector_index_state.as_ref().map(|v| v.location.clone()); let col_metas = self.block_writer.finish(&self.properties.source_schema)?; let block_raw_data = mem::take(self.block_writer.inner_mut()); @@ -361,6 +383,8 @@ impl StreamBlockBuilder { .unwrap_or_default(), compression: self.properties.write_settings.table_compression.into(), inverted_index_size, + vector_index_size, + vector_index_location, create_on: Some(Utc::now()), ngram_filter_index_size: None, virtual_block_meta: None, @@ -371,6 +395,7 @@ impl StreamBlockBuilder { bloom_index_state, inverted_index_states, virtual_column_state, + vector_index_state, }; Ok(serialized) } @@ -392,6 +417,7 @@ pub struct StreamBlockProperties { inverted_index_builders: Vec, table_meta_timestamps: TableMetaTimestamps, support_virtual_columns: bool, + table_indexes: BTreeMap, } impl StreamBlockProperties { @@ -448,6 +474,7 @@ impl StreamBlockProperties { } } let support_virtual_columns = table.support_virtual_columns(); + let table_indexes = table.table_info.meta.indexes.clone(); Ok(Arc::new(StreamBlockProperties { ctx, meta_locations: table.meta_location_generator().clone(), @@ -462,6 +489,7 @@ impl StreamBlockProperties { inverted_index_builders, table_meta_timestamps, support_virtual_columns, + table_indexes, })) } } diff --git a/src/query/storages/fuse/src/io/write/vector_index_writer.rs b/src/query/storages/fuse/src/io/write/vector_index_writer.rs new file mode 100644 index 0000000000000..ee21b769ae826 --- /dev/null +++ b/src/query/storages/fuse/src/io/write/vector_index_writer.rs @@ -0,0 +1,247 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
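The builder defined below buffers the indexed vector columns block by block and serializes all HNSW artifacts into one parquet file per block. A hedged lifecycle sketch (`ctx`, `table_indexes`, `schema`, `blocks` and `location` are assumed to be in scope; `try_create` returns None when the enterprise license check fails or no usable synchronous vector index is defined):

    if let Some(mut builder) = VectorIndexBuilder::try_create(ctx, &table_indexes, schema) {
        for block in &blocks {
            // Buffers the vector columns referenced by the index definitions.
            builder.add_block(block)?;
        }
        // Builds the HNSW graph(s) plus quantized storage and encodes them
        // as binary columns in a single zstd-compressed parquet file.
        let state = builder.finalize(&location)?;
        assert_eq!(state.size as usize, state.data.len());
    }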
+ +use std::collections::BTreeMap; +use std::collections::HashSet; +use std::sync::Arc; +use std::time::Instant; + +use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::Column; +use databend_common_expression::ColumnId; +use databend_common_expression::DataBlock; +use databend_common_expression::TableSchemaRef; +use databend_common_expression::TableSchemaRefExt; +use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; +use databend_common_license::license::Feature; +use databend_common_license::license_manager::LicenseManagerSwitch; +use databend_common_meta_app::schema::TableIndex; +use databend_common_meta_app::schema::TableIndexType; +use databend_common_metrics::storage::metrics_inc_block_vector_index_generate_milliseconds; +use databend_storages_common_blocks::blocks_to_parquet; +use databend_storages_common_index::DistanceType; +use databend_storages_common_index::HNSWIndex; +use databend_storages_common_table_meta::meta::Location; +use databend_storages_common_table_meta::table::TableCompression; + +#[derive(Debug, Clone)] +pub struct VectorIndexState { + pub location: Location, + pub size: u64, + pub data: Vec, +} + +#[derive(Debug, Clone)] +struct VectorIndexParam { + index_name: String, + index_version: String, + m: usize, + ef_construct: usize, + distances: Vec, +} + +#[derive(Clone)] +pub struct VectorIndexBuilder { + // Parameters for each vector index + index_params: Vec, + field_offsets: Vec>, + field_offsets_set: HashSet, + + // Collected vector columns + columns: BTreeMap>, +} + +impl VectorIndexBuilder { + pub fn try_create( + ctx: Arc, + table_indexes: &BTreeMap, + schema: TableSchemaRef, + ) -> Option { + LicenseManagerSwitch::instance() + .check_enterprise_enabled(ctx.get_license_key(), Feature::VectorIndex) + .ok()?; + + let mut index_params = Vec::with_capacity(table_indexes.len()); + let mut field_offsets = Vec::with_capacity(table_indexes.len()); + let mut field_offsets_set = HashSet::new(); + + for index in table_indexes.values() { + if !matches!(index.index_type, TableIndexType::Vector) { + continue; + } + if !index.sync_creation { + continue; + } + + let mut offsets = Vec::with_capacity(index.column_ids.len()); + for column_id in &index.column_ids { + for (offset, field) in schema.fields.iter().enumerate() { + if field.column_id() == *column_id { + offsets.push((offset, *column_id)); + break; + } + } + } + // ignore invalid index + if offsets.len() != index.column_ids.len() { + continue; + } + for (offset, _) in &offsets { + field_offsets_set.insert(*offset); + } + field_offsets.push(offsets); + + // Parse index parameters + let m = match index.options.get("m") { + Some(value) => value.parse::().unwrap_or(16), + None => 16, + }; + + let ef_construct = match index.options.get("ef_construct") { + Some(value) => value.parse::().unwrap_or(64), + None => 64, + }; + + let mut distances = Vec::new(); + match index.options.get("distance") { + Some(value) => { + let distance_types: Vec<&str> = value.split(',').collect(); + for distance_type in distance_types { + let distance = match distance_type { + "cosine" => DistanceType::Dot, + "l1" => DistanceType::L1, + "l2" => DistanceType::L2, + _ => continue, + }; + distances.push(distance); + } + } + None => continue, + }; + if distances.is_empty() { + continue; + } + let index_param = VectorIndexParam { + index_name: index.name.clone(), + index_version: index.version.clone(), + m, + 
ef_construct, + distances, + }; + index_params.push(index_param); + } + + let mut columns = BTreeMap::new(); + for offset in &field_offsets_set { + columns.insert(*offset, vec![]); + } + + if !field_offsets.is_empty() { + Some(VectorIndexBuilder { + index_params, + field_offsets, + field_offsets_set, + columns, + }) + } else { + None + } + } + + pub fn add_block(&mut self, block: &DataBlock) -> Result<()> { + for offset in &self.field_offsets_set { + let block_entry = block.get_by_offset(*offset); + let column = block_entry.to_column(); + + if let Some(columns) = self.columns.get_mut(offset) { + columns.push(column); + } else { + return Err(ErrorCode::Internal("Can't find vector column")); + } + } + Ok(()) + } + + #[async_backtrace::framed] + pub fn finalize(&mut self, location: &Location) -> Result { + let start = Instant::now(); + + let mut columns = BTreeMap::new(); + for offset in &self.field_offsets_set { + columns.insert(*offset, vec![]); + } + std::mem::swap(&mut self.columns, &mut columns); + + let mut concated_columns = BTreeMap::new(); + for (offset, columns) in columns.into_iter() { + let concated_column = Column::concat_columns(columns.into_iter())?; + concated_columns.insert(offset, concated_column); + } + + let mut index_fields = Vec::new(); + let mut index_columns = Vec::new(); + let mut metadata = BTreeMap::new(); + + for (field_offsets, index_param) in self.field_offsets.iter().zip(&self.index_params) { + for (offset, column_id) in field_offsets { + let Some(column) = concated_columns.get(offset) else { + return Err(ErrorCode::Internal("Can't find vector column")); + }; + for distance in &index_param.distances { + let (mut hnsw_index_fields, mut hnsw_index_columns) = HNSWIndex::build( + index_param.m, + index_param.ef_construct, + *column_id, + column.clone(), + *distance, + )?; + index_fields.append(&mut hnsw_index_fields); + index_columns.append(&mut hnsw_index_columns); + } + } + metadata.insert( + index_param.index_name.clone(), + index_param.index_version.clone(), + ); + } + + let index_schema = TableSchemaRefExt::create(index_fields); + let index_block = DataBlock::new(index_columns, 1); + + let mut data = Vec::with_capacity(DEFAULT_BLOCK_INDEX_BUFFER_SIZE); + let _ = blocks_to_parquet( + index_schema.as_ref(), + vec![index_block], + &mut data, + // Zstd has the best compression ratio + TableCompression::Zstd, + )?; + + let size = data.len() as u64; + let state = VectorIndexState { + location: location.clone(), + size, + data, + }; + + // Perf. 
+ { + metrics_inc_block_vector_index_generate_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(state) + } +} diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs index 81dfc6dccbe26..f13ed6701482d 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs @@ -41,6 +41,7 @@ use crate::io::create_inverted_index_builders; use crate::io::BlockBuilder; use crate::io::BlockSerialization; use crate::io::BlockWriter; +use crate::io::VectorIndexBuilder; use crate::io::VirtualColumnBuilder; use crate::operations::common::BlockMetaIndex; use crate::operations::common::MutationLogEntry; @@ -168,6 +169,11 @@ impl TransformSerializeBlock { } else { None }; + let vector_index_builder = VectorIndexBuilder::try_create( + ctx.clone(), + &table.table_info.meta.indexes, + source_schema.clone(), + ); let block_builder = BlockBuilder { ctx, @@ -179,6 +185,7 @@ impl TransformSerializeBlock { ngram_args, inverted_index_builders, virtual_column_builder, + vector_index_builder, table_meta_timestamps, }; Ok(TransformSerializeBlock { diff --git a/src/query/storages/fuse/src/operations/merge.rs b/src/query/storages/fuse/src/operations/merge.rs index 49a6e2b89f300..e149196075dcd 100644 --- a/src/query/storages/fuse/src/operations/merge.rs +++ b/src/query/storages/fuse/src/operations/merge.rs @@ -28,6 +28,7 @@ use super::merge_into::MatchedAggregator; use super::mutation::SegmentIndex; use crate::io::create_inverted_index_builders; use crate::io::BlockBuilder; +use crate::io::VectorIndexBuilder; use crate::statistics::ClusterStatsGenerator; use crate::FuseTable; @@ -97,6 +98,11 @@ impl FuseTable { &self.table_info.meta.schema, )?; let inverted_index_builders = create_inverted_index_builders(&self.table_info.meta); + let vector_index_builder = VectorIndexBuilder::try_create( + ctx.clone(), + &self.table_info.meta.indexes, + new_schema.clone(), + ); let block_builder = BlockBuilder { ctx: ctx.clone(), @@ -107,6 +113,7 @@ impl FuseTable { bloom_columns_map, ngram_args, inverted_index_builders, + vector_index_builder, // todo virtual_column_builder: None, table_meta_timestamps, diff --git a/src/query/storages/fuse/src/operations/read/util.rs b/src/query/storages/fuse/src/operations/read/util.rs index e93dfe503c82d..0eed2a1250614 100644 --- a/src/query/storages/fuse/src/operations/read/util.rs +++ b/src/query/storages/fuse/src/operations/read/util.rs @@ -77,6 +77,7 @@ pub(crate) fn add_data_block_meta( base_block_ids, inner: meta, matched_rows: block_meta.matched_rows.clone(), + vector_scores: block_meta.vector_scores.clone(), }; meta = Some(Box::new(internal_column_meta)); } diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index d68885f5335ac..b42078783cb85 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -51,7 +51,7 @@ use databend_storages_common_cache::CachedObject; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::NgramArgs; use databend_storages_common_pruner::BlockMetaIndex; -use databend_storages_common_pruner::TopNPrunner; +use databend_storages_common_pruner::TopNPruner; use 
databend_storages_common_table_meta::meta::column_oriented_segment::meta_name; use databend_storages_common_table_meta::meta::column_oriented_segment::stat_name; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOCK_SIZE; @@ -157,7 +157,15 @@ impl FuseTable { nodes_num = cluster.nodes.len(); } - if self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning) { + let has_vector_topn = if let Some(ref push_downs) = push_downs { + push_downs.vector_topn() + } else { + false + }; + + if (self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning)) + && !has_vector_topn + { let mut segments = Vec::with_capacity(segment_locs.len()); for (idx, segment_location) in segment_locs.into_iter().enumerate() { segments.push(FuseLazyPartInfo::create(idx, segment_location)) @@ -476,7 +484,7 @@ impl FuseTable { let push_down = push_down.as_ref().unwrap(); let limit = push_down.limit.unwrap(); let sort = push_down.order_by.clone(); - let topn_pruner = TopNPrunner::create(schema, sort, limit); + let topn_pruner = TopNPruner::create(schema, sort, limit); prune_pipeline.resize(1, false)?; prune_pipeline.add_transform(move |input, output| { TopNPruneTransform::create(input, output, topn_pruner.clone()) diff --git a/src/query/storages/fuse/src/pruning/block_pruner.rs b/src/query/storages/fuse/src/pruning/block_pruner.rs index a08e270689fc1..4db5d01d28d14 100644 --- a/src/query/storages/fuse/src/pruning/block_pruner.rs +++ b/src/query/storages/fuse/src/pruning/block_pruner.rs @@ -289,6 +289,7 @@ impl BlockPruner { segment_location: segment_location.location.0.clone(), snapshot_location: segment_location.snapshot_loc.clone(), matched_rows: prune_result.matched_rows.clone(), + vector_scores: None, virtual_block_meta: prune_result.virtual_block_meta.clone(), }, block, @@ -357,6 +358,7 @@ impl BlockPruner { segment_location: segment_location.location.0.clone(), snapshot_location: segment_location.snapshot_loc.clone(), matched_rows: None, + vector_scores: None, virtual_block_meta: None, }, block_meta.clone(), diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index 333092da29b09..ec907dea94603 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -41,7 +41,7 @@ use databend_storages_common_pruner::PagePruner; use databend_storages_common_pruner::PagePrunerCreator; use databend_storages_common_pruner::RangePruner; use databend_storages_common_pruner::RangePrunerCreator; -use databend_storages_common_pruner::TopNPrunner; +use databend_storages_common_pruner::TopNPruner; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterKey; use databend_storages_common_table_meta::meta::ColumnStatistics; @@ -64,6 +64,7 @@ use crate::pruning::BloomPrunerCreator; use crate::pruning::FusePruningStatistics; use crate::pruning::InvertedIndexPruner; use crate::pruning::SegmentLocation; +use crate::pruning::VectorIndexPruner; use crate::pruning::VirtualColumnPruner; const SMALL_DATASET_SAMPLE_THRESHOLD: usize = 100; @@ -447,7 +448,8 @@ impl FusePruner { // Todo:: for now, all operation (contains other mutation other than delete, like select,update etc.) // will get here, we can prevent other mutations like update and so on. // TopN pruner. 
- self.topn_pruning(metas) + let metas = self.topn_pruning(metas)?; + self.vector_pruning(metas).await } } @@ -516,7 +518,8 @@ impl FusePruner { let res = worker?; metas.extend(res); } - self.topn_pruning(metas) + let metas = self.topn_pruning(metas)?; + self.vector_pruning(metas).await } // topn pruner: @@ -535,12 +538,44 @@ impl FusePruner { let push_down = push_down.as_ref().unwrap(); let limit = push_down.limit.unwrap(); let sort = push_down.order_by.clone(); - let topn_pruner = TopNPrunner::create(schema, sort, limit); + let topn_pruner = TopNPruner::create(schema, sort, limit); return Ok(topn_pruner.prune(metas.clone()).unwrap_or(metas)); } Ok(metas) } + async fn vector_pruning( + &self, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + let push_down = self.push_down.clone(); + if push_down + .as_ref() + .filter(|p| p.vector_index.is_some()) + .is_some() + { + let schema = self.table_schema.clone(); + let push_down = push_down.as_ref().unwrap(); + let filters = push_down.filters.clone(); + let sort = push_down.order_by.clone(); + let limit = push_down.limit; + let vector_index = push_down.vector_index.clone().unwrap(); + + let vector_pruner = VectorIndexPruner::create( + self.pruning_ctx.ctx.clone(), + self.pruning_ctx.dal.clone(), + schema, + vector_index, + filters, + sort, + limit, + )?; + let pruned_metas = vector_pruner.prune(metas.clone()).await?; + return Ok(pruned_metas); + } + Ok(metas) + } + // Pruning stats. pub fn pruning_stats(&self) -> databend_common_catalog::plan::PruningStatistics { let stats = self.pruning_ctx.pruning_stats.clone(); diff --git a/src/query/storages/fuse/src/pruning/mod.rs b/src/query/storages/fuse/src/pruning/mod.rs index 3c90972b2df5a..650e11aba4c7b 100644 --- a/src/query/storages/fuse/src/pruning/mod.rs +++ b/src/query/storages/fuse/src/pruning/mod.rs @@ -21,6 +21,7 @@ mod inverted_index_pruner; mod pruner_location; mod pruning_statistics; mod segment_pruner; +mod vector_index_pruner; mod virtual_column_pruner; pub use block_pruner::BlockPruner; @@ -37,4 +38,5 @@ pub use pruner_location::create_segment_location_vector; pub use pruner_location::SegmentLocation; pub use pruning_statistics::FusePruningStatistics; pub use segment_pruner::SegmentPruner; +pub use vector_index_pruner::VectorIndexPruner; pub use virtual_column_pruner::VirtualColumnPruner; diff --git a/src/query/storages/fuse/src/pruning/vector_index_pruner.rs b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs new file mode 100644 index 0000000000000..6257048c84891 --- /dev/null +++ b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs @@ -0,0 +1,230 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
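The pruner below keeps the best `limit` rows across all blocks in a FixedLengthPriorityQueue of VectorScore values whose Ord is deliberately reversed: distances are "smaller is better", so the queue must evict the largest score when full. A standalone toy version of that inversion (not this crate's type):

    use std::cmp::Ordering;

    #[derive(PartialEq)]
    struct Score(f32);

    impl Eq for Score {}

    impl PartialOrd for Score {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    impl Ord for Score {
        // Compare other-to-self so that "greater" means "smaller distance",
        // letting a max-oriented fixed-length queue drop the worst matches.
        fn cmp(&self, other: &Self) -> Ordering {
            other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
        }
    }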
+ +use std::cmp::Ordering; +use std::collections::HashSet; +use std::sync::Arc; + +use databend_common_catalog::plan::Filters; +use databend_common_catalog::plan::VectorIndexInfo; +use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::types::F32; +use databend_common_expression::RemoteExpr; +use databend_common_expression::TableSchemaRef; +use databend_common_expression::VECTOR_SCORE_COL_NAME; +use databend_storages_common_index::DistanceType; +use databend_storages_common_index::FixedLengthPriorityQueue; +use databend_storages_common_io::ReadSettings; +use databend_storages_common_pruner::BlockMetaIndex; +use databend_storages_common_table_meta::meta::BlockMeta; +use opendal::Operator; + +use crate::io::read::VectorIndexReader; + +/// Vector index pruner. +#[derive(Clone)] +pub struct VectorIndexPruner { + ctx: Arc, + operator: Operator, + _schema: TableSchemaRef, + vector_index: VectorIndexInfo, + filters: Option, + sort: Vec<(RemoteExpr, bool, bool)>, + limit: Option, +} + +impl VectorIndexPruner { + pub fn create( + ctx: Arc, + operator: Operator, + schema: TableSchemaRef, + vector_index: VectorIndexInfo, + filters: Option, + sort: Vec<(RemoteExpr, bool, bool)>, + limit: Option, + ) -> Result { + Ok(Self { + ctx, + operator, + _schema: schema, + vector_index, + filters, + sort, + limit, + }) + } +} + +impl VectorIndexPruner { + pub async fn prune( + &self, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + let settings = ReadSettings::from_ctx(&self.ctx)?; + let distance_type = match self.vector_index.func_name.as_str() { + "cosine_distance" => DistanceType::Dot, + "l1_distance" => DistanceType::L1, + "l2_distance" => DistanceType::L2, + _ => unreachable!(), + }; + let columns = vec![ + format!( + "{}-{}_graph_links", + self.vector_index.column_id, distance_type + ), + format!( + "{}-{}_graph_data", + self.vector_index.column_id, distance_type + ), + format!( + "{}-{}_encoded_u8_meta", + self.vector_index.column_id, distance_type + ), + format!( + "{}-{}_encoded_u8_data", + self.vector_index.column_id, distance_type + ), + ]; + + let query_values = unsafe { + std::mem::transmute::, Vec>(self.vector_index.query_values.clone()) + }; + + let vector_reader = VectorIndexReader::create( + self.operator.clone(), + settings, + distance_type, + columns, + query_values, + ); + + // @TODO support filters + if self.filters.is_none() && !self.sort.is_empty() && self.limit.is_some() { + let (sort, asc, _nulls_first) = &self.sort[0]; + if let RemoteExpr::ColumnRef { id, .. 
} = sort { + if id == VECTOR_SCORE_COL_NAME && *asc { + let limit = self.limit.unwrap(); + return self + .vector_index_topn_prune(vector_reader, limit, metas) + .await; + } + } + } + + self.vector_index_prune(vector_reader, metas).await + } + + async fn vector_index_topn_prune( + &self, + vector_reader: VectorIndexReader, + limit: usize, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + let mut top_queue = FixedLengthPriorityQueue::new(limit); + + for (index, (_, block_meta)) in metas.iter().enumerate() { + let Some(location) = block_meta.vector_index_location.clone() else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + self.vector_index.index_name + ))); + }; + + let row_count = block_meta.row_count as usize; + let score_offsets = vector_reader.prune(limit, row_count, &location.0).await?; + + for score_offset in score_offsets { + let vector_score = VectorScore { + index, + row_idx: score_offset.idx, + score: F32::from(score_offset.score), + }; + top_queue.push(vector_score); + } + } + let top_scores = top_queue.into_sorted_vec(); + let top_indexes: HashSet = top_scores.iter().map(|s| s.index).collect(); + + let mut pruned_metas = Vec::with_capacity(top_indexes.len()); + for (index, (mut block_meta_index, block_meta)) in metas.into_iter().enumerate() { + if !top_indexes.contains(&index) { + continue; + } + let mut vector_scores = Vec::new(); + for top_score in &top_scores { + if top_score.index == index { + vector_scores.push((top_score.row_idx as usize, top_score.score)); + } + } + block_meta_index.vector_scores = Some(vector_scores); + pruned_metas.push((block_meta_index, block_meta)); + } + + Ok(pruned_metas) + } + + async fn vector_index_prune( + &self, + vector_reader: VectorIndexReader, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + // can't use vector index topn to prune, only generate vector scores. + let mut new_metas = Vec::with_capacity(metas.len()); + for (mut block_meta_index, block_meta) in metas.into_iter() { + let Some(location) = block_meta.vector_index_location.clone() else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + self.vector_index.index_name + ))); + }; + + let row_count = block_meta.row_count as usize; + // use row_count as limit to generate scores for all rows. + let score_offsets = vector_reader + .generate_scores(row_count, &location.0) + .await?; + + let mut vector_scores = Vec::with_capacity(row_count); + for score_offset in &score_offsets { + vector_scores.push((score_offset.idx as usize, F32::from(score_offset.score))); + } + block_meta_index.vector_scores = Some(vector_scores); + new_metas.push((block_meta_index, block_meta)); + } + + Ok(new_metas) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct VectorScore { + index: usize, + row_idx: u32, + score: F32, +} + +impl Ord for VectorScore { + fn cmp(&self, other: &Self) -> Ordering { + // reverse order to keep lower score. 
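+        // Note: FixedLengthPriorityQueue keeps the `limit` greatest elements
+        // under this `Ord`. Comparing `other.score` to `self.score` reverses
+        // the natural order, so a smaller distance ranks as "greater" and the
+        // queue converges on the `limit` lowest-scoring (nearest) rows.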
+ other.score.cmp(&self.score) + } +} + +impl PartialOrd for VectorScore { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs b/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs index d7f5369a79f7e..d65b7473a835f 100644 --- a/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs +++ b/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs @@ -237,6 +237,7 @@ impl AsyncSink for ColumnOrientedBlockPruneSink { segment_location: segment_location.location.0.clone(), snapshot_location: segment_location.snapshot_loc.clone(), matched_rows: None, + vector_scores: None, virtual_block_meta: None, }; diff --git a/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs b/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs index 69e9681187967..d79a9d4dd7b8c 100644 --- a/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs +++ b/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs @@ -22,7 +22,7 @@ use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_transforms::BlockMetaAccumulatingTransform; use databend_common_pipeline_transforms::BlockMetaAccumulatingTransformer; use databend_storages_common_pruner::BlockMetaIndex; -use databend_storages_common_pruner::TopNPrunner; +use databend_storages_common_pruner::TopNPruner; use databend_storages_common_table_meta::meta::BlockMeta; use crate::pruning_pipeline::block_prune_result_meta::BlockPruneResult; @@ -30,7 +30,7 @@ use crate::pruning_pipeline::block_prune_result_meta::BlockPruneResult; // TopNPruneTransform is a processor that will accumulate the block meta and not push to // downstream until all data is received and pruned. 
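That contract deserves a concrete illustration before the struct itself. A reduced, framework-free sketch of the accumulate-then-emit shape (stand-in tuple types instead of `BlockMetaIndex`/`Arc<BlockMeta>`, and a toy ordering in place of the real top-n logic):

```rust
// Buffer every incoming batch; only prune and emit once the input stream is
// exhausted, because a correct global top-n needs to have seen every block.
struct AccumulatingPruner {
    metas: Vec<(usize, u64)>, // (block index, row count) stand-ins
    limit: usize,
}

impl AccumulatingPruner {
    // Mirrors `transform`: absorb one batch, emit nothing yet.
    fn transform(&mut self, batch: Vec<(usize, u64)>) {
        self.metas.extend(batch);
    }

    // Mirrors `on_finish`: prune once, after all input has arrived.
    fn on_finish(mut self) -> Vec<(usize, u64)> {
        self.metas.sort_by_key(|&(_, rows)| rows); // toy stand-in ordering
        self.metas.truncate(self.limit);
        self.metas
    }
}

fn main() {
    let mut pruner = AccumulatingPruner { metas: Vec::new(), limit: 2 };
    pruner.transform(vec![(0, 42), (1, 7)]);
    pruner.transform(vec![(2, 19)]);
    println!("{:?}", pruner.on_finish()); // [(1, 7), (2, 19)]
}
```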
pub struct TopNPruneTransform { - topn_pruner: TopNPrunner, + topn_pruner: TopNPruner, metas: Vec<(BlockMetaIndex, Arc)>, } @@ -51,7 +51,7 @@ impl TopNPruneTransform { pub fn create( input: Arc, output: Arc, - topn_pruner: TopNPrunner, + topn_pruner: TopNPruner, ) -> Result { Ok(ProcessorPtr::create( BlockMetaAccumulatingTransformer::create(input, output, TopNPruneTransform { diff --git a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs index 39cd32b9e79f7..3d915f0800e0c 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs @@ -99,6 +99,7 @@ static DELETES_FILE_PUSHDOWN_INFO: LazyLock = LazyLock::new(|| Pus agg_index: None, change_type: None, inverted_index: None, + vector_index: None, sample: None, }); diff --git a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test index 448295c44fa07..a9e43d7a43fd7 100644 --- a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test +++ b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test @@ -22,21 +22,24 @@ statement ok use test_vector_index statement ok -CREATE TABLE IF NOT EXISTS t(id Int, embedding Vector(128), VECTOR INDEX idx (embedding) m=4 ef_construct=6) Engine = Fuse +CREATE TABLE IF NOT EXISTS t(id Int, embedding Vector(8), VECTOR INDEX idx (embedding) m=10 ef_construct=40 distance='cosine') Engine = Fuse query TT SHOW CREATE TABLE t ---- -t CREATE TABLE t ( id INT NULL, embedding VECTOR(128) NULL, SYNC VECTOR INDEX idx (embedding) ef_construct = '6', m = '4' ) ENGINE=FUSE +t CREATE TABLE t ( id INT NULL, embedding VECTOR(8) NULL, SYNC VECTOR INDEX idx (embedding) distance = 'cosine', ef_construct = '40', m = '10' ) ENGINE=FUSE statement ok DROP VECTOR INDEX idx ON t; statement error -CREATE VECTOR INDEX idx2 ON t(embedding) m=0 ef_construct=5; +CREATE VECTOR INDEX idx2 ON t(embedding) m=10 ef_construct=5 + +statement error +CREATE VECTOR INDEX idx2 ON t(embedding) m=0 ef_construct=5 distance='cosine,l1,l2'; statement ok -CREATE VECTOR INDEX idx2 ON t(embedding) m=5 ef_construct=7; +CREATE VECTOR INDEX idx2 ON t(embedding) m=10 ef_construct=40 distance='cosine,l1,l2'; statement error DROP INVERTED INDEX idx2 ON t; @@ -44,7 +47,151 @@ DROP INVERTED INDEX idx2 ON t; query TT SHOW CREATE TABLE t ---- -t CREATE TABLE t ( id INT NULL, embedding VECTOR(128) NULL, SYNC VECTOR INDEX idx2 (embedding) ef_construct = '7', m = '5' ) ENGINE=FUSE +t CREATE TABLE t ( id INT NULL, embedding VECTOR(8) NULL, SYNC VECTOR INDEX idx2 (embedding) distance = 'cosine,l1,l2', ef_construct = '40', m = '10' ) ENGINE=FUSE + +statement ok +INSERT INTO t VALUES +(1, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]), +(2, [0.17790798, 0.0132427, 0.55352279, 0.49129727, 0.74246407, 0.97345777, 0.83489323, 0.86012174]), +(3, [0.2703968, 0.26768266, 0.96587005, 0.04760408, 0.92289409, 0.15799311, 0.86381163, 0.2922287]), +(4, [0.0810719, 0.27882267, 0.6015564, 0.34236571, 0.58889543, 0.83293431, 0.67012723, 0.76303241]) + +statement ok +INSERT INTO t VALUES +(5, [0.66399931, 0.35041433, 0.2159864, 0.89537508, 0.44577037, 0.57896497, 0.36630178, 0.33816571]), +(6, [0.32052319, 0.38567453, 0.62853221, 0.84816365, 0.15853234, 0.33207714, 0.7673085, 0.69513879]), +(7, 
[0.82590676, 0.35860656, 0.6277274, 0.95148122, 0.81893313, 0.91440945, 0.15803721, 0.5866869]), +(8, [0.42135513, 0.05637937, 0.88864157, 0.59217909, 0.98435169, 0.39234101, 0.41490889, 0.02760555]) + +statement ok +INSERT INTO t VALUES +(9, [0.61418788, 0.34545306, 0.14638622, 0.53249639, 0.09139293, 0.84940919, 0.105433, 0.4156201]), +(10, [0.21828953, 0.87852734, 0.64221122, 0.24536394, 0.81689593, 0.86341877, 0.7218334, 0.45028494]), +(11, [0.43279006, 0.45523681, 0.76060274, 0.66284758, 0.19131476, 0.13564463, 0.88712212, 0.93279565]), +(12, [0.79671359, 0.86079789, 0.94477631, 0.5116732, 0.29733205, 0.33645561, 0.41380333, 0.75909903]) + +statement ok +INSERT INTO t VALUES +(13, [0.94666755, 0.39522571, 0.39857241, 0.88080323, 0.53470771, 0.09486194, 0.17524627, 0.86497559]), +(14, [0.8397819, 0.37221789, 0.32885295, 0.20470829, 0.49838217, 0.00736057, 0.45418757, 0.6956924 ]), +(15, [0.13230447, 0.630588, 0.10812326, 0.21558228, 0.83768057, 0.48870546, 0.65021806, 0.31626541]), +(16, [0.2667851, 0.01529589, 0.98994706, 0.31870983, 0.31783372, 0.34863699, 0.30254189, 0.84441678]) + + +statement ok +CREATE TABLE IF NOT EXISTS t1(id Int, embedding Vector(8)) Engine = Fuse + +statement ok +INSERT INTO t1 SELECT id, embedding FROM t + + +query IF +SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +1 0.009774268 +10 0.033747792 +12 0.060161233 +11 0.14048636 +8 0.14554787 + +query IF +SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +1 0.0 +10 0.0592916 +12 0.085179806 +8 0.13477594 +3 0.13801938 + +query IF +SELECT id, cosine_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +4 0.043031156 +11 0.05496204 +10 0.056846976 +2 0.085320055 +16 0.111266375 + +query IF +SELECT id, cosine_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +4 0.031086385 +2 0.075579524 +10 0.09369081 +11 0.097252846 +16 0.105270445 + +query IF +SELECT id, l1_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +1 0.0 +10 1.5320582 +12 1.7202058 +8 1.770357 +15 1.8413826 + +query IF +SELECT id, l1_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +1 1.4901161e-7 +10 1.5290257 +12 1.7164081 +8 1.7851611 +15 1.8448958 + +query IF +SELECT id, l1_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +4 0.87704676 +6 1.559421 +16 1.6866446 +11 1.7874013 +10 1.8277186 + +query IF +SELECT id, l1_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +4 0.86807996 +6 1.571893 +16 1.6951541 +2 1.8405688 +11 
1.8465424 + +query IF +SELECT id, l2_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +1 0.052975703 +10 0.73442644 +12 0.84724534 +8 0.92271036 +3 0.9368646 + +query IF +SELECT id, l2_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +1 8.940697e-8 +10 0.63450795 +12 0.7636615 +8 0.91897535 +3 0.93300396 + +query IF +SELECT id, l2_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +4 0.41017252 +16 0.69951516 +6 0.73418504 +2 0.7379028 +11 0.8144757 + +query IF +SELECT id, l2_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +4 0.40161562 +16 0.7057761 +2 0.7328551 +6 0.73338425 +11 0.76073563 statement ok use default diff --git a/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test b/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test index a5e02dd865000..8256f5f6976a6 100644 --- a/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test +++ b/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test @@ -8,3 +8,33 @@ query F select [1, 2] <-> [2, 3] as sim ---- 1.4142135 + +query FF +select cosine_distance([1.1,2.2,3], [1,1,1]), cosine_distance([1,2.2,3], [4,6,8]) +---- +0.062412794753543555 0.00699537571767439 + +query FF +select cosine_distance([1.1,2.2,3]::vector(3), [1,1,1]::vector(3)), cosine_distance([1,2.2,3]::vector(3), [4,6,8]::vector(3)) +---- +0.06241274 0.0069953203 + +query FF +select l1_distance([1.1,2.2,3], [1,1,1]), l1_distance([1,2.2,3], [4,6,8]) +---- +3.3000000000000003 11.8 + +query FF +select l1_distance([1.1,2.2,3]::vector(3), [1,1,1]::vector(3)), l1_distance([1,2.2,3]::vector(3), [4,6,8]::vector(3)) +---- +3.3000002 11.8 + +query FF +select l2_distance([1.1,2.2,3], [1,1,1]), l2_distance([1,2.2,3], [4,6,8]) +---- +2.3345235059857505 6.959885056522126 + +query FF +select l2_distance([1.1,2.2,3]::vector(3), [1,1,1]::vector(3)), l2_distance([1,2.2,3]::vector(3), [4,6,8]::vector(3)) +---- +2.3345234 6.959885 From 779dfdfb2d34c6672d653a50ffcbf221c63c2840 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 10 Jul 2025 13:57:11 +0800 Subject: [PATCH 2/4] support explain display vetor pruning, add write logs --- Cargo.lock | 1 - src/common/metrics/src/metrics/storage.rs | 24 ++++++++++ .../catalog/src/plan/pruning_statistics.rs | 6 +++ src/query/expression/src/evaluator.rs | 9 ++-- src/query/expression/src/type_check.rs | 5 ++- src/query/sql/src/executor/format.rs | 12 +++++ src/query/storages/common/index/Cargo.toml | 1 - .../fuse/src/io/write/vector_index_writer.rs | 45 ++++++++++++++++--- .../storages/fuse/src/pruning/fuse_pruner.rs | 31 +++++++++++++ .../fuse/src/pruning/pruning_statistics.rs | 24 ++++++++++ .../mode/cluster/memo/aggregate_property.test | 4 +- .../mode/cluster/memo/join_property.test | 8 ++-- .../mode/cluster/memo/mix_property.test | 2 +- 13 files changed, 154 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 551227dddbed7..312e321a41e5d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5486,7 +5486,6 @@ dependencies = [ 
"databend-common-exception", "databend-common-expression", "databend-common-functions", - "databend-common-vector", "databend-storages-common-table-meta", "divan", "fastrace", diff --git a/src/common/metrics/src/metrics/storage.rs b/src/common/metrics/src/metrics/storage.rs index b60a72acb416d..730e5ca093a7d 100644 --- a/src/common/metrics/src/metrics/storage.rs +++ b/src/common/metrics/src/metrics/storage.rs @@ -254,6 +254,14 @@ static BYTES_BLOCK_INVERTED_INDEX_PRUNING_BEFORE: LazyLock = LazyLock::new(|| register_counter("fuse_bytes_block_inverted_index_pruning_before")); static BYTES_BLOCK_INVERTED_INDEX_PRUNING_AFTER: LazyLock = LazyLock::new(|| register_counter("fuse_bytes_block_inverted_index_pruning_after")); +static BLOCKS_VECTOR_INDEX_PRUNING_BEFORE: LazyLock = + LazyLock::new(|| register_counter("fuse_blocks_vector_index_pruning_before")); +static BLOCKS_VECTOR_INDEX_PRUNING_AFTER: LazyLock = + LazyLock::new(|| register_counter("fuse_blocks_vector_index_pruning_after")); +static BYTES_BLOCK_VECTOR_INDEX_PRUNING_BEFORE: LazyLock = + LazyLock::new(|| register_counter("fuse_bytes_block_vector_index_pruning_before")); +static BYTES_BLOCK_VECTOR_INDEX_PRUNING_AFTER: LazyLock = + LazyLock::new(|| register_counter("fuse_bytes_block_vector_index_pruning_after")); static PRUNING_PREWHERE_NUMS: LazyLock = LazyLock::new(|| register_counter("fuse_pruning_prewhere_nums")); static PRUNING_MILLISECONDS: LazyLock = @@ -716,6 +724,22 @@ pub fn metrics_inc_bytes_block_inverted_index_pruning_after(c: u64) { BYTES_BLOCK_INVERTED_INDEX_PRUNING_AFTER.inc_by(c); } +pub fn metrics_inc_blocks_vector_index_pruning_before(c: u64) { + BLOCKS_VECTOR_INDEX_PRUNING_BEFORE.inc_by(c); +} + +pub fn metrics_inc_blocks_vector_index_pruning_after(c: u64) { + BLOCKS_VECTOR_INDEX_PRUNING_AFTER.inc_by(c); +} + +pub fn metrics_inc_bytes_block_vector_index_pruning_before(c: u64) { + BYTES_BLOCK_VECTOR_INDEX_PRUNING_BEFORE.inc_by(c); +} + +pub fn metrics_inc_bytes_block_vector_index_pruning_after(c: u64) { + BYTES_BLOCK_VECTOR_INDEX_PRUNING_AFTER.inc_by(c); +} + pub fn metrics_inc_pruning_prewhere_nums(c: u64) { PRUNING_PREWHERE_NUMS.inc_by(c); } diff --git a/src/query/catalog/src/plan/pruning_statistics.rs b/src/query/catalog/src/plan/pruning_statistics.rs index 9dee48cd5bbf4..13d59825143dc 100644 --- a/src/query/catalog/src/plan/pruning_statistics.rs +++ b/src/query/catalog/src/plan/pruning_statistics.rs @@ -29,6 +29,10 @@ pub struct PruningStatistics { /// Block inverted index filter pruning stats. pub blocks_inverted_index_pruning_before: usize, pub blocks_inverted_index_pruning_after: usize, + + /// Block vector index filter pruning stats. 
+ pub blocks_vector_index_pruning_before: usize, + pub blocks_vector_index_pruning_after: usize, } impl PruningStatistics { @@ -41,5 +45,7 @@ impl PruningStatistics { self.blocks_bloom_pruning_after += other.blocks_bloom_pruning_after; self.blocks_inverted_index_pruning_before += other.blocks_inverted_index_pruning_before; self.blocks_inverted_index_pruning_after += other.blocks_inverted_index_pruning_after; + self.blocks_vector_index_pruning_before += other.blocks_vector_index_pruning_before; + self.blocks_vector_index_pruning_after += other.blocks_vector_index_pruning_after; } } diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index b4ebe56e3c48f..fccb97ddc43c7 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -921,11 +921,13 @@ impl<'a> Evaluator<'a> { } } (DataType::Array(inner_src_ty), DataType::Vector(inner_dest_ty)) => { - if !matches!(&**inner_src_ty, DataType::Number(_) | DataType::Decimal(_)) - || matches!(inner_dest_ty, VectorDataType::Int8(_)) + if !matches!( + inner_src_ty.remove_nullable(), + DataType::Number(_) | DataType::Decimal(_) + ) || matches!(inner_dest_ty, VectorDataType::Int8(_)) { return Err(ErrorCode::BadArguments(format!( - "unable to cast type `{src_type}` to type `{dest_type}`" + "unable to cast type `{src_type}` to vector type `{dest_type}`" )) .set_span(span)); } @@ -971,6 +973,7 @@ impl<'a> Evaluator<'a> { ) .set_span(span)); } + let col = col.remove_nullable(); match col { Column::Number(num_col) => { for i in 0..dimension { diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index 2b60877f13d12..2df907c4ac17e 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -639,7 +639,10 @@ fn can_cast_to(src_ty: &DataType, dest_ty: &DataType) -> bool { true } (DataType::Array(fields_src_ty), DataType::Vector(_)) - if matches!(&**fields_src_ty, DataType::Number(_) | DataType::Decimal(_)) => + if matches!( + fields_src_ty.remove_nullable(), + DataType::Number(_) | DataType::Decimal(_) + ) => { true } diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 684607c500e4e..b825e831de176 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -1734,6 +1734,18 @@ fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec 0 { + if !blocks_pruning_description.is_empty() { + blocks_pruning_description += ", "; + } + blocks_pruning_description += &format!( + "vector pruning: {} to {}", + info.pruning_stats.blocks_vector_index_pruning_before, + info.pruning_stats.blocks_vector_index_pruning_after + ); + } + // Combine segment pruning and blocks pruning descriptions if any if info.pruning_stats.segments_range_pruning_before > 0 || !blocks_pruning_description.is_empty() diff --git a/src/query/storages/common/index/Cargo.toml b/src/query/storages/common/index/Cargo.toml index 7f230d41573a1..07cf10ba70de2 100644 --- a/src/query/storages/common/index/Cargo.toml +++ b/src/query/storages/common/index/Cargo.toml @@ -14,7 +14,6 @@ databend-common-ast = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } databend-common-functions = { workspace = true } -databend-common-vector = { workspace = true } databend-storages-common-table-meta = { workspace = true } anyerror = { workspace = true } diff --git a/src/query/storages/fuse/src/io/write/vector_index_writer.rs 
b/src/query/storages/fuse/src/io/write/vector_index_writer.rs index ee21b769ae826..8a69b985b8084 100644 --- a/src/query/storages/fuse/src/io/write/vector_index_writer.rs +++ b/src/query/storages/fuse/src/io/write/vector_index_writer.rs @@ -36,6 +36,11 @@ use databend_storages_common_index::DistanceType; use databend_storages_common_index::HNSWIndex; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::table::TableCompression; +use log::debug; +use log::info; + +const DEFAULT_M: usize = 16; +const DEFAULT_EF_CONSTRUCT: usize = 100; #[derive(Debug, Clone)] pub struct VectorIndexState { @@ -70,6 +75,11 @@ impl VectorIndexBuilder { table_indexes: &BTreeMap, schema: TableSchemaRef, ) -> Option { + info!( + "Starting vector index creation with {} table indexes", + table_indexes.len() + ); + LicenseManagerSwitch::instance() .check_enterprise_enabled(ctx.get_license_key(), Feature::VectorIndex) .ok()?; @@ -86,6 +96,7 @@ impl VectorIndexBuilder { continue; } + info!("Processing vector index: {}", index.name); let mut offsets = Vec::with_capacity(index.column_ids.len()); for column_id in &index.column_ids { for (offset, field) in schema.fields.iter().enumerate() { @@ -97,6 +108,10 @@ impl VectorIndexBuilder { } // ignore invalid index if offsets.len() != index.column_ids.len() { + debug!( + "Ignoring invalid vector index: {}, missing columns", + index.name + ); continue; } for (offset, _) in &offsets { @@ -106,13 +121,13 @@ impl VectorIndexBuilder { // Parse index parameters let m = match index.options.get("m") { - Some(value) => value.parse::().unwrap_or(16), - None => 16, + Some(value) => value.parse::().unwrap_or(DEFAULT_M), + None => DEFAULT_M, }; let ef_construct = match index.options.get("ef_construct") { - Some(value) => value.parse::().unwrap_or(64), - None => 64, + Some(value) => value.parse::().unwrap_or(DEFAULT_EF_CONSTRUCT), + None => DEFAULT_EF_CONSTRUCT, }; let mut distances = Vec::new(); @@ -132,8 +147,16 @@ impl VectorIndexBuilder { None => continue, }; if distances.is_empty() { + debug!( + "Ignoring vector index: {}, no valid distance types", + index.name + ); continue; } + info!( + "Added vector index parameters for {}: m={}, ef_construct={}, distances={:?}", + index.name, m, ef_construct, distances + ); let index_param = VectorIndexParam { index_name: index.name.clone(), index_version: index.version.clone(), @@ -162,6 +185,11 @@ impl VectorIndexBuilder { } pub fn add_block(&mut self, block: &DataBlock) -> Result<()> { + info!( + "Adding block with {} rows to vector index", + block.num_rows() + ); + for offset in &self.field_offsets_set { let block_entry = block.get_by_offset(*offset); let column = block_entry.to_column(); @@ -178,6 +206,7 @@ impl VectorIndexBuilder { #[async_backtrace::framed] pub fn finalize(&mut self, location: &Location) -> Result { let start = Instant::now(); + info!("Start build vector HNSW index for location: {}", location.0); let mut columns = BTreeMap::new(); for offset in &self.field_offsets_set { @@ -196,6 +225,7 @@ impl VectorIndexBuilder { let mut metadata = BTreeMap::new(); for (field_offsets, index_param) in self.field_offsets.iter().zip(&self.index_params) { + debug!("Building HNSW index for {}", index_param.index_name); for (offset, column_id) in field_offsets { let Some(column) = concated_columns.get(offset) else { return Err(ErrorCode::Internal("Can't find vector column")); @@ -238,9 +268,14 @@ impl VectorIndexBuilder { }; // Perf. 
+ let elapsed_ms = start.elapsed().as_millis() as u64; { - metrics_inc_block_vector_index_generate_milliseconds(start.elapsed().as_millis() as u64); + metrics_inc_block_vector_index_generate_milliseconds(elapsed_ms); } + info!( + "Finish build vector HNSW index: location={}, size={} bytes in {} ms", + location.0, size, elapsed_ms + ); Ok(state) } diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index ec907dea94603..703edeb55fe54 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -26,6 +26,10 @@ use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_expression::SEGMENT_NAME_COL_NAME; use databend_common_functions::BUILTIN_FUNCTIONS; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_before; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_before; use databend_common_sql::BloomIndexColumns; use databend_common_sql::DefaultExprBinder; use databend_storages_common_cache::CacheAccessor; @@ -570,7 +574,27 @@ impl FusePruner { sort, limit, )?; + + // Perf. + { + let block_size = metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_before(metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_before(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_before(metas.len() as u64); + } let pruned_metas = vector_pruner.prune(metas.clone()).await?; + + // Perf. + { + let block_size = pruned_metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_after(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + } return Ok(pruned_metas); } Ok(metas) @@ -594,6 +618,11 @@ impl FusePruner { let blocks_inverted_index_pruning_after = stats.get_blocks_inverted_index_pruning_after() as usize; + let blocks_vector_index_pruning_before = + stats.get_blocks_vector_index_pruning_before() as usize; + let blocks_vector_index_pruning_after = + stats.get_blocks_vector_index_pruning_after() as usize; + databend_common_catalog::plan::PruningStatistics { segments_range_pruning_before, segments_range_pruning_after, @@ -603,6 +632,8 @@ impl FusePruner { blocks_bloom_pruning_after, blocks_inverted_index_pruning_before, blocks_inverted_index_pruning_after, + blocks_vector_index_pruning_before, + blocks_vector_index_pruning_after, } } diff --git a/src/query/storages/fuse/src/pruning/pruning_statistics.rs b/src/query/storages/fuse/src/pruning/pruning_statistics.rs index 97838c30e2800..345baee8b22a4 100644 --- a/src/query/storages/fuse/src/pruning/pruning_statistics.rs +++ b/src/query/storages/fuse/src/pruning/pruning_statistics.rs @@ -32,6 +32,10 @@ pub struct FusePruningStatistics { /// Block inverted index filter pruning stats. pub blocks_inverted_index_pruning_before: AtomicU64, pub blocks_inverted_index_pruning_after: AtomicU64, + + /// Block vector index filter pruning stats. 
+ pub blocks_vector_index_pruning_before: AtomicU64, + pub blocks_vector_index_pruning_after: AtomicU64, } impl FusePruningStatistics { @@ -108,4 +112,24 @@ impl FusePruningStatistics { self.blocks_inverted_index_pruning_after .load(Ordering::Relaxed) } + + pub fn set_blocks_vector_index_pruning_before(&self, v: u64) { + self.blocks_vector_index_pruning_before + .fetch_add(v, Ordering::Relaxed); + } + + pub fn get_blocks_vector_index_pruning_before(&self) -> u64 { + self.blocks_vector_index_pruning_before + .load(Ordering::Relaxed) + } + + pub fn set_blocks_vector_index_pruning_after(&self, v: u64) { + self.blocks_vector_index_pruning_after + .fetch_add(v, Ordering::Relaxed); + } + + pub fn get_blocks_vector_index_pruning_after(&self) -> u64 { + self.blocks_vector_index_pruning_after + .load(Ordering::Relaxed) + } } diff --git a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test index 2e6590a9dfeb3..087095317314d 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test @@ -26,7 +26,7 @@ where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #8 -├── estimated memory: 8.44 KiB +├── estimated memory: 10.69 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -89,7 +89,7 @@ group by t_10.a, t_100.a ---- Memo ├── root group: #8 -├── estimated memory: 21.09 KiB +├── estimated memory: 26.72 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index 1e91a3baa62b6..18cc76ce805bc 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -25,7 +25,7 @@ select * from t_10, t_100, t_1000 where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #5 -├── estimated memory: 6.56 KiB +├── estimated memory: 8.31 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -73,7 +73,7 @@ select * from t_1000 left join t_10 on t_1000.a = t_10.a left join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 6.09 KiB +├── estimated memory: 7.72 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -119,7 +119,7 @@ select * from t_1000 right join t_10 on t_1000.a = t_10.a right join t_100 on t_ ---- Memo ├── root group: #5 -├── estimated memory: 5.16 KiB +├── estimated memory: 6.53 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -161,7 +161,7 @@ select * from t_1000 full join t_10 on t_1000.a = t_10.a full join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 5.16 KiB +├── estimated memory: 6.53 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test index b40dcef29861a..15274e1c00eed 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test @@ -29,7 +29,7 @@ limit 10 ---- Memo ├── root group: #10 -├── estimated memory: 
22.03 KiB +├── estimated memory: 27.91 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] From eeeda237c640f7fdb64ae974a83089407d5b27dc Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 11 Jul 2025 13:11:00 +0800 Subject: [PATCH 3/4] fuse_block add vector_index_size --- .../fuse/src/table_functions/fuse_block.rs | 7 +++++ .../09_0000_vector_index_base.test | 27 ++++++++++++++++++- .../mode/cluster/memo/join_property.test | 2 +- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/query/storages/fuse/src/table_functions/fuse_block.rs b/src/query/storages/fuse/src/table_functions/fuse_block.rs index 18950f522eff7..eaecb5425c809 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_block.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_block.rs @@ -67,6 +67,10 @@ impl TableMetaFunc for FuseBlock { "ngram_index_size", TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), ), + TableField::new( + "vector_index_size", + TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), + ), TableField::new( "virtual_column_size", TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), @@ -93,6 +97,7 @@ impl TableMetaFunc for FuseBlock { let mut bloom_filter_size = Vec::with_capacity(len); let mut inverted_index_size = Vec::with_capacity(len); let mut ngram_index_size = Vec::with_capacity(len); + let mut vector_index_size = Vec::with_capacity(len); let mut virtual_column_size = Vec::with_capacity(len); let segments_io = SegmentsIO::create(ctx.clone(), tbl.operator.clone(), tbl.schema()); @@ -122,6 +127,7 @@ impl TableMetaFunc for FuseBlock { bloom_filter_size.push(block.bloom_filter_index_size); inverted_index_size.push(block.inverted_index_size); ngram_index_size.push(block.ngram_filter_index_size); + vector_index_size.push(block.vector_index_size); virtual_column_size.push( block .virtual_block_meta @@ -149,6 +155,7 @@ impl TableMetaFunc for FuseBlock { UInt64Type::from_data(bloom_filter_size).into(), UInt64Type::from_opt_data(inverted_index_size).into(), UInt64Type::from_opt_data(ngram_index_size).into(), + UInt64Type::from_opt_data(vector_index_size).into(), UInt64Type::from_opt_data(virtual_column_size).into(), ], num_rows, diff --git a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test index a9e43d7a43fd7..19f6103aba605 100644 --- a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test +++ b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test @@ -77,13 +77,38 @@ INSERT INTO t VALUES (15, [0.13230447, 0.630588, 0.10812326, 0.21558228, 0.83768057, 0.48870546, 0.65021806, 0.31626541]), (16, [0.2667851, 0.01529589, 0.98994706, 0.31870983, 0.31783372, 0.34863699, 0.30254189, 0.84441678]) - statement ok CREATE TABLE IF NOT EXISTS t1(id Int, embedding Vector(8)) Engine = Fuse statement ok INSERT INTO t1 SELECT id, embedding FROM t +query T +EXPLAIN SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +RowFetch +├── output columns: [t._vector_score (#2), t._row_id (#3), t.id (#0)] +├── columns to fetch: [id] +├── estimated rows: 5.00 +└── Limit + ├── output columns: [t._vector_score (#2), t._row_id (#3)] + ├── limit: 5 + ├── offset: 0 + ├── 
estimated rows: 5.00 + └── Sort + ├── output columns: [t._vector_score (#2), t._row_id (#3)] + ├── sort keys: [_vector_score ASC NULLS LAST] + ├── estimated rows: 16.00 + └── TableScan + ├── table: default.test_vector_index.t + ├── output columns: [_vector_score (#2), _row_id (#3)] + ├── read rows: 12 + ├── read size: 0 + ├── partitions total: 4 + ├── partitions scanned: 3 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: 5] + └── estimated rows: 16.00 query IF SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index 18cc76ce805bc..3d49805b0577b 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -203,7 +203,7 @@ select * from t_10, t_100, t_1000 ---- Memo ├── root group: #5 -├── estimated memory: 4.22 KiB +├── estimated memory: 5.34 KiB ├── Group #0 │ ├── Best properties │ │ └── { dist: Any }: expr: #0, cost: 10.000, children: [] From 16a7800ebd2c292dfa1448a03fab6358a8552e04 Mon Sep 17 00:00:00 2001 From: baishen Date: Sun, 13 Jul 2025 01:27:39 +0800 Subject: [PATCH 4/4] multi thread pruning --- src/common/metrics/src/metrics/storage.rs | 8 +- src/query/ast/src/ast/statements/statement.rs | 2 + src/query/catalog/src/plan/pushdown.rs | 4 - .../read/vector_index/vector_index_reader.rs | 27 +- .../fuse/src/operations/read_partitions.rs | 33 ++- .../storages/fuse/src/pruning/fuse_pruner.rs | 26 +- .../fuse/src/pruning/vector_index_pruner.rs | 252 ++++++++++++++---- .../storages/fuse/src/pruning_pipeline/mod.rs | 2 + .../vector_index_prune_transform.rs | 86 ++++++ .../09_0000_vector_index_base.test | 21 ++ 10 files changed, 345 insertions(+), 116 deletions(-) create mode 100644 src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs diff --git a/src/common/metrics/src/metrics/storage.rs b/src/common/metrics/src/metrics/storage.rs index 730e5ca093a7d..8059be0b39645 100644 --- a/src/common/metrics/src/metrics/storage.rs +++ b/src/common/metrics/src/metrics/storage.rs @@ -190,8 +190,8 @@ static BLOCK_VECTOR_INDEX_GENERATE_MILLISECONDS: LazyLock = LazyLock: static BLOCK_VECTOR_INDEX_READ_MILLISECONDS: LazyLock = LazyLock::new(|| { register_histogram_in_milliseconds("fuse_block_vector_index_read_milliseconds") }); -static BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS: LazyLock = LazyLock::new(|| { - register_histogram_in_milliseconds("fuse_block_vector_index_search_milliseconds") +static BLOCK_VECTOR_INDEX_PRUNING_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_pruning_milliseconds") }); static BLOCK_VECTOR_INDEX_READ_BYTES: LazyLock = LazyLock::new(|| register_counter("fuse_block_vector_index_read_bytes")); @@ -626,8 +626,8 @@ pub fn metrics_inc_block_vector_index_read_milliseconds(c: u64) { BLOCK_VECTOR_INDEX_READ_MILLISECONDS.observe(c as f64); } -pub fn metrics_inc_block_vector_index_search_milliseconds(c: u64) { - BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS.observe(c as f64); +pub fn metrics_inc_block_vector_index_pruning_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_PRUNING_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_block_vector_index_read_bytes(c: u64) { diff --git 
a/src/query/ast/src/ast/statements/statement.rs b/src/query/ast/src/ast/statements/statement.rs index 8cc197aae5bb8..e1c0a9974f68f 100644 --- a/src/query/ast/src/ast/statements/statement.rs +++ b/src/query/ast/src/ast/statements/statement.rs @@ -675,6 +675,8 @@ impl Display for Statement { unreachable!(); } write!(f, ") ")?; + } else { + write!(f, "SETTINGS ")?; } write!(f, "{stmt}")?; } diff --git a/src/query/catalog/src/plan/pushdown.rs b/src/query/catalog/src/plan/pushdown.rs index d090d4b47d59d..9bbf44e529315 100644 --- a/src/query/catalog/src/plan/pushdown.rs +++ b/src/query/catalog/src/plan/pushdown.rs @@ -256,10 +256,6 @@ impl PushDownInfo { } } - pub fn vector_topn(&self) -> bool { - !self.order_by.is_empty() && self.limit.is_some() && self.vector_index.is_some() - } - pub fn prewhere_of_push_downs(push_downs: Option<&PushDownInfo>) -> Option { if let Some(PushDownInfo { prewhere, .. }) = push_downs { prewhere.clone() diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs index bfb084787a126..13467fdef8870 100644 --- a/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::time::Instant; - use databend_common_exception::Result; -use databend_common_metrics::storage::metrics_inc_block_vector_index_search_milliseconds; use databend_storages_common_index::DistanceType; use databend_storages_common_index::HNSWIndex; use databend_storages_common_index::ScoredPointOffset; @@ -61,8 +58,6 @@ impl VectorIndexReader { row_count: usize, location: &str, ) -> Result> { - let start = Instant::now(); - let binary_columns = load_vector_index_files( self.operator.clone(), &self.settings, @@ -72,15 +67,7 @@ impl VectorIndexReader { .await?; let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; - - let res = hnsw_index.search(limit, &self.query_values)?; - - // Perf. - { - metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); - } - - Ok(res) + hnsw_index.search(limit, &self.query_values) } pub async fn generate_scores( @@ -88,8 +75,6 @@ impl VectorIndexReader { row_count: usize, location: &str, ) -> Result> { - let start = Instant::now(); - let binary_columns = load_vector_index_files( self.operator.clone(), &self.settings, @@ -99,14 +84,6 @@ impl VectorIndexReader { .await?; let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; - - let res = hnsw_index.generate_scores(row_count as u32, &self.query_values)?; - - // Perf. 
- { - metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); - } - - Ok(res) + hnsw_index.generate_scores(row_count as u32, &self.query_values) } } diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index b42078783cb85..df8f10895316f 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -83,6 +83,7 @@ use crate::pruning::BlockPruner; use crate::pruning::FusePruner; use crate::pruning::SegmentLocation; use crate::pruning::SegmentPruner; +use crate::pruning::VectorIndexPruner; use crate::pruning_pipeline::AsyncBlockPruneTransform; use crate::pruning_pipeline::ColumnOrientedBlockPruneSink; use crate::pruning_pipeline::ExtractSegmentTransform; @@ -95,6 +96,7 @@ use crate::pruning_pipeline::SendPartInfoSink; use crate::pruning_pipeline::SendPartState; use crate::pruning_pipeline::SyncBlockPruneTransform; use crate::pruning_pipeline::TopNPruneTransform; +use crate::pruning_pipeline::VectorIndexPruneTransform; use crate::segment_format_from_location; use crate::FuseLazyPartInfo; use crate::FuseSegmentFormat; @@ -157,15 +159,7 @@ impl FuseTable { nodes_num = cluster.nodes.len(); } - let has_vector_topn = if let Some(ref push_downs) = push_downs { - push_downs.vector_topn() - } else { - false - }; - - if (self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning)) - && !has_vector_topn - { + if self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning) { let mut segments = Vec::with_capacity(segment_locs.len()); for (idx, segment_location) in segment_locs.into_iter().enumerate() { segments.push(FuseLazyPartInfo::create(idx, segment_location)) @@ -491,6 +485,27 @@ impl FuseTable { })?; } + if push_down + .as_ref() + .filter(|p| p.vector_index.is_some()) + .is_some() + { + let pruning_ctx = pruner.pruning_ctx.clone(); + let schema = pruner.table_schema.clone(); + let push_down = push_down.as_ref().unwrap(); + let filters = push_down.filters.clone(); + let sort = push_down.order_by.clone(); + let limit = push_down.limit; + let vector_index = push_down.vector_index.clone().unwrap(); + + let vector_index_pruner = + VectorIndexPruner::create(pruning_ctx, schema, vector_index, filters, sort, limit)?; + prune_pipeline.resize(1, false)?; + prune_pipeline.add_transform(move |input, output| { + VectorIndexPruneTransform::create(input, output, vector_index_pruner.clone()) + })?; + } + let top_k = push_down .as_ref() .filter(|_| self.is_native()) // Only native format supports topk push down. 
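Why `prune_pipeline.resize(1, false)` before adding the transform: a global `LIMIT k` over vector scores can only be decided once candidates from every upstream pruning stream are visible in one place, so the pipeline is first funneled down to a single pipe. A sketch of the same funnel-and-merge idea using plain std threads and channels (scores are pre-scaled to `u32` here only because `f32` is not `Ord`; everything below is illustrative, not databend API):

```rust
use std::collections::BinaryHeap;
use std::sync::mpsc;
use std::thread;

fn main() {
    // Several "pipes" score blocks concurrently and send candidates to one
    // consumer, just as resize(1) routes all partial results into a single
    // VectorIndexPruneTransform.
    let (tx, rx) = mpsc::channel::<(u32, usize)>(); // (scaled score, block idx)
    for worker in 0..4usize {
        let tx = tx.clone();
        thread::spawn(move || {
            for b in 0..3 {
                let block = worker * 3 + b;
                let score = ((7 * block + 3) % 11) as u32; // stand-in distance
                tx.send((score, block)).unwrap();
            }
        });
    }
    drop(tx); // close the channel once all senders are gone

    const K: usize = 5;
    let mut heap = BinaryHeap::new(); // max-heap: top is the worst score kept
    for candidate in rx {
        heap.push(candidate);
        if heap.len() > K {
            heap.pop(); // evict the current worst, retaining the K smallest
        }
    }
    println!("global top-{K}: {:?}", heap.into_sorted_vec());
}
```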
diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index 703edeb55fe54..03e6cbfd51d94 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -26,10 +26,6 @@ use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_expression::SEGMENT_NAME_COL_NAME; use databend_common_functions::BUILTIN_FUNCTIONS; -use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_after; -use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_before; -use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_after; -use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_before; use databend_common_sql::BloomIndexColumns; use databend_common_sql::DefaultExprBinder; use databend_storages_common_cache::CacheAccessor; @@ -566,8 +562,7 @@ impl FusePruner { let vector_index = push_down.vector_index.clone().unwrap(); let vector_pruner = VectorIndexPruner::create( - self.pruning_ctx.ctx.clone(), - self.pruning_ctx.dal.clone(), + self.pruning_ctx.clone(), schema, vector_index, filters, @@ -575,26 +570,7 @@ impl FusePruner { limit, )?; - // Perf. - { - let block_size = metas.iter().map(|(_, m)| m.block_size).sum(); - metrics_inc_blocks_vector_index_pruning_before(metas.len() as u64); - metrics_inc_bytes_block_vector_index_pruning_before(block_size); - self.pruning_ctx - .pruning_stats - .set_blocks_vector_index_pruning_before(metas.len() as u64); - } let pruned_metas = vector_pruner.prune(metas.clone()).await?; - - // Perf. - { - let block_size = pruned_metas.iter().map(|(_, m)| m.block_size).sum(); - metrics_inc_blocks_vector_index_pruning_after(pruned_metas.len() as u64); - metrics_inc_bytes_block_vector_index_pruning_after(block_size); - self.pruning_ctx - .pruning_stats - .set_blocks_vector_index_pruning_after(pruned_metas.len() as u64); - } return Ok(pruned_metas); } Ok(metas) diff --git a/src/query/storages/fuse/src/pruning/vector_index_pruner.rs b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs index 6257048c84891..41b9855403658 100644 --- a/src/query/storages/fuse/src/pruning/vector_index_pruner.rs +++ b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs @@ -13,32 +13,45 @@ // limitations under the License. 
use std::cmp::Ordering; +use std::collections::HashMap; use std::collections::HashSet; +use std::future::Future; +use std::pin::Pin; use std::sync::Arc; +use std::time::Instant; +use databend_common_base::base::tokio::sync::OwnedSemaphorePermit; use databend_common_catalog::plan::Filters; use databend_common_catalog::plan::VectorIndexInfo; -use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::F32; use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_expression::VECTOR_SCORE_COL_NAME; +use databend_common_metrics::storage::metrics_inc_block_vector_index_pruning_milliseconds; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_before; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_before; use databend_storages_common_index::DistanceType; use databend_storages_common_index::FixedLengthPriorityQueue; use databend_storages_common_io::ReadSettings; use databend_storages_common_pruner::BlockMetaIndex; use databend_storages_common_table_meta::meta::BlockMeta; -use opendal::Operator; +use futures_util::future; use crate::io::read::VectorIndexReader; +use crate::pruning::PruningContext; + +type VectorPruningFutureReturn = Pin> + Send>>; +type VectorPruningFuture = + Box VectorPruningFutureReturn + Send + 'static>; /// Vector index pruner. #[derive(Clone)] pub struct VectorIndexPruner { - ctx: Arc, - operator: Operator, + pruning_ctx: Arc, _schema: TableSchemaRef, vector_index: VectorIndexInfo, filters: Option, @@ -48,8 +61,7 @@ pub struct VectorIndexPruner { impl VectorIndexPruner { pub fn create( - ctx: Arc, - operator: Operator, + pruning_ctx: Arc, schema: TableSchemaRef, vector_index: VectorIndexInfo, filters: Option, @@ -57,8 +69,7 @@ impl VectorIndexPruner { limit: Option, ) -> Result { Ok(Self { - ctx, - operator, + pruning_ctx, _schema: schema, vector_index, filters, @@ -73,7 +84,7 @@ impl VectorIndexPruner { &self, metas: Vec<(BlockMetaIndex, Arc)>, ) -> Result)>> { - let settings = ReadSettings::from_ctx(&self.ctx)?; + let settings = ReadSettings::from_ctx(&self.pruning_ctx.ctx)?; let distance_type = match self.vector_index.func_name.as_str() { "cosine_distance" => DistanceType::Dot, "l1_distance" => DistanceType::L1, @@ -104,7 +115,7 @@ impl VectorIndexPruner { }; let vector_reader = VectorIndexReader::create( - self.operator.clone(), + self.pruning_ctx.dal.clone(), settings, distance_type, columns, @@ -133,44 +144,117 @@ impl VectorIndexPruner { limit: usize, metas: Vec<(BlockMetaIndex, Arc)>, ) -> Result)>> { + let pruning_runtime = &self.pruning_ctx.pruning_runtime; + let pruning_semaphore = &self.pruning_ctx.pruning_semaphore; + + // Perf. 
+ { + let block_size = metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_before(metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_before(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_before(metas.len() as u64); + } + + let mut block_meta_indexes = metas.into_iter().enumerate(); + let pruning_tasks = std::iter::from_fn(move || { + block_meta_indexes + .next() + .map(|(index, (block_meta_index, block_meta))| { + let vector_reader = vector_reader.clone(); + let index_name = self.vector_index.index_name.clone(); + + let v: VectorPruningFuture = Box::new(move |permit: OwnedSemaphorePermit| { + Box::pin(async move { + let _permit = permit; + + let Some(location) = &block_meta.vector_index_location else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + index_name + ))); + }; + + let row_count = block_meta.row_count as usize; + let score_offsets = + vector_reader.prune(limit, row_count, &location.0).await?; + + let mut vector_scores = Vec::with_capacity(score_offsets.len()); + for score_offset in score_offsets { + let vector_score = VectorScore { + index, + row_idx: score_offset.idx, + score: F32::from(score_offset.score), + }; + vector_scores.push(vector_score); + } + + Ok(VectorPruneResult { + block_idx: index, + scores: vector_scores, + block_meta_index, + block_meta, + }) + }) + }); + v + }) + }); + + let start = Instant::now(); + + let join_handlers = pruning_runtime + .try_spawn_batch_with_owned_semaphore(pruning_semaphore.clone(), pruning_tasks) + .await?; + + let joint = future::try_join_all(join_handlers) + .await + .map_err(|e| ErrorCode::StorageOther(format!("vector topn pruning failure, {}", e)))?; + let mut top_queue = FixedLengthPriorityQueue::new(limit); + let mut vector_prune_result_map = HashMap::with_capacity(joint.len()); + for vector_prune_result in joint { + let vector_prune_result = vector_prune_result?; - for (index, (_, block_meta)) in metas.iter().enumerate() { - let Some(location) = block_meta.vector_index_location.clone() else { - return Err(ErrorCode::StorageUnavailable(format!( - "vector index {} file don't exist, need refresh", - self.vector_index.index_name - ))); - }; - - let row_count = block_meta.row_count as usize; - let score_offsets = vector_reader.prune(limit, row_count, &location.0).await?; - - for score_offset in score_offsets { - let vector_score = VectorScore { - index, - row_idx: score_offset.idx, - score: F32::from(score_offset.score), - }; - top_queue.push(vector_score); + for vector_score in &vector_prune_result.scores { + top_queue.push(vector_score.clone()); } + vector_prune_result_map.insert(vector_prune_result.block_idx, vector_prune_result); } + let top_scores = top_queue.into_sorted_vec(); let top_indexes: HashSet = top_scores.iter().map(|s| s.index).collect(); let mut pruned_metas = Vec::with_capacity(top_indexes.len()); - for (index, (mut block_meta_index, block_meta)) in metas.into_iter().enumerate() { + let len = vector_prune_result_map.len(); + for index in 0..len { if !top_indexes.contains(&index) { continue; } + let vector_prune_result = vector_prune_result_map.remove(&index).unwrap(); + let mut vector_scores = Vec::new(); for top_score in &top_scores { if top_score.index == index { vector_scores.push((top_score.row_idx as usize, top_score.score)); } } + let mut block_meta_index = vector_prune_result.block_meta_index; block_meta_index.vector_scores = Some(vector_scores); - 
pruned_metas.push((block_meta_index, block_meta)); + + pruned_metas.push((block_meta_index, vector_prune_result.block_meta)); + } + + // Perf. + { + let block_size = pruned_metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_after(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + metrics_inc_block_vector_index_pruning_milliseconds(start.elapsed().as_millis() as u64); } Ok(pruned_metas) @@ -182,33 +266,103 @@ impl VectorIndexPruner { metas: Vec<(BlockMetaIndex, Arc)>, ) -> Result)>> { // can't use vector index topn to prune, only generate vector scores. - let mut new_metas = Vec::with_capacity(metas.len()); - for (mut block_meta_index, block_meta) in metas.into_iter() { - let Some(location) = block_meta.vector_index_location.clone() else { - return Err(ErrorCode::StorageUnavailable(format!( - "vector index {} file don't exist, need refresh", - self.vector_index.index_name - ))); - }; - - let row_count = block_meta.row_count as usize; - // use row_count as limit to generate scores for all rows. - let score_offsets = vector_reader - .generate_scores(row_count, &location.0) - .await?; - - let mut vector_scores = Vec::with_capacity(row_count); - for score_offset in &score_offsets { - vector_scores.push((score_offset.idx as usize, F32::from(score_offset.score))); + let pruning_runtime = &self.pruning_ctx.pruning_runtime; + let pruning_semaphore = &self.pruning_ctx.pruning_semaphore; + + let mut block_meta_indexes = metas.into_iter().enumerate(); + let pruning_tasks = std::iter::from_fn(move || { + block_meta_indexes + .next() + .map(|(index, (block_meta_index, block_meta))| { + let vector_reader = vector_reader.clone(); + let index_name = self.vector_index.index_name.clone(); + + let v: VectorPruningFuture = Box::new(move |permit: OwnedSemaphorePermit| { + Box::pin(async move { + let _permit = permit; + + let Some(location) = &block_meta.vector_index_location else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + index_name + ))); + }; + let row_count = block_meta.row_count as usize; + let score_offsets = vector_reader + .generate_scores(row_count, &location.0) + .await?; + + let mut vector_scores = Vec::with_capacity(score_offsets.len()); + for score_offset in score_offsets { + let vector_score = VectorScore { + index, + row_idx: score_offset.idx, + score: F32::from(score_offset.score), + }; + vector_scores.push(vector_score); + } + + Ok(VectorPruneResult { + block_idx: index, + scores: vector_scores, + block_meta_index, + block_meta, + }) + }) + }); + v + }) + }); + + let start = Instant::now(); + + let join_handlers = pruning_runtime + .try_spawn_batch_with_owned_semaphore(pruning_semaphore.clone(), pruning_tasks) + .await?; + + let joint = future::try_join_all(join_handlers) + .await + .map_err(|e| ErrorCode::StorageOther(format!("vector pruning failure, {}", e)))?; + + let mut vector_prune_result_map = HashMap::with_capacity(joint.len()); + for vector_prune_result in joint { + let vector_prune_result = vector_prune_result?; + vector_prune_result_map.insert(vector_prune_result.block_idx, vector_prune_result); + } + + let len = vector_prune_result_map.len(); + let mut new_metas = Vec::with_capacity(len); + for index in 0..len { + let vector_prune_result = vector_prune_result_map.remove(&index).unwrap(); + let mut vector_scores = + 
+        let len = vector_prune_result_map.len();
+        let mut new_metas = Vec::with_capacity(len);
+        for index in 0..len {
+            let vector_prune_result = vector_prune_result_map.remove(&index).unwrap();
+            let mut vector_scores =
+                Vec::with_capacity(vector_prune_result.block_meta.row_count as usize);
+            for score in &vector_prune_result.scores {
+                vector_scores.push((score.row_idx as usize, score.score));
             }
+            let mut block_meta_index = vector_prune_result.block_meta_index;
             block_meta_index.vector_scores = Some(vector_scores);
-            new_metas.push((block_meta_index, block_meta));
+
+            new_metas.push((block_meta_index, vector_prune_result.block_meta));
+        }
+
+        // Perf.
+        {
+            metrics_inc_block_vector_index_pruning_milliseconds(start.elapsed().as_millis() as u64);
         }

         Ok(new_metas)
     }
 }

+// Result of pruning one block.
+struct VectorPruneResult {
+    // The block index in the segment.
+    block_idx: usize,
+    scores: Vec<VectorScore>,
+    block_meta_index: BlockMetaIndex,
+    block_meta: Arc<BlockMeta>,
+}
+
 #[derive(Clone, Debug, Eq, PartialEq)]
 struct VectorScore {
     index: usize,
diff --git a/src/query/storages/fuse/src/pruning_pipeline/mod.rs b/src/query/storages/fuse/src/pruning_pipeline/mod.rs
index 6256687c213c0..d3761ff8af2f7 100644
--- a/src/query/storages/fuse/src/pruning_pipeline/mod.rs
+++ b/src/query/storages/fuse/src/pruning_pipeline/mod.rs
@@ -25,6 +25,7 @@ mod segment_prune_transform;
 mod send_part_info_sink;
 mod sync_block_prune_transform;
 mod topn_prune_transform;
+mod vector_index_prune_transform;

 pub use async_block_prune_transform::AsyncBlockPruneTransform;
 pub use column_oriented_block_prune::ColumnOrientedBlockPruneSink;
@@ -40,3 +41,4 @@ pub use send_part_info_sink::SendPartInfoSink;
 pub use send_part_info_sink::SendPartState;
 pub use sync_block_prune_transform::SyncBlockPruneTransform;
 pub use topn_prune_transform::TopNPruneTransform;
+pub use vector_index_prune_transform::VectorIndexPruneTransform;
diff --git a/src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs b/src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs
new file mode 100644
index 0000000000000..96935cb56e707
--- /dev/null
+++ b/src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs
@@ -0,0 +1,86 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use databend_common_expression::BlockMetaInfoDowncast;
+use databend_common_expression::DataBlock;
+use databend_common_pipeline_core::processors::InputPort;
+use databend_common_pipeline_core::processors::OutputPort;
+use databend_common_pipeline_core::processors::ProcessorPtr;
+use databend_common_pipeline_transforms::AsyncAccumulatingTransform;
+use databend_common_pipeline_transforms::AsyncAccumulatingTransformer;
+use databend_storages_common_pruner::BlockMetaIndex;
+use databend_storages_common_table_meta::meta::BlockMeta;
+
+use crate::pruning::VectorIndexPruner;
+use crate::pruning_pipeline::block_prune_result_meta::BlockPruneResult;
+
+// VectorIndexPruneTransform is a processor that accumulates the block metas and does not
+// push them downstream until all data has been received and pruned.
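+// Buffering is required because the global top-k over all candidate blocks can
+// only be decided once every block's vector index has been searched.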
+pub struct VectorIndexPruneTransform {
+    vector_index_pruner: VectorIndexPruner,
+    metas: Vec<(BlockMetaIndex, Arc<BlockMeta>)>,
+}
+
+#[async_trait::async_trait]
+impl AsyncAccumulatingTransform for VectorIndexPruneTransform {
+    const NAME: &'static str = "VectorIndexPruneTransform";
+
+    async fn transform(&mut self, mut data: DataBlock) -> Result<Option<DataBlock>> {
+        if let Some(ptr) = data.take_meta() {
+            if let Some(meta) = BlockPruneResult::downcast_from(ptr) {
+                self.metas.extend(meta.block_metas);
+                return Ok(None);
+            }
+        }
+        Err(ErrorCode::Internal(
+            "Cannot downcast meta to BlockPruneResult",
+        ))
+    }
+
+    async fn on_finish(&mut self, _output: bool) -> Result<Option<DataBlock>> {
+        self.do_vector_index_prune().await
+    }
+}
+
+impl VectorIndexPruneTransform {
+    pub fn create(
+        input: Arc<InputPort>,
+        output: Arc<OutputPort>,
+        vector_index_pruner: VectorIndexPruner,
+    ) -> Result<ProcessorPtr> {
+        Ok(ProcessorPtr::create(AsyncAccumulatingTransformer::create(
+            input,
+            output,
+            VectorIndexPruneTransform {
+                vector_index_pruner,
+                metas: vec![],
+            },
+        )))
+    }
+
+    async fn do_vector_index_prune(&self) -> Result<Option<DataBlock>> {
+        let pruned = self.vector_index_pruner.prune(self.metas.clone()).await?;
+        if pruned.is_empty() {
+            Ok(None)
+        } else {
+            Ok(Some(DataBlock::empty_with_meta(BlockPruneResult::create(
+                pruned,
+            ))))
+        }
+    }
+}
diff --git a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test
index 19f6103aba605..70e2ca4afb4de 100644
--- a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test
+++ b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test
@@ -218,6 +218,27 @@ SELECT id, l2_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.3159103
 6 0.73338425
 11 0.76073563

+
+query IF
+SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity DESC;
+----
+9 0.2568838
+16 0.25626028
+13 0.24121934
+2 0.2268933
+14 0.21996021
+5 0.17328858
+4 0.16786504
+6 0.1645267
+7 0.15616316
+15 0.150944
+3 0.14645952
+8 0.14554787
+11 0.14048636
+12 0.060161233
+10 0.033747792
+1 0.009774268
+
 statement ok
 use default