From 96d16a07b65fd319c8c9b77db6ca95a3cc9f6384 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 3 Jun 2025 17:02:18 +0800 Subject: [PATCH 1/4] feat(query): Implement Vector Index with HNSW Algorithm --- Cargo.lock | 45 +- Cargo.toml | 6 +- src/common/metrics/src/metrics/storage.rs | 50 +- src/common/vector/src/distance.rs | 80 +- src/common/vector/src/lib.rs | 2 + src/query/catalog/src/plan/internal_column.rs | 22 + src/query/catalog/src/plan/pushdown.rs | 24 + src/query/config/src/config.rs | 30 + src/query/config/src/inner.rs | 12 + src/query/ee/tests/it/main.rs | 1 + src/query/ee/tests/it/vector_index/mod.rs | 15 + src/query/ee/tests/it/vector_index/pruning.rs | 757 ++++++++++++++++++ src/query/expression/src/evaluator.rs | 83 ++ src/query/expression/src/schema.rs | 12 +- src/query/expression/src/type_check.rs | 6 +- src/query/expression/src/types/decimal.rs | 6 + src/query/expression/src/types/number.rs | 8 + src/query/formats/src/field_decoder/nested.rs | 17 +- src/query/functions/src/scalars/vector.rs | 204 ++++- .../it/scalars/testdata/function_list.txt | 7 + .../tests/it/scalars/testdata/vector.txt | 190 ++++- .../functions/tests/it/scalars/vector.rs | 61 +- .../service/src/test_kits/block_writer.rs | 2 + .../it/storages/fuse/bloom_index_meta_size.rs | 2 + .../fuse/operations/internal_column.rs | 1 + .../operations/mutation/recluster_mutator.rs | 2 + .../mutation/segments_compact_mutator.rs | 2 + .../it/storages/fuse/operations/read_plan.rs | 2 + .../tests/it/storages/fuse/statistics.rs | 2 + .../storages/testdata/configs_table_basic.txt | 3 + .../physical_plans/physical_table_scan.rs | 1 + .../sql/src/planner/binder/bind_context.rs | 5 + .../bind_mutation/mutation_expression.rs | 2 +- src/query/sql/src/planner/binder/binder.rs | 9 +- src/query/sql/src/planner/binder/ddl/index.rs | 37 + .../planner/binder/internal_column_factory.rs | 6 + src/query/sql/src/planner/binder/table.rs | 1 + .../src/planner/optimizer/ir/expr/s_expr.rs | 15 +- src/query/sql/src/planner/plans/scan.rs | 3 + .../sql/src/planner/semantic/type_check.rs | 197 +++++ .../storages/common/cache/src/cache_items.rs | 2 + src/query/storages/common/cache/src/caches.rs | 35 + .../storages/common/cache/src/manager.rs | 47 ++ src/query/storages/common/index/Cargo.toml | 14 + .../index/src/hnsw_index/common/bitpacking.rs | 407 ++++++++++ .../src/hnsw_index/common/bitpacking_links.rs | 192 +++++ .../hnsw_index/common/bitpacking_ordered.rs | 311 +++++++ .../common/fixed_length_priority_queue.rs | 112 +++ .../common/index/src/hnsw_index/common/mod.rs | 22 + .../index/src/hnsw_index/common/types.rs | 48 ++ .../index/src/hnsw_index/common/utils.rs | 31 + .../index/src/hnsw_index/common/zeros.rs | 33 + .../index/src/hnsw_index/entry_points.rs | 162 ++++ .../index/src/hnsw_index/graph_layers.rs | 291 +++++++ .../src/hnsw_index/graph_layers_builder.rs | 571 +++++++++++++ .../index/src/hnsw_index/graph_links.rs | 130 +++ .../src/hnsw_index/graph_links/header.rs | 52 ++ .../src/hnsw_index/graph_links/serializer.rs | 233 ++++++ .../index/src/hnsw_index/graph_links/view.rs | 189 +++++ .../common/index/src/hnsw_index/hnsw.rs | 374 +++++++++ .../common/index/src/hnsw_index/mod.rs | 91 +++ .../index/src/hnsw_index/point_scorer.rs | 174 ++++ .../quantization/encoded_storage.rs | 68 ++ .../quantization/encoded_vectors.rs | 96 +++ .../quantization/encoded_vectors_u8.rs | 364 +++++++++ .../index/src/hnsw_index/quantization/mod.rs | 24 + .../src/hnsw_index/quantization/quantile.rs | 97 +++ .../index/src/hnsw_index/search_context.rs | 61 ++ 
.../index/src/hnsw_index/visited_pool.rs | 180 +++++ src/query/storages/common/index/src/lib.rs | 7 + .../storages/common/pruner/src/block_meta.rs | 2 + src/query/storages/common/pruner/src/lib.rs | 2 +- .../storages/common/pruner/src/topn_pruner.rs | 6 +- .../common/table_meta/src/meta/v2/segment.rs | 10 + .../src/meta/v3/frozen/block_meta.rs | 2 + src/query/storages/fuse/src/constants.rs | 1 + src/query/storages/fuse/src/fuse_table.rs | 4 +- src/query/storages/fuse/src/io/locations.rs | 21 + src/query/storages/fuse/src/io/mod.rs | 1 + .../fuse/src/io/read/meta/meta_readers.rs | 27 + src/query/storages/fuse/src/io/read/mod.rs | 2 + .../fuse/src/io/read/vector_index/mod.rs | 18 + .../read/vector_index/vector_index_loader.rs | 224 ++++++ .../read/vector_index/vector_index_reader.rs | 112 +++ .../fuse/src/io/write/block_writer.rs | 38 + src/query/storages/fuse/src/io/write/mod.rs | 3 + .../fuse/src/io/write/stream/block_builder.rs | 30 +- .../fuse/src/io/write/vector_index_writer.rs | 247 ++++++ .../processors/transform_serialize_block.rs | 7 + .../storages/fuse/src/operations/merge.rs | 7 + .../storages/fuse/src/operations/read/util.rs | 1 + .../fuse/src/operations/read_partitions.rs | 14 +- .../storages/fuse/src/pruning/block_pruner.rs | 2 + .../storages/fuse/src/pruning/fuse_pruner.rs | 43 +- src/query/storages/fuse/src/pruning/mod.rs | 2 + .../fuse/src/pruning/vector_index_pruner.rs | 230 ++++++ .../column_oriented_block_prune.rs | 1 + .../pruning_pipeline/topn_prune_transform.rs | 6 +- .../parquet_reader/reader/row_group_reader.rs | 1 + .../09_0000_vector_index_base.test | 157 +++- .../functions/02_0063_function_vector.test | 30 + 101 files changed, 7486 insertions(+), 113 deletions(-) create mode 100644 src/query/ee/tests/it/vector_index/mod.rs create mode 100644 src/query/ee/tests/it/vector_index/pruning.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/mod.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/types.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/utils.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/common/zeros.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/entry_points.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_layers.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links/header.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/graph_links/view.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/hnsw.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/mod.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/point_scorer.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs create mode 100644 
src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/mod.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/search_context.rs create mode 100644 src/query/storages/common/index/src/hnsw_index/visited_pool.rs create mode 100644 src/query/storages/fuse/src/io/read/vector_index/mod.rs create mode 100644 src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs create mode 100644 src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs create mode 100644 src/query/storages/fuse/src/io/write/vector_index_writer.rs create mode 100644 src/query/storages/fuse/src/pruning/vector_index_pruner.rs diff --git a/Cargo.lock b/Cargo.lock index 0b6e9bc040c39..551227dddbed7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5478,29 +5478,42 @@ version = "0.1.0" dependencies = [ "anyerror", "bincode 2.0.1", + "bitvec", + "bytemuck", "bytes", "cbordata", "databend-common-ast", "databend-common-exception", "databend-common-expression", "databend-common-functions", + "databend-common-vector", "databend-storages-common-table-meta", "divan", "fastrace", + "feistel-permutation-rs", "goldenfile", + "itertools 0.13.0", "jsonb", "levenshtein_automata", "log", "match-template", + "num-traits", + "num_cpus", + "ordered-float 5.0.0", + "parking_lot 0.12.3", "parquet", "rand 0.8.5", + "rayon", "roaring", + "self_cell", "serde", + "serde_json", "tantivy", "tantivy-common", "tantivy-fst", "thiserror 1.0.69", "xorfilter-rs", + "zerocopy", ] [[package]] @@ -6545,6 +6558,12 @@ dependencies = [ "semver", ] +[[package]] +name = "feistel-permutation-rs" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db6c829a796418de937efceee04e784abd1c756a82037d84a26f868bc2279f2" + [[package]] name = "ff" version = "0.13.1" @@ -8254,12 +8273,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hermit-abi" version = "0.5.1" @@ -10733,11 +10746,11 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi 0.3.9", + "hermit-abi 0.5.1", "libc", ] @@ -13567,6 +13580,12 @@ dependencies = [ "libc", ] +[[package]] +name = "self_cell" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749" + [[package]] name = "semver" version = "1.0.26" @@ -17458,18 +17477,18 @@ checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = 
"zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index f3f8a78f654c7..93d78ab729bf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -263,6 +263,7 @@ base64 = "0.22" bincode = { version = "2.0.0-rc.3", features = ["serde", "std", "alloc"] } bincode_v1 = { package = "bincode", version = "1.3.3" } bitpacking = "0.8.0" +bitvec = "1.0.1" blake3 = "1.3.1" bollard = { version = "0.17" } borsh = { version = "1.2.1", features = ["derive"] } @@ -311,6 +312,7 @@ enumflags2 = { version = "0.7.7", features = ["serde"] } ethnum = { version = "1.5.1" } faststr = "0.2" feature-set = { version = "0.1.1" } +feistel-permutation-rs = "0.1.1" flatbuffers = "25" # Must use the same version with arrow-ipc foreign_vec = "0.1.0" form_urlencoded = { version = "1" } @@ -393,7 +395,7 @@ num = "0.4.0" num-bigint = "0.4.6" num-derive = "0.4.2" num-traits = "0.2.19" -num_cpus = "1.13.1" +num_cpus = "1.17" object = "0.36.5" object_store_opendal = { version = "0.52.0" } once_cell = "1.15.0" @@ -472,6 +474,7 @@ rustls-pemfile = "2" rustls-pki-types = "1" rustyline = "14" scroll = "0.12.0" +self_cell = "1.2.0" semver = "1.0.14" serde = { version = "1.0.164", features = ["derive", "rc"] } serde_derive = "1" @@ -542,6 +545,7 @@ wiremock = "0.6" wkt = "0.11.1" xorf = { version = "0.11.0", default-features = false, features = ["binary-fuse"] } xorfilter-rs = "0.5" +zerocopy = "0.8.26" zip = "3.0.0" zstd = "0.12.3" diff --git a/src/common/metrics/src/metrics/storage.rs b/src/common/metrics/src/metrics/storage.rs index a1766d77b1bb5..b60a72acb416d 100644 --- a/src/common/metrics/src/metrics/storage.rs +++ b/src/common/metrics/src/metrics/storage.rs @@ -177,6 +177,24 @@ static BLOCK_INVERTED_INDEX_READ_MILLISECONDS: LazyLock = LazyLock::n static BLOCK_INVERTED_INDEX_SEARCH_MILLISECONDS: LazyLock = LazyLock::new(|| { register_histogram_in_milliseconds("fuse_block_inverted_index_search_milliseconds") }); +static BLOCK_VECTOR_INDEX_WRITE_NUMS: LazyLock = + LazyLock::new(|| register_counter("fuse_block_vector_index_write_nums")); +static BLOCK_VECTOR_INDEX_WRITE_BYTES: LazyLock = + LazyLock::new(|| register_counter("fuse_block_vector_index_write_bytes")); +static BLOCK_VECTOR_INDEX_WRITE_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_write_milliseconds") +}); +static BLOCK_VECTOR_INDEX_GENERATE_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_generate_milliseconds") +}); +static BLOCK_VECTOR_INDEX_READ_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_read_milliseconds") +}); +static BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_search_milliseconds") +}); +static BLOCK_VECTOR_INDEX_READ_BYTES: LazyLock = + LazyLock::new(|| register_counter("fuse_block_vector_index_read_bytes")); static COMPACT_BLOCK_READ_NUMS: LazyLock = LazyLock::new(|| register_counter("fuse_compact_block_read_nums")); static COMPACT_BLOCK_READ_BYTES: LazyLock = @@ -548,6 +566,10 @@ pub fn metrics_inc_block_index_write_bytes(c: u64) { BLOCK_INDEX_WRITE_BYTES.inc_by(c); } +pub fn 
metrics_inc_block_index_read_bytes(c: u64) { + BLOCK_INDEX_READ_BYTES.inc_by(c); +} + pub fn metrics_inc_block_index_write_milliseconds(c: u64) { BLOCK_INDEX_WRITE_MILLISECONDS.observe(c as f64); } @@ -576,8 +598,32 @@ pub fn metrics_inc_block_inverted_index_search_milliseconds(c: u64) { BLOCK_INVERTED_INDEX_SEARCH_MILLISECONDS.observe(c as f64); } -pub fn metrics_inc_block_index_read_bytes(c: u64) { - BLOCK_INDEX_READ_BYTES.inc_by(c); +pub fn metrics_inc_block_vector_index_write_nums(c: u64) { + BLOCK_VECTOR_INDEX_WRITE_NUMS.inc_by(c); +} + +pub fn metrics_inc_block_vector_index_write_bytes(c: u64) { + BLOCK_VECTOR_INDEX_WRITE_BYTES.inc_by(c); +} + +pub fn metrics_inc_block_vector_index_write_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_WRITE_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_generate_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_GENERATE_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_read_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_READ_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_search_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS.observe(c as f64); +} + +pub fn metrics_inc_block_vector_index_read_bytes(c: u64) { + BLOCK_VECTOR_INDEX_READ_BYTES.inc_by(c); } /// Compact metrics. diff --git a/src/common/vector/src/distance.rs b/src/common/vector/src/distance.rs index 97953374d239a..aff7caa4a26ab 100644 --- a/src/common/vector/src/distance.rs +++ b/src/common/vector/src/distance.rs @@ -16,69 +16,101 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use ndarray::ArrayView; -pub fn cosine_distance(from: &[f32], to: &[f32]) -> Result<f32> { - if from.len() != to.len() { +pub fn cosine_distance(lhs: &[f32], rhs: &[f32]) -> Result<f32> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - let a = ArrayView::from(from); - let b = ArrayView::from(to); + let a = ArrayView::from(lhs); + let b = ArrayView::from(rhs); let aa_sum = (&a * &a).sum(); let bb_sum = (&b * &b).sum(); Ok(1.0 - (&a * &b).sum() / ((aa_sum).sqrt() * (bb_sum).sqrt())) } -pub fn l2_distance(from: &[f32], to: &[f32]) -> Result<f32> { - if from.len() != to.len() { +pub fn l1_distance(lhs: &[f32], rhs: &[f32]) -> Result<f32> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - Ok(from + Ok(lhs .iter() - .zip(to.iter()) + .zip(rhs.iter()) + .map(|(a, b)| (a - b).abs()) + .sum::<f32>()) +} + +pub fn l2_distance(lhs: &[f32], rhs: &[f32]) -> Result<f32> { + if lhs.len() != rhs.len() { + return Err(ErrorCode::InvalidArgument(format!( + "Vector length not equal: {:} != {:}", + lhs.len(), + rhs.len(), + ))); + } + + Ok(lhs + .iter() + .zip(rhs.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::<f32>() .sqrt()) } -pub fn cosine_distance_64(from: &[f64], to: &[f64]) -> Result<f64> { - if from.len() != to.len() { +pub fn cosine_distance_64(lhs: &[f64], rhs: &[f64]) -> Result<f64> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - let a = ArrayView::from(from); - let b = ArrayView::from(to); + let a = ArrayView::from(lhs); + let b = ArrayView::from(rhs); let aa_sum = (&a * &a).sum(); let bb_sum = (&b * &b).sum(); Ok(1.0 - (&a * &b).sum() / ((aa_sum).sqrt() * 
(bb_sum).sqrt())) } -pub fn l2_distance_64(from: &[f64], to: &[f64]) -> Result<f64> { - if from.len() != to.len() { +pub fn l1_distance_64(lhs: &[f64], rhs: &[f64]) -> Result<f64> { + if lhs.len() != rhs.len() { + return Err(ErrorCode::InvalidArgument(format!( + "Vector length not equal: {:} != {:}", + lhs.len(), + rhs.len(), + ))); + } + + Ok(lhs + .iter() + .zip(rhs.iter()) + .map(|(a, b)| (a - b).abs()) + .sum::<f64>()) +} + +pub fn l2_distance_64(lhs: &[f64], rhs: &[f64]) -> Result<f64> { + if lhs.len() != rhs.len() { return Err(ErrorCode::InvalidArgument(format!( "Vector length not equal: {:} != {:}", - from.len(), - to.len(), + lhs.len(), + rhs.len(), ))); } - Ok(from + Ok(lhs .iter() - .zip(to.iter()) + .zip(rhs.iter()) .map(|(a, b)| (a - b).powi(2)) .sum::<f64>() .sqrt()) diff --git a/src/common/vector/src/lib.rs b/src/common/vector/src/lib.rs index 2988f0db11730..593c9252e6c23 100644 --- a/src/common/vector/src/lib.rs +++ b/src/common/vector/src/lib.rs @@ -16,5 +16,7 @@ mod distance; pub use distance::cosine_distance; pub use distance::cosine_distance_64; +pub use distance::l1_distance; +pub use distance::l1_distance_64; pub use distance::l2_distance; pub use distance::l2_distance_64; diff --git a/src/query/catalog/src/plan/internal_column.rs b/src/query/catalog/src/plan/internal_column.rs index 412d32d29f37c..a026ffb1f516a 100644 --- a/src/query/catalog/src/plan/internal_column.rs +++ b/src/query/catalog/src/plan/internal_column.rs @@ -42,6 +42,7 @@ use databend_common_expression::SEARCH_MATCHED_COLUMN_ID; use databend_common_expression::SEARCH_SCORE_COLUMN_ID; use databend_common_expression::SEGMENT_NAME_COLUMN_ID; use databend_common_expression::SNAPSHOT_NAME_COLUMN_ID; +use databend_common_expression::VECTOR_SCORE_COLUMN_ID; use databend_storages_common_table_meta::meta::try_extract_uuid_str_from_path; use databend_storages_common_table_meta::meta::NUM_BLOCK_ID_BITS; @@ -106,6 +107,8 @@ pub struct InternalColumnMeta { pub inner: Option<BlockMetaIndex>, // The search matched rows and optional scores in the block. pub matched_rows: Option<Vec<(usize, Option<F32>)>>, + // The vector topn rows and scores in the block. + pub vector_scores: Option<Vec<(usize, F32)>>, } #[typetag::serde(name = "internal_column_meta")] @@ -142,6 +145,9 @@ pub enum InternalColumnType { SearchMatched, SearchScore, + // vector columns + VectorScore, + FileName, FileRowNumber, } @@ -176,6 +182,7 @@ impl InternalColumn { )), InternalColumnType::SearchMatched => TableDataType::Boolean, InternalColumnType::SearchScore => TableDataType::Number(NumberDataType::Float32), + InternalColumnType::VectorScore => TableDataType::Number(NumberDataType::Float32), InternalColumnType::FileName => TableDataType::String, InternalColumnType::FileRowNumber => TableDataType::Number(NumberDataType::UInt64), } @@ -200,6 +207,7 @@ impl InternalColumn { InternalColumnType::BaseBlockIds => BASE_BLOCK_IDS_COLUMN_ID, InternalColumnType::SearchMatched => SEARCH_MATCHED_COLUMN_ID, InternalColumnType::SearchScore => SEARCH_SCORE_COLUMN_ID, + InternalColumnType::VectorScore => VECTOR_SCORE_COLUMN_ID, InternalColumnType::FileName => FILENAME_COLUMN_ID, InternalColumnType::FileRowNumber => FILE_ROW_NUMBER_COLUMN_ID, } @@ -291,6 +299,20 @@ impl InternalColumn { } Float32Type::from_data(scores).into() } + InternalColumnType::VectorScore => { + assert!(meta.vector_scores.is_some()); + let vector_scores = meta.vector_scores.as_ref().unwrap(); + + // The smaller the score, the closer the distance. + // Fill other rows with the maximum value and they will be filtered out. 
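+ // Hypothetical illustration of this fill logic: with num_rows = 4 and + // vector_scores = [(1, 0.2), (3, 0.7)], the generated column is + // [f32::MAX, 0.2, f32::MAX, 0.7], so an ascending TopN over + // _vector_score keeps only the scored rows 1 and 3.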
+ let mut scores = vec![F32::from(f32::MAX); num_rows]; + for (idx, score) in vector_scores.iter() { + if let Some(val) = scores.get_mut(*idx) { + *val = *score; + } + } + Float32Type::from_data(scores).into() + } InternalColumnType::FileName | InternalColumnType::FileRowNumber => { todo!("generate_column_values not support for file related") } diff --git a/src/query/catalog/src/plan/pushdown.rs b/src/query/catalog/src/plan/pushdown.rs index cfad14da502a2..d090d4b47d59d 100644 --- a/src/query/catalog/src/plan/pushdown.rs +++ b/src/query/catalog/src/plan/pushdown.rs @@ -19,6 +19,7 @@ use std::fmt::Debug; use databend_common_ast::ast::SampleConfig; use databend_common_expression::types::DataType; use databend_common_expression::types::F32; +use databend_common_expression::ColumnId; use databend_common_expression::DataSchema; use databend_common_expression::RemoteExpr; use databend_common_expression::Scalar; @@ -131,6 +132,23 @@ pub struct InvertedIndexInfo { pub inverted_index_option: Option<InvertedIndexOption>, } +/// Information about vector index. +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct VectorIndexInfo { + /// The index name. + pub index_name: String, + /// The index version. + pub index_version: String, + /// The index options: m, ef_construct, .. + pub index_options: BTreeMap<String, String>, + /// The column id of vector column. + pub column_id: ColumnId, + /// The distance function name: l1_distance, l2_distance, cosine_distance, .. + pub func_name: String, + /// The query vector value. + pub query_values: Vec<F32>, +} + /// Extras is a wrapper for push down items. #[derive(serde::Serialize, serde::Deserialize, Clone, Default, Debug, PartialEq, Eq)] pub struct PushDownInfo { @@ -161,6 +179,8 @@ pub struct PushDownInfo { pub change_type: Option<ChangeType>, /// Optional inverted index pub inverted_index: Option<InvertedIndexInfo>, + /// Optional vector index + pub vector_index: Option<VectorIndexInfo>, /// Used by table sample pub sample: Option<SampleConfig>, } @@ -236,6 +256,10 @@ impl PushDownInfo { } } + pub fn vector_topn(&self) -> bool { + !self.order_by.is_empty() && self.limit.is_some() && self.vector_index.is_some() + } + pub fn prewhere_of_push_downs(push_downs: Option<&PushDownInfo>) -> Option<PrewhereInfo> { if let Some(PushDownInfo { prewhere, .. }) = push_downs { prewhere.clone() diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index d357d7aae491a..d69a95ad30120 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -3285,6 +3285,30 @@ pub struct CacheConfig { )] pub inverted_index_filter_memory_ratio: u64, + /// Max number of cached vector index meta objects. Set it to 0 to disable it. + #[clap( + long = "cache-vector-index-meta-count", + value_name = "VALUE", + default_value = "3000" + )] + pub vector_index_meta_count: u64, + + /// Max bytes of cached vector index filters used. Set it to 0 to disable it. + #[clap( + long = "cache-vector-index-filter-size", + value_name = "VALUE", + default_value = "2147483648" + )] + pub vector_index_filter_size: u64, + + /// Max percentage of in memory vector index filter cache relative to whole memory. By default it is 0 (disabled). 
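+ /// For example, a value of 10 would let this cache use about 10% of total memory.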
+ #[clap( + long = "cache-vector-index-filter-memory-ratio", + value_name = "VALUE", + default_value = "0" + )] + pub vector_index_filter_memory_ratio: u64, + #[clap( long = "cache-table-prune-partitions-count", value_name = "VALUE", @@ -3609,6 +3633,9 @@ mod cache_config_converters { inverted_index_meta_count: value.inverted_index_meta_count, inverted_index_filter_size: value.inverted_index_filter_size, inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio, + vector_index_meta_count: value.vector_index_meta_count, + vector_index_filter_size: value.vector_index_filter_size, + vector_index_filter_memory_ratio: value.vector_index_filter_memory_ratio, table_prune_partitions_count: value.table_prune_partitions_count, data_cache_storage: value.data_cache_storage.try_into()?, table_data_cache_population_queue_size: value @@ -3645,6 +3672,9 @@ mod cache_config_converters { inverted_index_meta_count: value.inverted_index_meta_count, inverted_index_filter_size: value.inverted_index_filter_size, inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio, + vector_index_meta_count: value.vector_index_meta_count, + vector_index_filter_size: value.vector_index_filter_size, + vector_index_filter_memory_ratio: value.vector_index_filter_memory_ratio, table_prune_partitions_count: value.table_prune_partitions_count, data_cache_storage: value.data_cache_storage.into(), data_cache_key_reload_policy: value.data_cache_key_reload_policy.into(), diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 32ee0791d0f6d..2807e1aaee817 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -616,6 +616,15 @@ pub struct CacheConfig { /// Max percentage of in memory inverted index filters cache relative to whole memory. By default it is 0 (disabled). pub inverted_index_filter_memory_ratio: u64, + /// Max number of cached vector index meta objects. Set it to 0 to disable it. + pub vector_index_meta_count: u64, + + /// Max bytes of cached vector index filters used. Set it to 0 to disable it. + pub vector_index_filter_size: u64, + + /// Max percentage of in memory vector index filters cache relative to whole memory. By default it is 0 (disabled). + pub vector_index_filter_memory_ratio: u64, + pub data_cache_storage: CacheStorageTypeConfig, /// Max size of external cache population queue length @@ -749,6 +758,9 @@ impl Default for CacheConfig { inverted_index_meta_count: 3000, inverted_index_filter_size: 2147483648, inverted_index_filter_memory_ratio: 0, + vector_index_meta_count: 3000, + vector_index_filter_size: 2147483648, + vector_index_filter_memory_ratio: 0, table_prune_partitions_count: 256, data_cache_storage: Default::default(), table_data_cache_population_queue_size: 0, diff --git a/src/query/ee/tests/it/main.rs b/src/query/ee/tests/it/main.rs index 8b5c2646c8640..4bf5e344b6e6b 100644 --- a/src/query/ee/tests/it/main.rs +++ b/src/query/ee/tests/it/main.rs @@ -19,3 +19,4 @@ mod license; mod ngram_index; mod storages; mod stream; +mod vector_index; diff --git a/src/query/ee/tests/it/vector_index/mod.rs b/src/query/ee/tests/it/vector_index/mod.rs new file mode 100644 index 0000000000000..56193a6b31dee --- /dev/null +++ b/src/query/ee/tests/it/vector_index/mod.rs @@ -0,0 +1,15 @@ +// Copyright 2023 Databend Cloud +// +// Licensed under the Elastic License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.elastic.co/licensing/elastic-license +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod pruning; diff --git a/src/query/ee/tests/it/vector_index/pruning.rs b/src/query/ee/tests/it/vector_index/pruning.rs new file mode 100644 index 0000000000000..9b862ed7071ff --- /dev/null +++ b/src/query/ee/tests/it/vector_index/pruning.rs @@ -0,0 +1,757 @@ +// Copyright 2023 Databend Cloud +// +// Licensed under the Elastic License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.elastic.co/licensing/elastic-license +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::sync::Arc; + +use databend_common_ast::ast::Engine; +use databend_common_base::base::tokio; +use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::plan::VectorIndexInfo; +use databend_common_exception::Result; +use databend_common_expression::types::number::UInt64Type; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::VectorColumn; +use databend_common_expression::types::VectorDataType; +use databend_common_expression::types::F32; +use databend_common_expression::Column; +use databend_common_expression::DataBlock; +use databend_common_expression::FromData; +use databend_common_expression::RemoteExpr; +use databend_common_expression::TableDataType; +use databend_common_expression::TableField; +use databend_common_expression::TableSchemaRef; +use databend_common_expression::TableSchemaRefExt; +use databend_common_meta_app::schema::CreateOption; +use databend_common_meta_app::schema::TableIndex; +use databend_common_meta_app::schema::TableIndexType; +use databend_common_sql::plans::CreateTablePlan; +use databend_common_sql::BloomIndexColumns; +use databend_common_storages_fuse::pruning::create_segment_location_vector; +use databend_common_storages_fuse::pruning::FusePruner; +use databend_common_storages_fuse::FuseTable; +use databend_enterprise_query::test_kits::context::EESetup; +use databend_query::interpreters::CreateTableInterpreter; +use databend_query::interpreters::Interpreter; +use databend_query::sessions::QueryContext; +use databend_query::sessions::TableContext; +use databend_query::storages::fuse::FUSE_OPT_KEY_BLOCK_PER_SEGMENT; +use databend_query::storages::fuse::FUSE_OPT_KEY_ROW_PER_BLOCK; +use databend_query::test_kits::*; +use databend_storages_common_pruner::BlockMetaIndex; +use databend_storages_common_table_meta::meta::BlockMeta; +use databend_storages_common_table_meta::meta::TableSnapshot; +use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; +use opendal::Operator; + +async fn apply_block_pruning( + table_snapshot: Arc<TableSnapshot>, + schema: TableSchemaRef, + push_down: &Option<PushDownInfo>, + ctx: Arc<QueryContext>, + dal: Operator, + bloom_index_cols: 
BloomIndexColumns, +) -> Result)>> { + let ctx: Arc = ctx; + let segment_locs = table_snapshot.segments.clone(); + let segment_locs = create_segment_location_vector(segment_locs, None); + + FusePruner::create(&ctx, dal, schema, push_down, bloom_index_cols, vec![], None)? + .read_pruning(segment_locs) + .await +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_block_pruner() -> Result<()> { + let fixture = TestFixture::setup_with_custom(EESetup::new()).await?; + + let ctx = fixture.new_query_ctx().await?; + fixture.create_default_database().await?; + + let test_tbl_name = "test_vector_index"; + let test_schema = TableSchemaRefExt::create(vec![ + TableField::new("id", TableDataType::Number(NumberDataType::UInt64)), + TableField::new( + "embedding", + TableDataType::Vector(VectorDataType::Float32(4)), + ), + ]); + + let row_per_block = 10; + let num_blocks_opt = row_per_block.to_string(); + + let index_name = "idx1".to_string(); + let index_version = "test1".to_string(); + + let mut index_options = BTreeMap::new(); + index_options.insert("m".to_string(), "10".to_string()); + index_options.insert("ef_construct".to_string(), "40".to_string()); + index_options.insert("distance".to_string(), "cosine,l1,l2".to_string()); + let index_column_id = 1; + let table_index = TableIndex { + index_type: TableIndexType::Vector, + name: index_name.clone(), + column_ids: vec![index_column_id], + sync_creation: true, + version: index_version.clone(), + options: index_options.clone(), + }; + let mut table_indexes = BTreeMap::new(); + table_indexes.insert("idx1".to_string(), table_index); + + // create test table + let create_table_plan = CreateTablePlan { + catalog: "default".to_owned(), + create_option: CreateOption::Create, + tenant: fixture.default_tenant(), + database: fixture.default_db_name(), + table: test_tbl_name.to_string(), + schema: test_schema.clone(), + engine: Engine::Fuse, + engine_options: Default::default(), + storage_params: None, + options: [ + (FUSE_OPT_KEY_ROW_PER_BLOCK.to_owned(), num_blocks_opt), + (FUSE_OPT_KEY_BLOCK_PER_SEGMENT.to_owned(), "5".to_owned()), + (OPT_KEY_DATABASE_ID.to_owned(), "1".to_owned()), + ] + .into(), + field_comments: vec![], + as_select: None, + cluster_key: None, + table_indexes: Some(table_indexes), + attached_columns: None, + table_partition: None, + table_properties: None, + }; + + let interpreter = CreateTableInterpreter::try_create(ctx.clone(), create_table_plan)?; + let _ = interpreter.execute(ctx.clone()).await?; + + // get table + let catalog = ctx.get_catalog("default").await?; + let table = catalog + .get_table( + &fixture.default_tenant(), + fixture.default_db_name().as_str(), + test_tbl_name, + ) + .await?; + + // prepare test blocks + let vals0: Vec = vec![ + -0.6886994, + 0.594091, + 0.90251666, + -0.5796461, + -0.82056284, + 0.80095357, + 0.6307791, + -0.10274009, + 0.80994654, + 0.17736527, + -0.65107286, + -0.34088722, + -0.06466371, + -0.20792475, + 0.15237674, + 0.51079565, + -0.6937013, + -0.5297969, + 0.7171806, + 0.785739, + -0.65965945, + -0.81779677, + 0.35969305, + -0.46954358, + -0.8181405, + -0.6114142, + -0.87969273, + -0.00383717, + 0.918081, + -0.08214826, + -0.2705187, + -0.39889243, + 0.6290396, + 0.9973043, + -0.3085359, + 0.8468473, + -0.32078063, + 0.67827964, + 0.9978988, + -0.30051866, + ]; + let vals0 = unsafe { std::mem::transmute::, Vec>(vals0) }; + let block0 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + Column::Vector(VectorColumn::Float32((vals0.into(), 
4))), + ]); + let vals1: Vec = vec![ + -0.8662579, + 0.21157496, + 0.04832743, + 0.01034609, + -0.8213428, + -0.07455289, + 0.79567593, + 0.22692858, + -0.13815483, + 0.04082361, + -0.04671623, + 0.07829991, + -0.4285805, + -0.83638775, + 0.16173266, + -0.6230965, + 0.48879236, + -0.8992002, + 0.6461996, + -0.6104055, + 0.7835251, + 0.6034467, + 0.12212521, + 0.49520096, + 0.5970688, + 0.45890963, + -0.05623427, + -0.49175563, + -0.8342597, + -0.5295784, + 0.6283545, + 0.08985507, + -0.60963225, + -0.9484875, + -0.40452087, + -0.87066746, + 0.48526454, + 0.03684357, + 0.63801855, + -0.49714512, + ]; + let vals1 = unsafe { std::mem::transmute::, Vec>(vals1) }; + let block1 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + Column::Vector(VectorColumn::Float32((vals1.into(), 4))), + ]); + let vals2: Vec = vec![ + -0.18905626, + 0.6927208, + 0.7869001, + 0.22925916, + -0.5255186, + 0.14997292, + -0.5750151, + 0.51772356, + -0.951746, + 0.9412492, + 0.4678889, + 0.46652728, + 0.61070764, + -0.66532606, + -0.76100147, + -0.12496163, + -0.6957283, + 0.8386284, + -0.15284961, + -0.2555948, + -0.22072262, + 0.42040154, + 0.99745035, + 0.6271642, + 0.9605643, + -0.65621495, + -0.4781119, + 0.6010602, + 0.7315234, + -0.03415851, + -0.12357767, + 0.09560691, + 0.21121186, + 0.2585377, + 0.5601369, + 0.23845962, + -0.35424188, + 0.17996286, + -0.4941602, + -0.20577724, + ]; + let vals2 = unsafe { std::mem::transmute::, Vec>(vals2) }; + let block2 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + Column::Vector(VectorColumn::Float32((vals2.into(), 4))), + ]); + let vals3: Vec = vec![ + 0.8412355, + 0.3082751, + 0.59870875, + -0.54127926, + -0.9425862, + -0.4464907, + -0.82330227, + -0.33117214, + 0.13021936, + -0.6236809, + 0.96284235, + -0.5690468, + -0.2858306, + 0.4726673, + -0.1239042, + -0.6170608, + -0.00327663, + -0.83231056, + 0.16952398, + -0.01978558, + 0.6004247, + 0.09402651, + 0.9722124, + -0.46700177, + 0.59854394, + 0.43756092, + -0.60489684, + -0.77390605, + -0.33195212, + 0.20036773, + -0.78870934, + 0.06877671, + 0.90521765, + 0.76765245, + -0.5661686, + -0.85996264, + -0.8881472, + 0.7931559, + 0.2554919, + -0.8342734, + ]; + let vals3 = unsafe { std::mem::transmute::, Vec>(vals3) }; + let block3 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![31, 32, 33, 34, 35, 36, 37, 38, 39, 40]), + Column::Vector(VectorColumn::Float32((vals3.into(), 4))), + ]); + let vals4: Vec = vec![ + -0.07214834, + -0.45140868, + 0.52644473, + -0.9244883, + -0.30683544, + -0.54323095, + -0.21925122, + -0.12423284, + -0.8629535, + 0.58288944, + 0.75837606, + 0.03510276, + -0.8564059, + -0.03417623, + -0.07238109, + 0.58050597, + 0.7454117, + -0.27445704, + 0.45540568, + -0.5408085, + -0.780661, + 0.6657731, + -0.97462314, + 0.8857822, + 0.02701622, + 0.04349842, + 0.5408021, + 0.7438895, + -0.44429415, + 0.77314705, + -0.36297366, + -0.6039303, + 0.19068193, + 0.14782214, + 0.75198305, + -0.10257443, + -0.08388132, + -0.7079838, + -0.45469823, + 0.4560124, + ]; + let vals4 = unsafe { std::mem::transmute::, Vec>(vals4) }; + let block4 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![41, 42, 43, 44, 45, 46, 47, 48, 49, 50]), + Column::Vector(VectorColumn::Float32((vals4.into(), 4))), + ]); + let vals5: Vec = vec![ + -0.1999165, + 0.52322525, + -0.337038, + -0.90144914, + -0.8406314, + -0.5335526, + -0.95726347, + 0.33673206, + -0.8691562, + 0.48139447, + 
-0.6788517, + 0.3771608, + 0.4059562, + -0.58860403, + -0.428289, + 0.32089558, + -0.3011892, + 0.60242313, + -0.87302023, + -0.25639316, + -0.9859232, + 0.29515472, + 0.55974996, + -0.8190884, + -0.08609874, + -0.50538206, + 0.0652289, + 0.7410794, + -0.59104115, + 0.8998315, + 0.31411764, + 0.5163839, + 0.25237387, + 0.02671343, + -0.8648633, + 0.95094275, + -0.6676619, + 0.62161124, + 0.6938727, + -0.10332275, + ]; + let vals5 = unsafe { std::mem::transmute::, Vec>(vals5) }; + let block5 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![51, 52, 53, 54, 55, 56, 57, 58, 59, 60]), + Column::Vector(VectorColumn::Float32((vals5.into(), 4))), + ]); + let vals6: Vec = vec![ + 0.5895334, + -0.7343663, + -0.02117946, + 0.1402015, + 0.6598045, + -0.722716, + -0.40154833, + -0.10447401, + -0.78196186, + 0.436223, + -0.8290139, + 0.22458494, + -0.01400176, + 0.3236723, + 0.17722614, + 0.9377708, + 0.09351188, + 0.8986833, + -0.8690766, + 0.10546188, + -0.2846303, + -0.454967, + -0.5632622, + 0.46904188, + -0.39408457, + -0.1404441, + -0.5426498, + -0.7066665, + 0.8154848, + 0.92514247, + -0.449755, + 0.62942183, + 0.5758866, + 0.8156669, + -0.15692636, + -0.15390746, + 0.457048, + 0.47833237, + 0.63010204, + 0.81386733, + ]; + let vals6 = unsafe { std::mem::transmute::, Vec>(vals6) }; + let block6 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![61, 62, 63, 64, 65, 66, 67, 68, 69, 70]), + Column::Vector(VectorColumn::Float32((vals6.into(), 4))), + ]); + let vals7: Vec = vec![ + 0.2055598, + -0.9889231, + 0.48384285, + 0.6735521, + 0.42140472, + -0.56612134, + -0.3547931, + 0.37290242, + -0.63698244, + 0.25703365, + -0.6497194, + -0.00122721, + 0.01125184, + -0.32437629, + -0.23926528, + -0.13202162, + -0.37527475, + -0.23734985, + 0.03072986, + -0.08610785, + 0.09782696, + -0.05098151, + -0.01559174, + -0.59764004, + -0.48390508, + 0.71857893, + -0.4476935, + 0.6353149, + -0.9063252, + 0.03339462, + -0.13207407, + 0.35822904, + 0.14378202, + -0.6895029, + -0.45171574, + 0.7036348, + -0.05764073, + -0.04511834, + -0.6025827, + 0.42203856, + ]; + let vals7 = unsafe { std::mem::transmute::, Vec>(vals7) }; + let block7 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![71, 72, 73, 74, 75, 76, 77, 78, 79, 80]), + Column::Vector(VectorColumn::Float32((vals7.into(), 4))), + ]); + let vals8: Vec = vec![ + 0.44271547, + 0.04186246, + -0.05471806, + 0.84741205, + -0.60298675, + 0.13338158, + -0.01588953, + 0.2876288, + -0.09086735, + -0.11241615, + 0.03860525, + 0.63135403, + 0.60686076, + -0.32387394, + -0.66953754, + 0.7155654, + -0.40972582, + -0.70375466, + 0.28354865, + -0.75318587, + 0.11960128, + -0.10885316, + 0.30722642, + 0.11420934, + -0.5221141, + 0.31499448, + 0.86042684, + 0.47856066, + -0.82223445, + 0.7333596, + -0.32723898, + -0.4398808, + 0.9394175, + -0.25679085, + 0.2887939, + -0.73664117, + 0.5395438, + -0.05887805, + 0.36002022, + -0.72944045, + ]; + let vals8 = unsafe { std::mem::transmute::, Vec>(vals8) }; + let block8 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![81, 82, 83, 84, 85, 86, 87, 88, 89, 90]), + Column::Vector(VectorColumn::Float32((vals8.into(), 4))), + ]); + let vals9: Vec = vec![ + 0.95527714, + -0.03856075, + -0.89367366, + 0.90464765, + 0.7934615, + -0.50674295, + 0.5941392, + -0.35010257, + 0.45648512, + -0.11480136, + 0.9441768, + 0.07530943, + 0.07846592, + -0.15600504, + -0.28246698, + 0.19841912, + 0.07780663, + 0.1556818, + -0.2927237, + 0.07868534, + 0.13883874, + -0.8788782, + 0.7045493, + 
-0.23339222, + 0.95576626, + -0.9563942, + -0.13632946, + 0.06362384, + 0.44660464, + 0.6827207, + 0.5226848, + -0.23891447, + 0.48967868, + 0.9801073, + -0.5306416, + -0.36345342, + 0.42729795, + 0.92860633, + 0.8177991, + -0.24459854, + ]; + let vals9 = unsafe { std::mem::transmute::, Vec>(vals9) }; + let block9 = DataBlock::new_from_columns(vec![ + UInt64Type::from_data(vec![91, 92, 93, 94, 95, 96, 97, 98, 99, 100]), + Column::Vector(VectorColumn::Float32((vals9.into(), 4))), + ]); + + let blocks = vec![ + block0, block1, block2, block3, block4, block5, block6, block7, block8, block9, + ]; + + fixture + .append_commit_blocks(table.clone(), blocks, false, true) + .await?; + + // Define query vectors for testing + let query_values1 = vec![-0.6886994, 0.594091, 0.90251667, -0.5796461]; + let query_values1 = unsafe { std::mem::transmute::, Vec>(query_values1) }; + let query_values2 = vec![0.5758866, 0.8156669, -0.15692637, -0.15390747]; + let query_values2 = unsafe { std::mem::transmute::, Vec>(query_values2) }; + + let table = catalog + .get_table( + &fixture.default_tenant(), + fixture.default_db_name().as_str(), + test_tbl_name, + ) + .await?; + + let fuse_table = FuseTable::do_create(table.get_table_info().clone())?; + let snapshot = fuse_table.read_table_snapshot().await?; + assert!(snapshot.is_some()); + let snapshot = snapshot.unwrap(); + + let orderby_expr = RemoteExpr::::ColumnRef { + span: None, + id: "_vector_score".to_string(), + data_type: DataType::Number(NumberDataType::Float32), + display_name: "_vector_score".to_string(), + }; + + let vector_index = VectorIndexInfo { + index_name: index_name.clone(), + index_version: index_version.clone(), + index_options: index_options.clone(), + column_id: index_column_id, + func_name: "".to_string(), + query_values: vec![], + }; + + let query_values = vec![ + ("cosine_distance".to_string(), query_values1.clone()), + ("l1_distance".to_string(), query_values1.clone()), + ("l2_distance".to_string(), query_values1.clone()), + ("cosine_distance".to_string(), query_values2.clone()), + ("l1_distance".to_string(), query_values2.clone()), + ("l2_distance".to_string(), query_values2.clone()), + ]; + + let results = vec![ + vec![ + vec![ + (0, 0, 0, 0.005022526), + (0, 0, 9, 0.05992174), + (0, 0, 1, 0.09289217), + ], + vec![(1, 0, 9, 0.05186367), (1, 0, 5, 0.07403374)], + ], + vec![ + vec![(0, 0, 0, 0.0), (0, 0, 9, 0.84269863), (0, 0, 1, 1.0792456)], + vec![(0, 4, 2, 0.9375271)], + vec![(1, 0, 9, 0.7167929)], + ], + vec![vec![(0, 0, 0, 3.5187712), (0, 0, 9, 3.5518785)], vec![ + (1, 3, 6, 3.4702706), + (1, 3, 7, 3.5206928), + (1, 3, 1, 3.556445), + ]], + vec![ + vec![(0, 1, 6, 0.18258381)], + vec![(0, 3, 8, 0.15948296)], + vec![(1, 1, 8, 0.008677483), (1, 1, 7, 0.21170044)], + vec![(1, 4, 8, 0.0657177)], + ], + vec![ + vec![(0, 1, 6, 0.7965471)], + vec![(0, 2, 7, 1.3045802)], + vec![(1, 1, 8, 0.0)], + vec![(1, 4, 8, 0.8538904), (1, 4, 7, 1.021619)], + ], + vec![vec![(1, 1, 8, 3.4763064)], vec![ + (1, 3, 5, 3.4903116), + (1, 3, 9, 3.4926815), + (1, 3, 0, 3.527872), + (1, 3, 8, 3.560473), + ]], + ]; + + let mut extras = Vec::new(); + for ((func_name, query_values), result) in query_values.into_iter().zip(results.into_iter()) { + let mut vector_index = vector_index.clone(); + vector_index.func_name = func_name; + vector_index.query_values = query_values; + let extra = PushDownInfo { + limit: Some(5), + order_by: vec![(orderby_expr.clone(), true, false)], + vector_index: Some(vector_index), + ..Default::default() + }; + extras.push((Some(extra), 
result)); + } + + for (extra, expected_results) in extras { + let block_metas = apply_block_pruning( + snapshot.clone(), + table.get_table_info().schema(), + &extra, + ctx.clone(), + fuse_table.get_operator(), + fuse_table.bloom_index_cols(), + ) + .await?; + assert_eq!(block_metas.len(), expected_results.len()); + for ((block_meta_index, _), expected_scores) in + block_metas.iter().zip(expected_results.iter()) + { + assert!(block_meta_index.vector_scores.is_some()); + let vector_scores = block_meta_index.vector_scores.clone().unwrap(); + assert_eq!(vector_scores.len(), expected_scores.len()); + for (vector_score, expected_score) in vector_scores.iter().zip(expected_scores) { + assert_eq!(block_meta_index.segment_idx, expected_score.0); + assert_eq!(block_meta_index.block_idx, expected_score.1); + assert_eq!(vector_score.0, expected_score.2); + assert_eq!(vector_score.1, expected_score.3); + } + } + } + + Ok(()) +} diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index 9379b53b8cb8e..b4ebe56e3c48f 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -49,6 +49,10 @@ use crate::types::ReturnType; use crate::types::StringType; use crate::types::ValueType; use crate::types::VariantType; +use crate::types::VectorColumn; +use crate::types::VectorDataType; +use crate::types::VectorScalar; +use crate::types::F32; use crate::values::Column; use crate::values::ColumnBuilder; use crate::values::Scalar; @@ -916,6 +920,85 @@ impl<'a> Evaluator<'a> { other => unreachable!("source: {}", other), } } + (DataType::Array(inner_src_ty), DataType::Vector(inner_dest_ty)) => { + if !matches!(&**inner_src_ty, DataType::Number(_) | DataType::Decimal(_)) + || matches!(inner_dest_ty, VectorDataType::Int8(_)) + { + return Err(ErrorCode::BadArguments(format!( + "unable to cast type `{src_type}` to type `{dest_type}`" + )) + .set_span(span)); + } + let dimension = inner_dest_ty.dimension() as usize; + match value { + Value::Scalar(Scalar::Array(col)) => { + if col.len() != dimension { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has incorrect dimension".to_string(), + ) + .set_span(span)); + } + let mut vals = Vec::with_capacity(dimension); + match col { + Column::Number(num_col) => { + for i in 0..dimension { + let num = unsafe { num_col.index_unchecked(i) }; + vals.push(num.to_f32()); + } + } + Column::Decimal(dec_col) => { + for i in 0..dimension { + let dec = unsafe { dec_col.index_unchecked(i) }; + vals.push(F32::from(dec.to_float32())); + } + } + _ => { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has invalid value".to_string(), + ) + .set_span(span)); + } + } + Ok(Value::Scalar(Scalar::Vector(VectorScalar::Float32(vals)))) + } + Value::Column(Column::Array(array_col)) => { + let mut vals = Vec::with_capacity(dimension * array_col.len()); + for col in array_col.iter() { + if col.len() != dimension { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has incorrect dimension" + .to_string(), + ) + .set_span(span)); + } + match col { + Column::Number(num_col) => { + for i in 0..dimension { + let num = unsafe { num_col.index_unchecked(i) }; + vals.push(num.to_f32()); + } + } + Column::Decimal(dec_col) => { + for i in 0..dimension { + let dec = unsafe { dec_col.index_unchecked(i) }; + vals.push(F32::from(dec.to_float32())); + } + } + _ => { + return Err(ErrorCode::BadArguments( + "Array value cast to a vector has invalid value" + .to_string(), + ) + 
.set_span(span)); + } + } + } + let vector_col = VectorColumn::Float32((vals.into(), dimension)); + Ok(Value::Column(Column::Vector(vector_col))) + } + other => unreachable!("source: {}", other), + } + } _ => Err(ErrorCode::BadArguments(format!( "unable to cast type `{src_type}` to type `{dest_type}`" diff --git a/src/query/expression/src/schema.rs b/src/query/expression/src/schema.rs index 0cdb6cc69f331..89f088dd71f47 100644 --- a/src/query/expression/src/schema.rs +++ b/src/query/expression/src/schema.rs @@ -50,6 +50,8 @@ pub const BASE_BLOCK_IDS_COLUMN_ID: u32 = u32::MAX - 6; // internal search column id. pub const SEARCH_MATCHED_COLUMN_ID: u32 = u32::MAX - 7; pub const SEARCH_SCORE_COLUMN_ID: u32 = u32::MAX - 8; +// internal vector score column id. +pub const VECTOR_SCORE_COLUMN_ID: u32 = u32::MAX - 9; pub const VIRTUAL_COLUMN_ID_START: u32 = 3_000_000_000; pub const VIRTUAL_COLUMNS_ID_UPPER: u32 = 3_000_001_000; @@ -66,6 +68,8 @@ pub const BASE_BLOCK_IDS_COL_NAME: &str = "_base_block_ids"; // internal search column name. pub const SEARCH_MATCHED_COL_NAME: &str = "_search_matched"; pub const SEARCH_SCORE_COL_NAME: &str = "_search_score"; +// internal vector score column name. +pub const VECTOR_SCORE_COL_NAME: &str = "_vector_score"; pub const CHANGE_ACTION_COL_NAME: &str = "change$action"; pub const CHANGE_IS_UPDATE_COL_NAME: &str = "change$is_update"; @@ -100,6 +104,7 @@ pub static INTERNAL_COLUMNS: LazyLock> = LazyLock::new(|| BASE_BLOCK_IDS_COL_NAME, SEARCH_MATCHED_COL_NAME, SEARCH_SCORE_COL_NAME, + VECTOR_SCORE_COL_NAME, CHANGE_ACTION_COL_NAME, CHANGE_IS_UPDATE_COL_NAME, CHANGE_ROW_ID_COL_NAME, @@ -114,7 +119,7 @@ pub static INTERNAL_COLUMNS: LazyLock> = LazyLock::new(|| #[inline] pub fn is_internal_column_id(column_id: ColumnId) -> bool { - column_id >= SEARCH_SCORE_COLUMN_ID + column_id >= VECTOR_SCORE_COLUMN_ID || (FILE_ROW_NUMBER_COLUMN_ID..=FILENAME_COLUMN_ID).contains(&column_id) } @@ -670,7 +675,10 @@ impl TableSchema { } } ( - TableDataType::Tuple { .. } | TableDataType::Array(_) | TableDataType::Map(_), + TableDataType::Tuple { .. 
} + | TableDataType::Array(_) + | TableDataType::Map(_) + | TableDataType::Vector(_), _, ) => { // ignore leaf columns diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index df503fdd1c1f1..2b60877f13d12 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -638,7 +638,11 @@ fn can_cast_to(src_ty: &DataType, dest_ty: &DataType) -> bool { { true } - + (DataType::Array(fields_src_ty), DataType::Vector(_)) + if matches!(&**fields_src_ty, DataType::Number(_) | DataType::Decimal(_)) => + { + true + } (DataType::Nullable(box inner_src_ty), DataType::Nullable(box inner_dest_ty)) | (DataType::Nullable(box inner_src_ty), inner_dest_ty) | (inner_src_ty, DataType::Nullable(box inner_dest_ty)) diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 583d77522a57d..4e9e93d8d6467 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -176,6 +176,12 @@ pub enum DecimalScalar { } impl DecimalScalar { + pub fn to_float32(&self) -> f32 { + with_decimal_type!(|DECIMAL| match self { + DecimalScalar::DECIMAL(v, size) => v.to_float32(size.scale), + }) + } + pub fn to_float64(&self) -> f64 { with_decimal_type!(|DECIMAL| match self { DecimalScalar::DECIMAL(v, size) => v.to_float64(size.scale), diff --git a/src/query/expression/src/types/number.rs b/src/query/expression/src/types/number.rs index 358d7d6c467c5..0bd92a7d526c2 100644 --- a/src/query/expression/src/types/number.rs +++ b/src/query/expression/src/types/number.rs @@ -489,6 +489,14 @@ impl NumberScalar { } } + pub fn to_f32(&self) -> F32 { + crate::with_integer_mapped_type!(|NUM_TYPE| match self { + NumberScalar::NUM_TYPE(num) => (*num as f32).into(), + NumberScalar::Float32(num) => *num, + NumberScalar::Float64(num) => (num.into_inner() as f32).into(), + }) + } + pub fn to_f64(&self) -> F64 { crate::with_integer_mapped_type!(|NUM_TYPE| match self { NumberScalar::NUM_TYPE(num) => (*num as f64).into(), diff --git a/src/query/formats/src/field_decoder/nested.rs b/src/query/formats/src/field_decoder/nested.rs index 753fe52f694b0..b9e0d1ce1e696 100644 --- a/src/query/formats/src/field_decoder/nested.rs +++ b/src/query/formats/src/field_decoder/nested.rs @@ -449,19 +449,14 @@ impl NestedValues { reader.must_ignore_byte(b'[')?; let dimension = column.dimension(); let mut values = Vec::with_capacity(dimension); - for _ in 0..dimension { + for idx in 0..dimension { let _ = reader.ignore_white_spaces_or_comments(); - reader.must_ignore_byte(b',')?; - let _ = reader.ignore_white_spaces_or_comments(); - let res: Result = reader.read_float_text(); - match res { - Ok(v) => { - values.push(v.into()); - } - Err(err) => { - return Err(err); - } + if idx != 0 { + reader.must_ignore_byte(b',')?; } + let _ = reader.ignore_white_spaces_or_comments(); + let v: f32 = reader.read_float_text()?; + values.push(v.into()); } reader.must_ignore_byte(b']')?; column.push(&VectorScalarRef::Float32(&values)); diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs index bec069210957b..5e7231b2feb07 100644 --- a/src/query/functions/src/scalars/vector.rs +++ b/src/query/functions/src/scalars/vector.rs @@ -12,20 +12,41 @@ // See the License for the specific language governing permissions and // limitations under the License. 
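+// The distance functions below are registered twice: for Array(Float32)/Array(Float64) +// arguments here, and for native Vector types via the function factories further down. +// Hand-computed sanity values (illustrative, not taken from the test suite): +// l1_distance([1.0, 2.0], [2.0, 4.0]) = |1-2| + |2-4| = 3.0 +// l2_distance([1.0, 2.0], [2.0, 4.0]) = sqrt(1 + 4) ≈ 2.236 +// cosine_distance([1.0, 2.0], [2.0, 4.0]) = 1 - 10/sqrt(5*20) = 0.0 (parallel vectors)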
+use std::sync::Arc; + +use databend_common_exception::Result; +use databend_common_expression::types::AnyType; use databend_common_expression::types::ArrayType; use databend_common_expression::types::Buffer; +use databend_common_expression::types::DataType; use databend_common_expression::types::Float32Type; use databend_common_expression::types::Float64Type; +use databend_common_expression::types::NumberColumn; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::NumberScalar; use databend_common_expression::types::StringType; +use databend_common_expression::types::VectorDataType; +use databend_common_expression::types::VectorScalarRef; use databend_common_expression::types::F32; use databend_common_expression::types::F64; use databend_common_expression::vectorize_with_builder_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; +use databend_common_expression::Column; +use databend_common_expression::EvalContext; +use databend_common_expression::Function; use databend_common_expression::FunctionDomain; +use databend_common_expression::FunctionEval; +use databend_common_expression::FunctionFactory; use databend_common_expression::FunctionRegistry; +use databend_common_expression::FunctionSignature; +use databend_common_expression::Scalar; +use databend_common_expression::ScalarRef; +use databend_common_expression::Value; use databend_common_openai::OpenAI; use databend_common_vector::cosine_distance; use databend_common_vector::cosine_distance_64; +use databend_common_vector::l1_distance; +use databend_common_vector::l1_distance_64; use databend_common_vector::l2_distance; use databend_common_vector::l2_distance_64; @@ -37,12 +58,35 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type>( |lhs, rhs, output, ctx| { - let l= + let l = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(lhs) }; let r = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(rhs) }; - match cosine_distance(l.as_slice(), r .as_slice()) { + match cosine_distance(l.as_slice(), r.as_slice()) { + Ok(dist) => { + output.push(F32::from(dist)); + } + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + output.push(F32::from(0.0)); + } + } + } + ), + ); + + registry.register_passthrough_nullable_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type, _, _>( + "l1_distance", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type>( + |lhs, rhs, output, ctx| { + let l = + unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(lhs) }; + let r = + unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(rhs) }; + + match l1_distance(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F32::from(dist)); } @@ -63,12 +107,12 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::<ArrayType<Float32Type>, ArrayType<Float32Type>, Float32Type>( |lhs, rhs, output, ctx| { - let l= + let l = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(lhs) }; let r = unsafe { std::mem::transmute::<Buffer<F32>, Buffer<f32>>(rhs) }; - match l2_distance(l.as_slice(), r .as_slice()) { + match l2_distance(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F32::from(dist)); } @@ -91,7 +135,30 @@ pub fn register(registry: &mut FunctionRegistry) { let r = unsafe { std::mem::transmute::<Buffer<F64>, Buffer<f64>>(rhs) }; - match cosine_distance_64(l.as_slice(), r .as_slice()) { + match cosine_distance_64(l.as_slice(), r.as_slice()) { + Ok(dist) => { + output.push(F64::from(dist)); + } + Err(err) => { + ctx.set_error(output.len(), 
err.to_string()); + output.push(F64::from(0.0)); + } + } + } + ), + ); + + registry.register_passthrough_nullable_2_arg::, ArrayType, Float64Type, _, _>( + "l1_distance", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::, ArrayType, Float64Type>( + |lhs, rhs, output, ctx| { + let l = + unsafe { std::mem::transmute::, Buffer>(lhs) }; + let r = + unsafe { std::mem::transmute::, Buffer>(rhs) }; + + match l1_distance_64(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F64::from(dist)); } @@ -109,12 +176,12 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::, ArrayType, Float64Type>( |lhs, rhs, output, ctx| { - let l= + let l = unsafe { std::mem::transmute::, Buffer>(lhs) }; let r = unsafe { std::mem::transmute::, Buffer>(rhs) }; - match l2_distance_64(l.as_slice(), r .as_slice()) { + match l2_distance_64(l.as_slice(), r.as_slice()) { Ok(dist) => { output.push(F64::from(dist)); } @@ -226,4 +293,127 @@ pub fn register(registry: &mut FunctionRegistry) { } }), ); + + let cosine_distance_factory = + FunctionFactory::Closure(Box::new(|_, args_type: &[DataType]| { + let args_type = check_args_type(args_type)?; + Some(Arc::new(Function { + signature: FunctionSignature { + name: "cosine_distance".to_string(), + args_type: args_type.clone(), + return_type: DataType::Number(NumberDataType::Float32), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::Full), + eval: Box::new(|args, ctx| calculate_distance(args, ctx, cosine_distance)), + }, + })) + })); + registry.register_function_factory("cosine_distance", cosine_distance_factory); + + let l1_distance_factory = FunctionFactory::Closure(Box::new(|_, args_type: &[DataType]| { + let args_type = check_args_type(args_type)?; + Some(Arc::new(Function { + signature: FunctionSignature { + name: "l1_distance".to_string(), + args_type: args_type.clone(), + return_type: DataType::Number(NumberDataType::Float32), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::Full), + eval: Box::new(|args, ctx| calculate_distance(args, ctx, l1_distance)), + }, + })) + })); + registry.register_function_factory("l1_distance", l1_distance_factory); + + let l2_distance_factory = FunctionFactory::Closure(Box::new(|_, args_type: &[DataType]| { + let args_type = check_args_type(args_type)?; + Some(Arc::new(Function { + signature: FunctionSignature { + name: "l2_distance".to_string(), + args_type: args_type.clone(), + return_type: DataType::Number(NumberDataType::Float32), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::Full), + eval: Box::new(|args, ctx| calculate_distance(args, ctx, l2_distance)), + }, + })) + })); + registry.register_function_factory("l2_distance", l2_distance_factory); +} + +fn check_args_type(args_type: &[DataType]) -> Option> { + if args_type.len() != 2 { + return None; + } + let args_type0 = args_type[0].remove_nullable(); + let vector_type0 = args_type0.as_vector()?; + let args_type1 = args_type[1].remove_nullable(); + let vector_type1 = args_type1.as_vector()?; + match (vector_type0, vector_type1) { + (VectorDataType::Int8(dim0), VectorDataType::Int8(dim1)) => { + if dim0 != dim1 { + return None; + } + } + (VectorDataType::Float32(dim0), VectorDataType::Float32(dim1)) => { + if dim0 != dim1 { + return None; + } + } + (_, _) => { + return None; + } + } + Some(args_type.to_vec()) +} + +fn calculate_distance( + args: &[Value], + _ctx: &mut EvalContext, + distance_fn: F, +) 
-> Value +where + F: Fn(&[f32], &[f32]) -> Result, +{ + let len_opt = args.iter().find_map(|arg| match arg { + Value::Column(col) => Some(col.len()), + _ => None, + }); + let len = len_opt.unwrap_or(1); + let mut builder = Vec::with_capacity(len); + for i in 0..len { + let lhs = unsafe { args[0].index_unchecked(i) }; + let rhs = unsafe { args[1].index_unchecked(i) }; + match (lhs, rhs) { + ( + ScalarRef::Vector(VectorScalarRef::Int8(lhs)), + ScalarRef::Vector(VectorScalarRef::Int8(rhs)), + ) => { + let l: Vec<_> = lhs.iter().map(|v| *v as f32).collect(); + let r: Vec<_> = rhs.iter().map(|v| *v as f32).collect(); + let dist = distance_fn(l.as_slice(), r.as_slice()).unwrap(); + builder.push(F32::from(dist)); + } + ( + ScalarRef::Vector(VectorScalarRef::Float32(lhs)), + ScalarRef::Vector(VectorScalarRef::Float32(rhs)), + ) => { + let l = unsafe { std::mem::transmute::<&[F32], &[f32]>(lhs) }; + let r = unsafe { std::mem::transmute::<&[F32], &[f32]>(rhs) }; + let dist = distance_fn(l, r).unwrap(); + builder.push(F32::from(dist)); + } + (_, _) => { + builder.push(F32::from(f32::MAX)); + } + } + } + if len_opt.is_some() { + Value::Column(Column::Number(NumberColumn::Float32(Buffer::from(builder)))) + } else { + Value::Scalar(Scalar::Number(NumberScalar::Float32(builder[0]))) + } } diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index f3aa029688e9a..a691ba428c0f8 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -1345,6 +1345,7 @@ Functions overloads: 1 cosine_distance(Array(Float32) NULL, Array(Float32) NULL) :: Float32 NULL 2 cosine_distance(Array(Float64), Array(Float64)) :: Float64 3 cosine_distance(Array(Float64) NULL, Array(Float64) NULL) :: Float64 NULL +4 cosine_distance FACTORY 0 cot(Float64) :: Float64 1 cot(Float64 NULL) :: Float64 NULL 0 crc32(String) :: UInt32 @@ -2299,10 +2300,16 @@ Functions overloads: 1 json_strip_nulls(Variant NULL) :: Variant NULL 0 json_typeof(Variant) :: String 1 json_typeof(Variant NULL) :: String NULL +0 l1_distance(Array(Float32), Array(Float32)) :: Float32 +1 l1_distance(Array(Float32) NULL, Array(Float32) NULL) :: Float32 NULL +2 l1_distance(Array(Float64), Array(Float64)) :: Float64 +3 l1_distance(Array(Float64) NULL, Array(Float64) NULL) :: Float64 NULL +4 l1_distance FACTORY 0 l2_distance(Array(Float32), Array(Float32)) :: Float32 1 l2_distance(Array(Float32) NULL, Array(Float32) NULL) :: Float32 NULL 2 l2_distance(Array(Float64), Array(Float64)) :: Float64 3 l2_distance(Array(Float64) NULL, Array(Float64) NULL) :: Float64 NULL +4 l2_distance FACTORY 0 left(String, UInt64) :: String 1 left(String NULL, UInt64 NULL) :: String NULL 0 length(Variant NULL) :: UInt32 NULL diff --git a/src/query/functions/tests/it/scalars/testdata/vector.txt b/src/query/functions/tests/it/scalars/testdata/vector.txt index dc4e081423cc6..f7626f452ef77 100644 --- a/src/query/functions/tests/it/scalars/testdata/vector.txt +++ b/src/query/functions/tests/it/scalars/testdata/vector.txt @@ -1,23 +1,171 @@ -ast : cosine_distance([a], [b]) -raw expr : cosine_distance(array(a::Float32), array(b::Float32)) -checked expr : cosine_distance(array(a), array(b)) -evaluation: -+--------+---------+---------+---------+ -| | a | b | Output | -+--------+---------+---------+---------+ -| Type | Float32 | Float32 | Float32 | -| Domain | {0..=2} | {3..=5} | Unknown | -| Row 0 | 0 | 3 | NaN | -| Row 1 | 1 | 
4 | 0 | -| Row 2 | 2 | 5 | 0 | -+--------+---------+---------+---------+ -evaluation (internal): -+--------+----------------------+ -| Column | Data | -+--------+----------------------+ -| a | Float32([0, 1, 2]) | -| b | Float32([3, 4, 5]) | -| Output | Float32([NaN, 0, 0]) | -+--------+----------------------+ +ast : cosine_distance([1,0,0], [1,0,0]) +raw expr : cosine_distance(array(1, 0, 0), array(1, 0, 0)) +checked expr : cosine_distance(CAST(array(1_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 0_u8, 0_u8) AS Array(Float32))) +optimized expr : 0_f32 +output type : Float32 +output domain : {0..=0} +output : 0 + + +ast : cosine_distance([1,0,0], [-1,0,0]) +raw expr : cosine_distance(array(1, 0, 0), array(minus(1), 0, 0)) +checked expr : cosine_distance(CAST(array(1_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(minus(1_u8), CAST(0_u8 AS Int16), CAST(0_u8 AS Int16)) AS Array(Float32))) +optimized expr : 2_f32 +output type : Float32 +output domain : {2..=2} +output : 2 + + +ast : cosine_distance([1,2,3], [4,5,6]) +raw expr : cosine_distance(array(1, 2, 3), array(4, 5, 6)) +checked expr : cosine_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(4_u8, 5_u8, 6_u8) AS Array(Float32))) +optimized expr : 0.02536821_f32 +output type : Float32 +output domain : {0.02536821..=0.02536821} +output : 0.02536821 + + +ast : cosine_distance([0,0,0], [1,2,3]) +raw expr : cosine_distance(array(0, 0, 0), array(1, 2, 3)) +checked expr : cosine_distance(CAST(array(0_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : NaN_f32 +output type : Float32 +output domain : {NaN..=NaN} +output : NaN + + +ast : cosine_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3)) +raw expr : cosine_distance(CAST(array(1, minus(2), 3) AS Vector(3)), CAST(array(minus(4), 5, minus(6)) AS Vector(3))) +checked expr : cosine_distance(CAST(array(CAST(1_u8 AS Int16), minus(2_u8), CAST(3_u8 AS Int16)) AS Vector(3)), CAST(array(minus(4_u8), CAST(5_u8 AS Int16), minus(6_u8)) AS Vector(3))) +optimized expr : 1.974632_f32 +output type : Float32 +output domain : {1.974632..=1.974632} +output : 1.974632 + + +ast : cosine_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3)) +raw expr : cosine_distance(CAST(array(0.1, 0.2, 0.3) AS Vector(3)), CAST(array(0.4, 0.5, 0.6) AS Vector(3))) +checked expr : cosine_distance(CAST(array(0.1_d64(1,1), 0.2_d64(1,1), 0.3_d64(1,1)) AS Vector(3)), CAST(array(0.4_d64(1,1), 0.5_d64(1,1), 0.6_d64(1,1)) AS Vector(3))) +optimized expr : 0.02536827_f32 +output type : Float32 +output domain : {0.02536827..=0.02536827} +output : 0.02536827 + + +ast : cosine_distance([1,0]::vector(2), [0,1]::vector(2)) +raw expr : cosine_distance(CAST(array(1, 0) AS Vector(2)), CAST(array(0, 1) AS Vector(2))) +checked expr : cosine_distance(CAST(array(1_u8, 0_u8) AS Vector(2)), CAST(array(0_u8, 1_u8) AS Vector(2))) +optimized expr : 1_f32 +output type : Float32 +output domain : {1..=1} +output : 1 + + +ast : l1_distance([1,2,3], [1,2,3]) +raw expr : l1_distance(array(1, 2, 3), array(1, 2, 3)) +checked expr : l1_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 0_f32 +output type : Float32 +output domain : {0..=0} +output : 0 + + +ast : l1_distance([1,2,3], [4,5,6]) +raw expr : l1_distance(array(1, 2, 3), array(4, 5, 6)) +checked expr : l1_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(4_u8, 5_u8, 6_u8) AS Array(Float32))) +optimized expr : 9_f32 +output type : 
Float32 +output domain : {9..=9} +output : 9 + + +ast : l1_distance([0,0,0], [1,2,3]) +raw expr : l1_distance(array(0, 0, 0), array(1, 2, 3)) +checked expr : l1_distance(CAST(array(0_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 6_f32 +output type : Float32 +output domain : {6..=6} +output : 6 + + +ast : l1_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3)) +raw expr : l1_distance(CAST(array(1, minus(2), 3) AS Vector(3)), CAST(array(minus(4), 5, minus(6)) AS Vector(3))) +checked expr : l1_distance(CAST(array(CAST(1_u8 AS Int16), minus(2_u8), CAST(3_u8 AS Int16)) AS Vector(3)), CAST(array(minus(4_u8), CAST(5_u8 AS Int16), minus(6_u8)) AS Vector(3))) +optimized expr : 21_f32 +output type : Float32 +output domain : {21..=21} +output : 21 + + +ast : l1_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3)) +raw expr : l1_distance(CAST(array(0.1, 0.2, 0.3) AS Vector(3)), CAST(array(0.4, 0.5, 0.6) AS Vector(3))) +checked expr : l1_distance(CAST(array(0.1_d64(1,1), 0.2_d64(1,1), 0.3_d64(1,1)) AS Vector(3)), CAST(array(0.4_d64(1,1), 0.5_d64(1,1), 0.6_d64(1,1)) AS Vector(3))) +optimized expr : 0.9_f32 +output type : Float32 +output domain : {0.9..=0.9} +output : 0.9 + + +ast : l1_distance([1,2]::vector(2), [3,4]::vector(2)) +raw expr : l1_distance(CAST(array(1, 2) AS Vector(2)), CAST(array(3, 4) AS Vector(2))) +checked expr : l1_distance(CAST(array(1_u8, 2_u8) AS Vector(2)), CAST(array(3_u8, 4_u8) AS Vector(2))) +optimized expr : 4_f32 +output type : Float32 +output domain : {4..=4} +output : 4 + + +ast : l2_distance([1,2,3], [1,2,3]) +raw expr : l2_distance(array(1, 2, 3), array(1, 2, 3)) +checked expr : l2_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 0_f32 +output type : Float32 +output domain : {0..=0} +output : 0 + + +ast : l2_distance([1,2,3], [4,5,6]) +raw expr : l2_distance(array(1, 2, 3), array(4, 5, 6)) +checked expr : l2_distance(CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32)), CAST(array(4_u8, 5_u8, 6_u8) AS Array(Float32))) +optimized expr : 5.196152_f32 +output type : Float32 +output domain : {5.196152..=5.196152} +output : 5.196152 + + +ast : l2_distance([0,0,0], [1,2,3]) +raw expr : l2_distance(array(0, 0, 0), array(1, 2, 3)) +checked expr : l2_distance(CAST(array(0_u8, 0_u8, 0_u8) AS Array(Float32)), CAST(array(1_u8, 2_u8, 3_u8) AS Array(Float32))) +optimized expr : 3.741658_f32 +output type : Float32 +output domain : {3.741658..=3.741658} +output : 3.741658 + + +ast : l2_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3)) +raw expr : l2_distance(CAST(array(1, minus(2), 3) AS Vector(3)), CAST(array(minus(4), 5, minus(6)) AS Vector(3))) +checked expr : l2_distance(CAST(array(CAST(1_u8 AS Int16), minus(2_u8), CAST(3_u8 AS Int16)) AS Vector(3)), CAST(array(minus(4_u8), CAST(5_u8 AS Int16), minus(6_u8)) AS Vector(3))) +optimized expr : 12.4499_f32 +output type : Float32 +output domain : {12.4499..=12.4499} +output : 12.4499 + + +ast : l2_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3)) +raw expr : l2_distance(CAST(array(0.1, 0.2, 0.3) AS Vector(3)), CAST(array(0.4, 0.5, 0.6) AS Vector(3))) +checked expr : l2_distance(CAST(array(0.1_d64(1,1), 0.2_d64(1,1), 0.3_d64(1,1)) AS Vector(3)), CAST(array(0.4_d64(1,1), 0.5_d64(1,1), 0.6_d64(1,1)) AS Vector(3))) +optimized expr : 0.5196152_f32 +output type : Float32 +output domain : {0.5196152..=0.5196152} +output : 0.5196152 + + +ast : l2_distance([1,2]::vector(2), [3,4]::vector(2)) +raw expr : 
l2_distance(CAST(array(1, 2) AS Vector(2)), CAST(array(3, 4) AS Vector(2))) +checked expr : l2_distance(CAST(array(1_u8, 2_u8) AS Vector(2)), CAST(array(3_u8, 4_u8) AS Vector(2))) +optimized expr : 2.828427_f32 +output type : Float32 +output domain : {2.828427..=2.828427} +output : 2.828427 diff --git a/src/query/functions/tests/it/scalars/vector.rs b/src/query/functions/tests/it/scalars/vector.rs index 8421e5e61d590..5e5020aede4d0 100644 --- a/src/query/functions/tests/it/scalars/vector.rs +++ b/src/query/functions/tests/it/scalars/vector.rs @@ -14,8 +14,6 @@ use std::io::Write; -use databend_common_expression::types::*; -use databend_common_expression::FromData; use goldenfile::Mint; use super::run_ast; @@ -26,11 +24,62 @@ fn test_vector() { let file = &mut mint.new_goldenfile("vector.txt").unwrap(); test_vector_cosine_distance(file); + test_vector_l1_distance(file); + test_vector_l2_distance(file); } fn test_vector_cosine_distance(file: &mut impl Write) { - run_ast(file, "cosine_distance([a], [b])", &[ - ("a", Float32Type::from_data(vec![0f32, 1.0, 2.0])), - ("b", Float32Type::from_data(vec![3f32, 4.0, 5.0])), - ]); + run_ast(file, "cosine_distance([1,0,0], [1,0,0])", &[]); + run_ast(file, "cosine_distance([1,0,0], [-1,0,0])", &[]); + run_ast(file, "cosine_distance([1,2,3], [4,5,6])", &[]); + run_ast(file, "cosine_distance([0,0,0], [1,2,3])", &[]); + run_ast( + file, + "cosine_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3))", + &[], + ); + run_ast( + file, + "cosine_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3))", + &[], + ); + run_ast( + file, + "cosine_distance([1,0]::vector(2), [0,1]::vector(2))", + &[], + ); +} + +fn test_vector_l1_distance(file: &mut impl Write) { + run_ast(file, "l1_distance([1,2,3], [1,2,3])", &[]); + run_ast(file, "l1_distance([1,2,3], [4,5,6])", &[]); + run_ast(file, "l1_distance([0,0,0], [1,2,3])", &[]); + run_ast( + file, + "l1_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3))", + &[], + ); + run_ast( + file, + "l1_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3))", + &[], + ); + run_ast(file, "l1_distance([1,2]::vector(2), [3,4]::vector(2))", &[]); +} + +fn test_vector_l2_distance(file: &mut impl Write) { + run_ast(file, "l2_distance([1,2,3], [1,2,3])", &[]); + run_ast(file, "l2_distance([1,2,3], [4,5,6])", &[]); + run_ast(file, "l2_distance([0,0,0], [1,2,3])", &[]); + run_ast( + file, + "l2_distance([1,-2,3]::vector(3), [-4,5,-6]::vector(3))", + &[], + ); + run_ast( + file, + "l2_distance([0.1,0.2,0.3]::vector(3), [0.4,0.5,0.6]::vector(3))", + &[], + ); + run_ast(file, "l2_distance([1,2]::vector(2), [3,4]::vector(2))", &[]); } diff --git a/src/query/service/src/test_kits/block_writer.rs b/src/query/service/src/test_kits/block_writer.rs index 7bd9b7bed0729..6f81b9a8f1dbe 100644 --- a/src/query/service/src/test_kits/block_writer.rs +++ b/src/query/service/src/test_kits/block_writer.rs @@ -110,6 +110,8 @@ impl<'a> BlockWriter<'a> { None, None, None, + None, + None, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs index d16d635d824d7..520231794d2cb 100644 --- a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs +++ b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs @@ -337,6 +337,8 @@ fn build_test_segment_info( bloom_filter_index_size: 0, inverted_index_size: None, ngram_filter_index_size: None, + vector_index_size: None, + vector_index_location: None, 
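// vector_index_size / vector_index_location are the new block-level fields for
// the HNSW index artifact; this fixture leaves them None because it builds no
// vector index.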
virtual_block_meta: None, compression: Compression::Lz4, create_on: Some(Utc::now()), diff --git a/src/query/service/tests/it/storages/fuse/operations/internal_column.rs b/src/query/service/tests/it/storages/fuse/operations/internal_column.rs index 0300a5e1b8f03..805b24432413b 100644 --- a/src/query/service/tests/it/storages/fuse/operations/internal_column.rs +++ b/src/query/service/tests/it/storages/fuse/operations/internal_column.rs @@ -65,6 +65,7 @@ fn expected_data_block( base_block_ids: None, inner: None, matched_rows: block_meta.matched_rows.clone(), + vector_scores: block_meta.vector_scores.clone(), }; for internal_column in internal_columns { let column = internal_column.generate_column_values(&internal_column_meta, num_rows); diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs index ab4ea10645464..97c3e9cf85f0d 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs @@ -79,6 +79,8 @@ async fn test_recluster_mutator_block_select() -> Result<()> { None, None, None, + None, + None, meta::Compression::Lz4Raw, Some(Utc::now()), )); diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs index 97fbcebc9e0eb..ebdcabfd13662 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs @@ -779,6 +779,8 @@ impl CompactSegmentTestFixture { None, None, None, + None, + None, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs index fd2ee9c0d6914..09dccdb11816e 100644 --- a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs +++ b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs @@ -105,6 +105,8 @@ fn test_to_partitions() -> Result<()> { None, None, None, + None, + None, meta::Compression::Lz4Raw, Some(Utc::now()), )); diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs index 64c1f89b04d3e..c87b524f9217f 100644 --- a/src/query/service/tests/it/storages/fuse/statistics.rs +++ b/src/query/service/tests/it/storages/fuse/statistics.rs @@ -635,6 +635,8 @@ fn test_reduce_block_meta() -> databend_common_exception::Result<()> { None, None, None, + None, + None, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 5b4a92ad7b69b..0cef36101265a 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -32,6 +32,9 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | 'cache' | 'table_meta_snapshot_count' | '256' | '' | | 'cache' | 'table_meta_statistic_count' | '256' | '' | | 'cache' | 'table_prune_partitions_count' | '256' | '' | +| 'cache' | 'vector_index_filter_memory_ratio' | '0' | '' | +| 'cache' | 'vector_index_filter_size' | '2147483648' | '' | +| 'cache' | 
'vector_index_meta_count' | '3000' | '' | | 'log' | 'dir' | './.databend/logs' | '' | | 'log' | 'file.dir' | './.databend/logs' | '' | | 'log' | 'file.format' | 'text' | '' | diff --git a/src/query/sql/src/executor/physical_plans/physical_table_scan.rs b/src/query/sql/src/executor/physical_plans/physical_table_scan.rs index 0541b88c9abd1..ea303927eda2b 100644 --- a/src/query/sql/src/executor/physical_plans/physical_table_scan.rs +++ b/src/query/sql/src/executor/physical_plans/physical_table_scan.rs @@ -564,6 +564,7 @@ impl PhysicalPlanBuilder { agg_index: None, change_type: scan.change_type.clone(), inverted_index: scan.inverted_index.clone(), + vector_index: scan.vector_index.clone(), sample: scan.sample.clone(), }) } diff --git a/src/query/sql/src/planner/binder/bind_context.rs b/src/query/sql/src/planner/binder/bind_context.rs index 2cf026e08983e..e3015508ce3ad 100644 --- a/src/query/sql/src/planner/binder/bind_context.rs +++ b/src/query/sql/src/planner/binder/bind_context.rs @@ -25,6 +25,7 @@ use databend_common_ast::ast::WindowSpec; use databend_common_ast::Span; use databend_common_catalog::plan::InternalColumn; use databend_common_catalog::plan::InvertedIndexInfo; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ColumnId; @@ -143,6 +144,8 @@ pub struct BindContext { pub inverted_index_map: Box>, + pub vector_index_map: Box>, + /// Whether allow rewrite as virtual column and pushdown. pub allow_virtual_column: bool, @@ -217,6 +220,7 @@ impl BindContext { have_udf_script: false, have_udf_server: false, inverted_index_map: Box::default(), + vector_index_map: Box::default(), allow_virtual_column: false, expr_context: ExprContext::default(), planning_agg_index: false, @@ -261,6 +265,7 @@ impl BindContext { have_udf_script: false, have_udf_server: false, inverted_index_map: Box::default(), + vector_index_map: Box::default(), allow_virtual_column: parent.allow_virtual_column, expr_context: ExprContext::default(), planning_agg_index: false, diff --git a/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs b/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs index 0938ea87dda2c..85ba4317ee0f5 100644 --- a/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs +++ b/src/query/sql/src/planner/binder/bind_mutation/mutation_expression.rs @@ -465,7 +465,7 @@ impl Binder { let row_id_index: usize = column_binding.index; - *expr = expr.add_column_index_to_scans(table_index, row_id_index, &None); + *expr = expr.add_column_index_to_scans(table_index, row_id_index, &None, &None); self.metadata .write() diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index 51d18d059ed39..4430dddf37ce2 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -1105,13 +1105,20 @@ impl<'a> Binder { .to_string(), )); } + let mut vector_index_map = mem::take(&mut bind_context.vector_index_map); for ((table_index, _), column_index) in bound_internal_columns.iter() { let inverted_index = inverted_index_map.shift_remove(table_index).map(|mut i| { i.has_score = has_score; i }); - s_expr = s_expr.add_column_index_to_scans(*table_index, *column_index, &inverted_index); + let vector_index = vector_index_map.shift_remove(table_index); + s_expr = s_expr.add_column_index_to_scans( + *table_index, + *column_index, + &inverted_index, + &vector_index, + ); } Ok(s_expr) 
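        // Each matching Scan now carries the internal column plus, when the
        // type checker discovered one, the table's vector index info.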
} diff --git a/src/query/sql/src/planner/binder/ddl/index.rs b/src/query/sql/src/planner/binder/ddl/index.rs index dbf5c8bde93a6..b872de8bfd37f 100644 --- a/src/query/sql/src/planner/binder/ddl/index.rs +++ b/src/query/sql/src/planner/binder/ddl/index.rs @@ -45,6 +45,7 @@ use databend_common_meta_app::schema::IndexNameIdent; use databend_storages_common_table_meta::meta::Location; use derive_visitor::Drive; use derive_visitor::DriveMut; +use itertools::Itertools; use crate::binder::Binder; use crate::optimizer::optimize; @@ -104,6 +105,19 @@ fn is_valid_index_record_values>(opt_val: S) -> bool { INDEX_RECORD_VALUES.contains(opt_val.as_ref()) } +// valid values for vector index distance +static INDEX_DISTANCE_VALUES: LazyLock> = LazyLock::new(|| { + let mut r = HashSet::new(); + r.insert("cosine"); + r.insert("l1"); + r.insert("l2"); + r +}); + +fn is_valid_index_distance_values>(opt_val: S) -> bool { + INDEX_DISTANCE_VALUES.contains(opt_val.as_ref()) +} + impl Binder { #[async_backtrace::framed] pub(in crate::planner::binder) async fn bind_query_index( @@ -750,6 +764,23 @@ impl Binder { } options.insert("ef_construct".to_string(), value); } + "distance" => { + let raw_distances: Vec<&str> = value.split(',').collect(); + let mut distances = BTreeSet::new(); + for raw_distance in raw_distances { + let distance = raw_distance.trim(); + if !is_valid_index_distance_values(distance) { + return Err(ErrorCode::IndexOptionInvalid(format!( + "value `{distance}` is invalid index distance type", + ))); + } + distances.insert(distance); + } + options.insert( + "distance".to_string(), + distances.into_iter().join(",").to_string(), + ); + } _ => { return Err(ErrorCode::IndexOptionInvalid(format!( "index option `{key}` is invalid key for create vector index statement", @@ -757,6 +788,12 @@ impl Binder { } } } + if !options.contains_key("distance") { + return Err(ErrorCode::IndexOptionInvalid( + "must specify `distance` option, valid values are: `cosine`, `l1` and `l2`" + .to_string(), + )); + } Ok(options) } diff --git a/src/query/sql/src/planner/binder/internal_column_factory.rs b/src/query/sql/src/planner/binder/internal_column_factory.rs index e43f56fe01608..90fc2a2db5e58 100644 --- a/src/query/sql/src/planner/binder/internal_column_factory.rs +++ b/src/query/sql/src/planner/binder/internal_column_factory.rs @@ -27,6 +27,7 @@ use databend_common_expression::SEARCH_MATCHED_COL_NAME; use databend_common_expression::SEARCH_SCORE_COL_NAME; use databend_common_expression::SEGMENT_NAME_COL_NAME; use databend_common_expression::SNAPSHOT_NAME_COL_NAME; +use databend_common_expression::VECTOR_SCORE_COL_NAME; #[ctor] pub static INTERNAL_COLUMN_FACTORY: InternalColumnFactory = InternalColumnFactory::init(); @@ -79,6 +80,11 @@ impl InternalColumnFactory { InternalColumn::new(SEARCH_SCORE_COL_NAME, InternalColumnType::SearchScore), ); + internal_columns.insert( + VECTOR_SCORE_COL_NAME.to_string(), + InternalColumn::new(VECTOR_SCORE_COL_NAME, InternalColumnType::VectorScore), + ); + internal_columns.insert( FILENAME_COLUMN_NAME.to_string(), InternalColumn::new(FILENAME_COLUMN_NAME, InternalColumnType::FileName), diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index c659a4b12910a..b63ae6d0cf0f5 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -187,6 +187,7 @@ impl Binder { have_udf_script: false, have_udf_server: false, inverted_index_map: Box::default(), + vector_index_map: Box::default(), 
allow_virtual_column: false, expr_context: ExprContext::default(), planning_agg_index: false, diff --git a/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs b/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs index 4900fe5167077..a2fd2194e2400 100644 --- a/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs +++ b/src/query/sql/src/planner/optimizer/ir/expr/s_expr.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use std::sync::Mutex; use databend_common_catalog::plan::InvertedIndexInfo; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use educe::Educe; @@ -401,6 +402,7 @@ impl SExpr { table_index: IndexType, column_index: IndexType, inverted_index: &Option, + vector_index: &Option, ) -> SExpr { #[recursive::recursive] fn add_column_index_to_scans_recursive( @@ -408,6 +410,7 @@ impl SExpr { column_index: IndexType, table_index: IndexType, inverted_index: &Option, + vector_index: &Option, ) -> SExpr { let mut s_expr = s_expr.clone(); s_expr.plan = if let RelOperator::Scan(mut p) = (*s_expr.plan).clone() { @@ -416,6 +419,9 @@ impl SExpr { if inverted_index.is_some() { p.inverted_index = inverted_index.clone(); } + if vector_index.is_some() { + p.vector_index = vector_index.clone(); + } } Arc::new(p.into()) } else { @@ -432,6 +438,7 @@ impl SExpr { column_index, table_index, inverted_index, + vector_index, ))); } @@ -441,7 +448,13 @@ impl SExpr { } } - add_column_index_to_scans_recursive(self, column_index, table_index, inverted_index) + add_column_index_to_scans_recursive( + self, + column_index, + table_index, + inverted_index, + vector_index, + ) } // The method will clear the applied rules of current SExpr and its children. diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index 2f2227bf16a96..d7a8a0d3f136f 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use databend_common_ast::ast::SampleConfig; use databend_common_catalog::plan::InvertedIndexInfo; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_catalog::statistics::BasicColumnStatistics; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -104,6 +105,7 @@ pub struct Scan { // Whether to update stream columns. pub update_stream_columns: bool, pub inverted_index: Option, + pub vector_index: Option, // Lazy row fetch. 
pub is_lazy_table: bool, pub sample: Option, @@ -146,6 +148,7 @@ impl Scan { change_type: self.change_type.clone(), update_stream_columns: self.update_stream_columns, inverted_index: self.inverted_index.clone(), + vector_index: self.vector_index.clone(), is_lazy_table: self.is_lazy_table, sample: self.sample.clone(), scan_id: self.scan_id, diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 906bdcb56ebc6..2278bf26d9405 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -56,6 +56,7 @@ use databend_common_catalog::plan::InternalColumn; use databend_common_catalog::plan::InternalColumnType; use databend_common_catalog::plan::InvertedIndexInfo; use databend_common_catalog::plan::InvertedIndexOption; +use databend_common_catalog::plan::VectorIndexInfo; use databend_common_catalog::table_context::TableContext; use databend_common_compress::CompressAlgorithm; use databend_common_compress::DecompressDecoder; @@ -77,6 +78,7 @@ use databend_common_expression::types::Decimal; use databend_common_expression::types::NumberDataType; use databend_common_expression::types::NumberScalar; use databend_common_expression::types::F32; +use databend_common_expression::Column; use databend_common_expression::ColumnIndex; use databend_common_expression::Constant; use databend_common_expression::ConstantFolder; @@ -90,6 +92,7 @@ use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_common_expression::SEARCH_MATCHED_COL_NAME; use databend_common_expression::SEARCH_SCORE_COL_NAME; +use databend_common_expression::VECTOR_SCORE_COL_NAME; use databend_common_functions::aggregates::AggregateFunctionFactory; use databend_common_functions::is_builtin_function; use databend_common_functions::ASYNC_FUNCTIONS; @@ -99,6 +102,8 @@ use databend_common_functions::GENERAL_SEARCH_FUNCTIONS; use databend_common_functions::GENERAL_WINDOW_FUNCTIONS; use databend_common_functions::GENERAL_WITHIN_GROUP_FUNCTIONS; use databend_common_functions::RANK_WINDOW_FUNCTIONS; +use databend_common_license::license::Feature; +use databend_common_license::license_manager::LicenseManagerSwitch; use databend_common_meta_app::principal::LambdaUDF; use databend_common_meta_app::principal::UDAFScript; use databend_common_meta_app::principal::UDFDefinition; @@ -108,6 +113,7 @@ use databend_common_meta_app::schema::dictionary_name_ident::DictionaryNameIdent use databend_common_meta_app::schema::DictionaryIdentity; use databend_common_meta_app::schema::GetSequenceReq; use databend_common_meta_app::schema::SequenceIdent; +use databend_common_meta_app::schema::TableIndexType; use databend_common_storage::init_stage_operator; use databend_common_users::UserApiProvider; use derive_visitor::Drive; @@ -2705,6 +2711,9 @@ impl<'a> TypeChecker<'a> { let mut index_schema = None; let mut index_options = BTreeMap::new(); for table_index in table_indexes.values() { + if table_index.index_type != TableIndexType::Inverted { + continue; + } if column_ids .iter() .all(|id| table_index.column_ids.contains(id)) @@ -2922,6 +2931,11 @@ impl<'a> TypeChecker<'a> { { return rewritten_variant_expr; } + if let Some(rewritten_vector_expr) = + self.try_rewrite_vector_function(span, func_name, &args) + { + return rewritten_vector_expr; + } self.resolve_scalar_function_call(span, func_name, params, args) } @@ -4368,6 +4382,189 @@ impl<'a> TypeChecker<'a> { None } + fn vector_functions() -> &'static 
[Ascii<&'static str>] { + static VECTOR_FUNCTIONS: &[Ascii<&'static str>] = &[ + Ascii::new("cosine_distance"), + Ascii::new("l1_distance"), + Ascii::new("l2_distance"), + ]; + VECTOR_FUNCTIONS + } + + fn try_rewrite_vector_function( + &mut self, + span: Span, + func_name: &str, + args: &[ScalarExpr], + ) -> Option>> { + // Try rewrite vector distance function to vector score internal column, + // so that the vector index can be used to accelerate the query. + let uni_case_func_name = Ascii::new(func_name); + if Self::vector_functions().contains(&uni_case_func_name) { + match args { + [ScalarExpr::BoundColumnRef(BoundColumnRef { + column: + ColumnBinding { + table_index, + database_name, + table_name, + column_name, + data_type, + .. + }, + .. + }), ScalarExpr::CastExpr(CastExpr { + argument, + target_type, + .. + })] + | [ScalarExpr::CastExpr(CastExpr { + argument, + target_type, + .. + }), ScalarExpr::BoundColumnRef(BoundColumnRef { + column: + ColumnBinding { + table_index, + database_name, + table_name, + column_name, + data_type, + .. + }, + .. + })] => { + let col_data_type = data_type.remove_nullable(); + if table_index.is_some() + && matches!(col_data_type, DataType::Vector(_)) + && matches!(&**argument, ScalarExpr::ConstantExpr(_)) + && matches!(&**target_type, DataType::Vector(_)) + && LicenseManagerSwitch::instance() + .check_enterprise_enabled( + self.ctx.get_license_key(), + Feature::VectorIndex, + ) + .is_ok() + { + let table_index = table_index.unwrap(); + let table_entry = self.metadata.read().table(table_index).clone(); + let table = table_entry.table(); + let table_info = table.get_table_info(); + let table_schema = table_info.schema(); + let table_indexes = &table_info.meta.indexes; + if self + .bind_context + .vector_index_map + .contains_key(&table_index) + { + return None; + } + let Ok(column_id) = table_schema.column_id_of(column_name) else { + return None; + }; + for vector_index in table_indexes.values() { + if vector_index.index_type != TableIndexType::Vector { + continue; + } + let Some(distances) = vector_index.options.get("distance") else { + continue; + }; + // distance_type must match function name + let mut matched_distance = false; + let distance_types: Vec<&str> = distances.split(',').collect(); + for distance_type in distance_types { + if func_name.starts_with(distance_type) { + matched_distance = true; + break; + } + } + if !matched_distance { + continue; + } + if vector_index.column_ids.contains(&column_id) { + let internal_column = InternalColumn::new( + VECTOR_SCORE_COL_NAME, + InternalColumnType::VectorScore, + ); + let internal_column_binding = InternalColumnBinding { + database_name: database_name.clone(), + table_name: table_name.clone(), + internal_column, + }; + let Ok(column_binding) = + self.bind_context.add_internal_column_binding( + &internal_column_binding, + self.metadata.clone(), + Some(table_index), + false, + ) + else { + return None; + }; + + let new_column = ScalarExpr::BoundColumnRef(BoundColumnRef { + span, + column: column_binding, + }); + + let arg = ConstantExpr::try_from(*argument.clone()).unwrap(); + let Scalar::Array(arg_col) = arg.value else { + return None; + }; + + let col_vector_type = col_data_type.as_vector().unwrap(); + let col_dimension = col_vector_type.dimension() as usize; + let arg_vector_type = target_type.as_vector().unwrap(); + let arg_dimension = arg_vector_type.dimension() as usize; + if col_dimension != arg_dimension || arg_col.len() != col_dimension + { + return None; + } + let mut query_values = 
Vec::with_capacity(arg_col.len()); + match arg_col { + Column::Number(num_col) => { + for i in 0..num_col.len() { + let num = unsafe { num_col.index_unchecked(i) }; + query_values.push(num.to_f32()); + } + } + Column::Decimal(dec_col) => { + for i in 0..dec_col.len() { + let dec = unsafe { dec_col.index_unchecked(i) }; + query_values.push(F32::from(dec.to_float32())); + } + } + _ => { + return None; + } + } + + let index_info = VectorIndexInfo { + index_name: vector_index.name.clone(), + index_version: vector_index.version.clone(), + index_options: vector_index.options.clone(), + column_id, + func_name: func_name.to_string(), + query_values, + }; + self.bind_context + .vector_index_map + .insert(table_index, index_info); + + return Some(Ok(Box::new(( + new_column, + DataType::Number(NumberDataType::Float32), + )))); + } + } + } + } + _ => {} + } + } + None + } + fn resolve_trim_function( &mut self, span: Span, diff --git a/src/query/storages/common/cache/src/cache_items.rs b/src/query/storages/common/cache/src/cache_items.rs index 3b3df7f23f7b7..1f39fd50c10ef 100644 --- a/src/query/storages/common/cache/src/cache_items.rs +++ b/src/query/storages/common/cache/src/cache_items.rs @@ -21,6 +21,8 @@ pub use databend_storages_common_index::filters::FilterImpl; pub use databend_storages_common_index::BloomIndexMeta; pub use databend_storages_common_index::InvertedIndexFile; pub use databend_storages_common_index::InvertedIndexMeta; +pub use databend_storages_common_index::VectorIndexFile; +pub use databend_storages_common_index::VectorIndexMeta; pub use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegment; pub use databend_storages_common_table_meta::meta::BlockMeta; pub use databend_storages_common_table_meta::meta::CompactSegmentInfo; diff --git a/src/query/storages/common/cache/src/caches.rs b/src/query/storages/common/cache/src/caches.rs index cfe565a00349b..944462e5bd4bc 100644 --- a/src/query/storages/common/cache/src/caches.rs +++ b/src/query/storages/common/cache/src/caches.rs @@ -52,6 +52,9 @@ pub type BloomIndexMetaCache = HybridCache; pub type InvertedIndexMetaCache = InMemoryLruCache; pub type InvertedIndexFileCache = InMemoryLruCache; +pub type VectorIndexMetaCache = InMemoryLruCache; +pub type VectorIndexFileCache = InMemoryLruCache; + /// In memory object cache of parquet FileMetaData of external parquet rs files pub type ParquetMetaDataCache = InMemoryLruCache; @@ -151,6 +154,20 @@ impl CachedObject for InvertedIndexMeta { } } +impl CachedObject for VectorIndexFile { + type Cache = VectorIndexFileCache; + fn cache() -> Option { + CacheManager::instance().get_vector_index_file_cache() + } +} + +impl CachedObject for VectorIndexMeta { + type Cache = VectorIndexMetaCache; + fn cache() -> Option { + CacheManager::instance().get_vector_index_meta_cache() + } +} + pub struct CacheValue { inner: Arc, mem_bytes: usize, @@ -280,6 +297,24 @@ impl From for CacheValue { } } +impl From for CacheValue { + fn from(value: VectorIndexMeta) -> Self { + CacheValue { + inner: Arc::new(value), + mem_bytes: 0, + } + } +} + +impl From for CacheValue { + fn from(value: VectorIndexFile) -> Self { + CacheValue { + mem_bytes: std::mem::size_of::() + value.data.len(), + inner: Arc::new(value), + } + } +} + impl From for CacheValue { fn from(value: ParquetMetaData) -> Self { CacheValue { diff --git a/src/query/storages/common/cache/src/manager.rs b/src/query/storages/common/cache/src/manager.rs index 1f7f12e91a335..2e4aeb6b0fb6b 100644 --- 
a/src/query/storages/common/cache/src/manager.rs +++ b/src/query/storages/common/cache/src/manager.rs @@ -42,6 +42,8 @@ use crate::caches::PrunePartitionsCache; use crate::caches::SegmentBlockMetasCache; use crate::caches::TableSnapshotCache; use crate::caches::TableSnapshotStatisticCache; +use crate::caches::VectorIndexFileCache; +use crate::caches::VectorIndexMetaCache; use crate::providers::HybridCache; use crate::providers::HybridCacheExt; use crate::CacheAccessor; @@ -104,6 +106,8 @@ pub struct CacheManager { bloom_index_meta_cache: CacheSlot, inverted_index_meta_cache: CacheSlot, inverted_index_file_cache: CacheSlot, + vector_index_meta_cache: CacheSlot, + vector_index_file_cache: CacheSlot, prune_partitions_cache: CacheSlot, parquet_meta_data_cache: CacheSlot, in_memory_table_data_cache: CacheSlot, @@ -223,6 +227,8 @@ impl CacheManager { column_oriented_segment_info_cache: CacheSlot::new(None), inverted_index_meta_cache: CacheSlot::new(None), inverted_index_file_cache: CacheSlot::new(None), + vector_index_meta_cache: CacheSlot::new(None), + vector_index_file_cache: CacheSlot::new(None), prune_partitions_cache: CacheSlot::new(None), parquet_meta_data_cache: CacheSlot::new(None), table_statistic_cache: CacheSlot::new(None), @@ -302,6 +308,25 @@ impl CacheManager { MEMORY_CACHE_INVERTED_INDEX_FILE, inverted_index_file_size, ); + + let vector_index_meta_cache = Self::new_items_cache_slot( + MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA, + config.vector_index_meta_count as usize, + ); + + // setup in-memory vector index filter cache + let vector_index_file_size = if config.vector_index_filter_memory_ratio != 0 { + (*max_server_memory_usage as usize) + * config.vector_index_filter_memory_ratio as usize + / 100 + } else { + config.vector_index_filter_size as usize + }; + let vector_index_file_cache = Self::new_bytes_cache_slot( + MEMORY_CACHE_VECTOR_INDEX_FILE, + vector_index_file_size, + ); + let prune_partitions_cache = Self::new_items_cache_slot( MEMORY_CACHE_PRUNE_PARTITIONS, config.table_prune_partitions_count as usize, @@ -335,6 +360,8 @@ impl CacheManager { bloom_index_meta_cache, inverted_index_meta_cache, inverted_index_file_cache, + vector_index_meta_cache, + vector_index_file_cache, prune_partitions_cache, table_statistic_cache, in_memory_table_data_cache, @@ -417,6 +444,14 @@ impl CacheManager { let cache = &self.inverted_index_meta_cache; Self::set_items_capacity(cache, new_capacity, name); } + MEMORY_CACHE_VECTOR_INDEX_FILE => { + let cache = &self.vector_index_file_cache; + Self::set_bytes_capacity(cache, new_capacity, name); + } + MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA => { + let cache = &self.vector_index_meta_cache; + Self::set_items_capacity(cache, new_capacity, name); + } HYBRID_CACHE_BLOOM_INDEX_FILE_META_DATA | IN_MEMORY_CACHE_BLOOM_INDEX_FILE_META_DATA => { Self::set_hybrid_cache_items_capacity( @@ -593,6 +628,14 @@ impl CacheManager { self.inverted_index_file_cache.get() } + pub fn get_vector_index_meta_cache(&self) -> Option { + self.vector_index_meta_cache.get() + } + + pub fn get_vector_index_file_cache(&self) -> Option { + self.vector_index_file_cache.get() + } + pub fn get_prune_partitions_cache(&self) -> Option { self.prune_partitions_cache.get() } @@ -736,6 +779,8 @@ const MEMORY_CACHE_PRUNE_PARTITIONS: &str = "memory_cache_prune_partitions"; const MEMORY_CACHE_INVERTED_INDEX_FILE: &str = "memory_cache_inverted_index_file"; const MEMORY_CACHE_INVERTED_INDEX_FILE_META_DATA: &str = "memory_cache_inverted_index_file_meta_data"; +const 
MEMORY_CACHE_VECTOR_INDEX_FILE: &str = "memory_cache_vector_index_file"; +const MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA: &str = "memory_cache_vector_index_file_meta_data"; const HYBRID_CACHE_BLOOM_INDEX_FILE_META_DATA: &str = "cache_bloom_index_file_meta_data"; const HYBRID_CACHE_COLUMN_DATA: &str = "cache_column_data"; @@ -985,6 +1030,8 @@ mod tests { bloom_filter_index_size: 0, inverted_index_size: None, ngram_filter_index_size: None, + vector_index_location: None, + vector_index_size: None, virtual_block_meta: None, compression: Compression::Lz4, create_on: None, diff --git a/src/query/storages/common/index/Cargo.toml b/src/query/storages/common/index/Cargo.toml index 3fb76c396cca7..7f230d41573a1 100644 --- a/src/query/storages/common/index/Cargo.toml +++ b/src/query/storages/common/index/Cargo.toml @@ -14,26 +14,40 @@ databend-common-ast = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } databend-common-functions = { workspace = true } +databend-common-vector = { workspace = true } databend-storages-common-table-meta = { workspace = true } anyerror = { workspace = true } bincode = { workspace = true, features = ["serde"] } +bitvec = { workspace = true } +bytemuck = { workspace = true, features = ["derive", "extern_crate_alloc", "must_cast", "transparentwrapper_extra"] } bytes = { workspace = true } cbordata = { workspace = true } fastrace = { workspace = true } +feistel-permutation-rs = { workspace = true } goldenfile = { workspace = true } +itertools = { workspace = true } jsonb = { workspace = true } levenshtein_automata = { workspace = true } log = { workspace = true } match-template = { workspace = true } +num-traits = { workspace = true } +num_cpus = { workspace = true } +ordered-float = { workspace = true } +parking_lot = { workspace = true } parquet = { workspace = true } +rand = { workspace = true } +rayon = { workspace = true } roaring = { workspace = true } +self_cell = { workspace = true } serde = { workspace = true } +serde_json = { workspace = true } tantivy = { workspace = true } tantivy-common = { workspace = true } tantivy-fst = { workspace = true } thiserror = { workspace = true } xorfilter-rs = { workspace = true, features = ["cbordata"] } +zerocopy = { workspace = true, features = ["derive"] } [dev-dependencies] divan = { workspace = true } diff --git a/src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs b/src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs new file mode 100644 index 0000000000000..54015bbed6b3b --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/common/bitpacking.rs @@ -0,0 +1,407 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::num::NonZero; +use std::num::Saturating; + +use num_traits::AsPrimitive; +use num_traits::ConstOne; +use num_traits::PrimInt; +use num_traits::Unsigned; + +pub trait ConstBits { + /// The size of this integer type in bits. 
+ const BITS: u32; +} + +macro_rules! impl_const_bits { + ($($t:ty),* $(,)?) => { + $( + impl ConstBits for $t { + const BITS: u32 = Self::BITS; + } + impl ConstBits for NonZero<$t> { + const BITS: u32 = Self::BITS; + } + impl ConstBits for Saturating<$t> { + const BITS: u32 = Self::BITS; + } + )* + }; +} + +impl_const_bits!(i8, i16, i32, i64, i128, isize); +impl_const_bits!(u8, u16, u32, u64, u128, usize); + +/// The internal buffer type for [`BitWriter`] and [`BitReader`]. +/// Instead of writing/reading a single byte at a time, they write/read +/// `size_of::()` bytes at once, for a better performance. +/// This is an implementation detail and shouldn't affect the data layout. +/// Any unsigned numeric type larger than `u32` should work. +type Buf = u64; + +/// Writes bits to the `u8` vector. +/// It's like [`std::io::Write`], but for bits rather than bytes. +pub struct BitWriter<'a> { + output: &'a mut Vec, + buf: Buf, + buf_bits: u8, +} + +impl<'a> BitWriter<'a> { + /// Create a new writer that appends bits to the `output`. + #[inline] + pub fn new(output: &'a mut Vec) -> Self { + Self { + output, + buf: 0, + buf_bits: 0, + } + } + + /// Write a `value` of `bits` bits to the output. + /// + /// The `bits` must be less than or equal to 32, and the `value` must fit in + /// the `bits` bits. + #[inline] + pub fn write>(&mut self, value: T, bits: u8) { + let value = value.into(); + + #[cfg(test)] + debug_assert!(u32::from(bits) <= T::BITS && packed_bits(value) <= bits); + + self.buf |= value << self.buf_bits; + self.buf_bits += bits; + if self.buf_bits >= Buf::BITS as u8 { + // ┌──value───┐┌───initial self.buf────┐ + // rrrrrvvvvvvvbbbbbbbbbbbbbbbbbbbbbbbbb + // └[2]┘└─────────────[1]──────────────┘ + self.output.extend_from_slice(&self.buf.to_le_bytes()); // [1] + self.buf_bits -= Buf::BITS as u8; + if bits - self.buf_bits == Buf::BITS as u8 { + self.buf = 0; + } else { + self.buf = value >> (bits - self.buf_bits); // [2] + } + } + } + + /// Write the remaining bufferized bits to the output. + #[inline] + pub fn finish(self) { + self.output.extend_from_slice( + &self.buf.to_le_bytes()[..(self.buf_bits as usize).div_ceil(u8::BITS as usize)], + ); + } +} + +/// Reads bits from `u8` slice. +/// It's like [`std::io::Read`], but for bits rather than bytes. +pub struct BitReader<'a> { + input: &'a [u8], + buf: Buf, + buf_bits: u8, + mask: Buf, + bits: u8, +} + +impl<'a> BitReader<'a> { + #[inline] + pub fn new(input: &'a [u8]) -> Self { + Self { + input, + buf: 0, + buf_bits: 0, + mask: 0, + bits: 0, + } + } + + /// Configure the reader to read `bits` bits at a time. This affects + /// subsequent calls to [`read()`]. + /// + /// The `bits` must be less than or equal to 32. + /// + /// Note: it's a separate method and not a parameter of [`read()`] to + /// optimize reading a group of values with the same bit size. + /// + /// [`read()`]: Self::read + #[inline] + pub fn set_bits(&mut self, bits: u8) { + #[cfg(test)] + debug_assert!(u32::from(bits) <= Buf::BITS); + + self.bits = bits; + self.mask = make_bitmask(bits); + } + + /// Returns the number of bits set with [`set_bits()`]. + /// + /// [`set_bits()`]: Self::set_bits + #[inline] + pub fn bits(&self) -> u8 { + self.bits + } + + /// Read next `bits` bits from the input. The amount of bits must be set + /// with [`set_bits()`] before calling this method. + /// + /// If read beyond the end of the input, the result would be an unspecified + /// garbage. 
+ /// + /// [`set_bits()`]: Self::set_bits + #[inline] + pub fn read(&mut self) -> T + where + T: 'static + Copy, + Buf: AsPrimitive, + { + if self.buf_bits >= self.bits { + self.buf_bits -= self.bits; + let val = (self.buf & self.mask).as_(); + self.buf >>= self.bits; + val + } else { + // Consider a naive approach: + // + // let new_buf = read_buf_and_advance(&mut self.input); + // self.buf |= new_buf << self.buf_bits; // *overflow* + // self.buf_bits += size_of_val(&new_buf) * u8::BITS; + // ... then proceed as usual ... + // + // For performance reasons, we want `new_buf` and `self.buf` to be + // both 64-bit. But when they are the same, the naive approach would + // overflow in the commented line. So, the following code is a trick + // to let us use the same type for both. + // + // ┌───────────new_buf────────────┐┌─self.buf─┐ + // rrrrrrrrrrrrrrrrrrrrrrrrrvvvvvvvbbbbbbbbbbbb + // └──────────[3]──────────┘├─[2]─┘└───[1]────┤ + // └───────val───────┘ + let new_buf = read_buf_and_advance(&mut self.input); + let val = (( + // [1] + self.buf + ) | ( + // [2] + new_buf << self.buf_bits + ) & self.mask) + .as_(); + self.buf_bits += Buf::BITS as u8 - self.bits; + if self.buf_bits == 0 { + self.buf = 0; + } else { + self.buf = /*[3]*/ new_buf >> (Buf::BITS as u8 - self.buf_bits); + } + val + } + } +} + +/// Read a single [`Buf`] from the `input` and advance (or not) the `input`. +#[inline] +fn read_buf_and_advance(input: &mut &[u8]) -> Buf { + let mut buf = 0; + if input.len() >= size_of::() { + // This line translates to a single unaligned pointer read. + buf = Buf::from_le_bytes(input[0..size_of::()].try_into().unwrap()); + // This line translates to a single pointer advance. + *input = &input[size_of::()..]; + } else { + // We could remove this branch by explicitly using unsafe pointer + // operations in the branch above, but we are playing it safe here. + for (i, byte) in input.iter().copied().enumerate() { + buf |= Buf::from(byte) << (i * u8::BITS as usize); + } + + // The following line is commented out for performance reasons as this + // should be the last read. If the caller will try to read input again + // anyway, it will get the same values again (aka "unspecified garbage" + // as stated in the documentation). + // *input = &[]; // Not needed, see the comment above. + } + buf +} + +/// Minimum amount of bits required to store a value in the range +/// `0..=max_value`. 
+pub fn packed_bits(max_value: T) -> u8 { + (T::BITS - max_value.leading_zeros()) as u8 +} + +pub fn make_bitmask(bits: u8) -> T { + if u32::from(bits) >= T::BITS { + T::max_value() + } else { + (T::ONE << usize::from(bits)) - T::ONE + } +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + use std::iter::zip; + + use num_traits::ConstOne; + use num_traits::ConstZero; + use num_traits::PrimInt; + use num_traits::Unsigned; + use rand::distributions::uniform::SampleUniform; + use rand::rngs::StdRng; + use rand::Rng as _; + use rand::SeedableRng as _; + + use super::*; + + #[test] + fn test_simple() { + let mut packed = Vec::new(); + let mut w = BitWriter::new(&mut packed); + + w.write::(0b01010, 5); + w.write::(0b10110, 5); + w.write::(0b10100, 5); + w.write::(0b010110010, 9); + w.write::(0b101100001, 9); + w.write::(0b001001101, 9); + w.write::(0x12345678, 32); + w.finish(); + assert_eq!(packed.len(), 10); + + let mut r = BitReader::new(&packed); + r.set_bits(5); + assert_eq!(r.read::(), 0b01010); + assert_eq!(r.read::(), 0b10110); + assert_eq!(r.read::(), 0b10100); + r.set_bits(9); + assert_eq!(r.read::(), 0b010110010); + assert_eq!(r.read::(), 0b101100001); + assert_eq!(r.read::(), 0b001001101); + r.set_bits(32); + assert_eq!(r.read::(), 0x12345678); + } + + #[test] + fn test_random() { + test_random_impl::(); + test_random_impl::(); + test_random_impl::(); + test_random_impl::(); + } + + fn test_random_impl() + where + Buf: AsPrimitive, + T: ConstBits + + ConstOne + + ConstZero + + Copy + + Debug + + Into + + PrimInt + + SampleUniform + + Unsigned + + 'static, + { + let mut rng = StdRng::seed_from_u64(42); + + let mut bits_per_value = Vec::new(); + let mut values = Vec::::new(); + let mut packed = Vec::new(); + let mut unpacked = Vec::::new(); + for len in 0..40 { + for _ in 0..100 { + values.clear(); + bits_per_value.clear(); + let mut total_bits = 0; + for _ in 0..len { + let bits = rng.gen_range(0u8..=T::BITS as u8); + values.push(rng.gen_range(T::ZERO..=make_bitmask(bits))); + bits_per_value.push(bits); + total_bits += u64::from(bits); + } + + packed.clear(); + let mut w = BitWriter::new(&mut packed); + for (&x, &bits) in zip(&values, &bits_per_value) { + w.write(x, bits); + } + w.finish(); + + assert_eq!(packed.len(), total_bits.next_multiple_of(8) as usize / 8); + + unpacked.clear(); + let mut r = BitReader::new(&packed); + for &bits in &bits_per_value { + r.set_bits(bits); + unpacked.push(r.read()); + } + + assert_eq!(values, unpacked); + } + } + } + + #[test] + fn test_packed_bits_simple() { + assert_eq!(packed_bits(0_u32), 0); + + assert_eq!(packed_bits(1_u32), 1); + + assert_eq!(packed_bits(2_u32), 2); + assert_eq!(packed_bits(3_u32), 2); + + assert_eq!(packed_bits(4_u32), 3); + assert_eq!(packed_bits(7_u32), 3); + + assert_eq!(packed_bits(0x_7FFF_FFFF_u32), 31); + + assert_eq!(packed_bits(0x_8000_0000_u32), 32); + assert_eq!(packed_bits(0x_FFFF_FFFF_u32), 32); + } + + #[test] + fn test_packed_bits_extensive() { + fn check>(v: u128, expected_bits: u8) { + if let Ok(x) = v.try_into() { + assert_eq!(packed_bits::(x), expected_bits); + } + } + + for expected_bits in 0..=128_u8 { + let (min, max); + if expected_bits == 0 { + (min, max) = (0, 0); + } else { + min = 1_u128 << (expected_bits - 1); + max = (min - 1) * 2 + 1; + } + + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + check::(min, expected_bits); + + check::(max, expected_bits); + check::(max, expected_bits); + 
diff --git a/src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs
new file mode 100644
index 0000000000000..388a0753add57
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_links.rs
@@ -0,0 +1,192 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use super::bitpacking::packed_bits;
+use super::bitpacking::BitReader;
+use super::bitpacking::BitWriter;
+
+/// To simplify value counting, each value should be at least one byte.
+/// Otherwise the count would be ambiguous, e.g., a 2-byte slice of 5-bit
+/// values could contain either 2 or 3 values.
+pub const MIN_BITS_PER_VALUE: u8 = u8::BITS as u8;
+
+/// How many bits are required to store a value in the range
+/// `MIN_BITS_PER_VALUE..=u32::BITS`.
+const HEADER_BITS: u8 = 5;
+
+/// A specialized packer to pack HNSW graph links.
+///
+/// It assumes that the first `m` (or `m0`) values could be re-ordered for better
+/// compression.
+///
+/// Parameters:
+/// - `bits_per_unsorted` should be enough to store the maximum point ID
+///   (it should be the same for all nodes/links within a segment).
+/// - `sorted_count` is `m` (or `m0`) for this layer.
+pub fn pack_links(
+    links: &mut Vec<u8>,
+    mut raw_links: Vec<u32>,
+    bits_per_unsorted: u8,
+    sorted_count: usize,
+) {
+    if raw_links.is_empty() {
+        return;
+    }
+
+    // Sort and delta-encode the first `sorted_count` links.
+    let sorted_count = raw_links.len().min(sorted_count);
+    raw_links[..sorted_count].sort_unstable();
+    for i in (1..sorted_count).rev() {
+        raw_links[i] -= raw_links[i - 1];
+    }
+
+    let mut w = BitWriter::new(links);
+
+    if sorted_count != 0 {
+        // 1. Header.
+        let bits_per_sorted =
+            packed_bits(*raw_links[..sorted_count].iter().max().unwrap()).max(MIN_BITS_PER_VALUE);
+        w.write(u32::from(bits_per_sorted - MIN_BITS_PER_VALUE), HEADER_BITS);
+
+        // 2. First `sorted_count` values, sorted and delta-encoded.
+        //    The bit width is determined by the header.
+        for &value in &raw_links[..sorted_count] {
+            w.write(value, bits_per_sorted);
+        }
+    }
+
+    // 3. The rest of the values, unsorted.
+    for &value in &raw_links[sorted_count..] {
+        w.write(value, bits_per_unsorted);
+    }
+
+    w.finish();
+}
+
+/// Returns an iterator over packed links.
+/// See [`pack_links`] for parameter descriptions.
+#[inline]
+pub fn iterate_packed_links(
+    links: &[u8],
+    bits_per_unsorted: u8,
+    sorted_count: usize,
+) -> PackedLinksIterator {
+    let mut reader = BitReader::new(links);
+
+    let mut remaining_bits = links.len() * u8::BITS as usize;
+    let mut remaining_bits_target = remaining_bits;
+    if sorted_count != 0 && !links.is_empty() {
+        // 1. Header.
+        reader.set_bits(HEADER_BITS);
+        let bits_per_sorted = reader.read::<u8>() + MIN_BITS_PER_VALUE;
+        remaining_bits -= HEADER_BITS as usize;
+
+        // Prepare for reading sorted values.
+        reader.set_bits(bits_per_sorted);
+        let max_sorted = remaining_bits / bits_per_sorted as usize;
+        remaining_bits_target -= sorted_count.min(max_sorted) * bits_per_sorted as usize;
+    } else {
+        // Prepare for reading unsorted values.
+        reader.set_bits(bits_per_unsorted);
+    }
+
+    PackedLinksIterator {
+        reader,
+        bits_per_unsorted,
+        remaining_bits,
+        remaining_bits_target,
+        current_delta: 0,
+    }
+}
+
+/// Iterator over links packed with [`pack_links`].
+/// Created by [`iterate_packed_links`].
+pub struct PackedLinksIterator<'a> {
+    reader: BitReader<'a>,
+    bits_per_unsorted: u8,
+    remaining_bits: usize,
+    remaining_bits_target: usize,
+    current_delta: u32,
+}
+
+impl PackedLinksIterator<'_> {
+    #[inline]
+    fn next_sorted(&mut self) -> u32 {
+        self.current_delta = self.current_delta.wrapping_add(self.reader.read::<u32>());
+        self.remaining_bits -= self.reader.bits() as usize;
+        self.current_delta
+    }
+
+    #[inline]
+    fn next_unsorted(&mut self) -> Option<u32> {
+        if let Some(rb) = self.remaining_bits.checked_sub(self.reader.bits() as usize) {
+            self.remaining_bits = rb;
+            Some(self.reader.read::<u32>())
+        } else {
+            None
+        }
+    }
+}
+
+impl Iterator for PackedLinksIterator<'_> {
+    type Item = u32;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.remaining_bits > self.remaining_bits_target {
+            let value = self.next_sorted();
+            if self.remaining_bits <= self.remaining_bits_target {
+                // It was the last sorted value.
+                self.reader.set_bits(self.bits_per_unsorted);
+            }
+            return Some(value);
+        }
+
+        self.next_unsorted()
+    }
+
+    /// Optimized [`Iterator::fold()`]. Should be faster than calling
+    /// [`Iterator::next()`] in a loop.
+    ///
+    /// It is used in a hot loop during HNSW search, so performance is critical.
+    #[inline]
+    fn fold<Acc, F: FnMut(Acc, u32) -> Acc>(mut self, mut acc: Acc, mut f: F) -> Acc {
+        while self.remaining_bits > self.remaining_bits_target {
+            acc = f(acc, self.next_sorted());
+        }
+
+        self.reader.set_bits(self.bits_per_unsorted);
+        while let Some(value) = self.next_unsorted() {
+            acc = f(acc, value);
+        }
+
+        acc
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (sorted, unsorted);
+        if let Some(sorted_bits) = self.remaining_bits.checked_sub(self.remaining_bits_target) {
+            let sorted_bits = sorted_bits.next_multiple_of(self.reader.bits() as usize);
+            sorted = sorted_bits / self.reader.bits() as usize;
+            unsorted = (self.remaining_bits - sorted_bits) / self.bits_per_unsorted as usize;
+        } else {
+            sorted = 0;
+            unsorted = self.remaining_bits / self.reader.bits() as usize;
+        }
+        (sorted + unsorted, Some(sorted + unsorted))
+    }
+}
+
+impl ExactSizeIterator for PackedLinksIterator<'_> {}
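The packing format is easiest to see end to end. A minimal round-trip sketch (not part of the patch; the `use` path, crate name, and module visibility are assumptions made for illustration):

```rust
// Round-trip through the link packer: the first `sorted_count` links come
// back sorted (they are stored as deltas from each other), the rest keep
// their original order.
use databend_storages_common_index::hnsw_index::common::bitpacking_links::{
    iterate_packed_links, pack_links,
};

fn main() {
    let raw = vec![42, 7, 100, 5, 9]; // first 3 are the re-orderable links
    let bits_per_unsorted = 10; // enough for the largest point id here
    let mut packed = Vec::new();
    pack_links(&mut packed, raw, bits_per_unsorted, 3);

    let links: Vec<u32> = iterate_packed_links(&packed, bits_per_unsorted, 3).collect();
    assert_eq!(links, [7, 42, 100, 5, 9]); // sorted prefix, unsorted tail
}
```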
diff --git a/src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs
new file mode 100644
index 0000000000000..667185ebad02a
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/bitpacking_ordered.rs
@@ -0,0 +1,311 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! A compression algorithm to store medium-to-large-sized sorted arrays of
+//! `u64` values.
+//!
+//! Allows for fast random access within the compressed data.
+//!
+//! Assumptions:
+//! - The input values are sorted.
+//! - The distribution of the values is somewhat uniform, i.e. there are no
+//!   large gaps between values. A single gap might bloat the overall size, but
+//!   it shouldn't be worse than storing byte-aligned bases without deltas.
+//!
+//! # Format
+//!
+//! The compressed data consists of small, uniformly-sized chunks.
+//! The size of each chunk is determined by compression parameters.
+//! The compression parameters are determined automatically during compression.
+//!
+//! Each chunk contains `1 << chunk_len_log2` values: the first value (the base)
+//! is stored as is, and the rest are stored as deltas from the base. Or, more
+//! formally:
+//! - `chunk_value[0] = base` (assume `delta[0]` is 0)
+//! - `chunk_value[i] = base + delta[i]` for `i > 0`
+//!
+//! ```text
+//! ┌───────┬───────┬───────┬   ┬───────┬────────┐
+//! │chunk 0│chunk 1│chunk 2│ … │chunk X│7 × 0xFF│
+//! └───────┤       ├───────┴   ┴───────┴────────┘
+//! ╭───────╯       ╰────────────────╮
+//! │         bitpacked chunk        │
+//! ├────┬──┬──┬──┬──┬   ┬────┬──────┤
+//! │base│Δ₁│Δ₂│Δ₃│Δ₄│ … │Δₙ₋₁│bitpad│
+//! └────┴──┴──┴──┴──┴   ┴────┴──────┘
+//! ```
+//!
+//! In the above diagram:
+//! - `7 × 0xFF` is the 7-byte tail padding (see [`TAIL_SIZE`]).
+//! - `base` is `parameters.base_bits` wide.
+//! - `Δ₁`..`Δₙ₋₁` are delta values, each is `parameters.delta_bits` wide.
+//! - `bitpad` is a bit padding (0..7 bits) so the chunk is byte-aligned.
+
+use std::ops::RangeInclusive;
+
+use thiserror::Error;
+use zerocopy::little_endian::U64;
+use zerocopy::FromBytes;
+use zerocopy::Immutable;
+use zerocopy::IntoBytes;
+use zerocopy::KnownLayout;
+
+use super::bitpacking::make_bitmask;
+use super::bitpacking::packed_bits;
+use super::bitpacking::BitWriter;
+
+/// The size of the tail padding.
+/// These extra 7 bytes after the last chunk allow the decompressor to safely
+/// perform unchecked unaligned 8-byte reads.
+const TAIL_SIZE: usize = size_of::<u64>() - 1;
+
+/// The allowed range for the `delta_bits` parameter.
+/// Limiting it up to 7*8 = 56 bits allows the decompressor to read a single
+/// delta value in a single unaligned read.
+/// Disallowing 0 removes unlikely edge cases.
+const DELTA_BITS_RANGE: RangeInclusive<u8> = 1..=(u64::BITS - u8::BITS) as u8;
+
+/// Larger values are unlikely to produce better compression.
+const MAX_CHUNK_LEN_LOG2: u8 = 7;
+
+/// Compress the provided data using the best parameters found.
+///
+/// # Panics
+///
+/// This function may panic if the input data is not sorted.
+pub fn compress(values: &[u64]) -> (Vec<u8>, Parameters) {
+    let parameters = Parameters::find_best(values);
+    let compressed = compress_with_parameters(values, parameters);
+    (compressed, parameters)
+}
+
+/// Compress the data with given parameters.
+fn compress_with_parameters(values: &[u64], parameters: Parameters) -> Vec<u8> {
+    let expected_size = parameters.total_chunks_size_bytes().unwrap() + TAIL_SIZE;
+    let mut compressed = Vec::with_capacity(expected_size);
+
+    for chunk in values.chunks(1 << parameters.chunk_len_log2) {
+        let first = chunk[0];
+        let mut w = BitWriter::new(&mut compressed);
+        w.write(first, parameters.base_bits);
+        for &value in chunk.iter().skip(1) {
+            w.write(value - first, parameters.delta_bits);
+        }
+        // For the last (incomplete) chunk, pad it with 0b11...11, so all chunks
+        // have the same size.
+        for _ in 0..(1 << parameters.chunk_len_log2) - chunk.len() {
+            w.write(
+                make_bitmask::<u64>(parameters.delta_bits),
+                parameters.delta_bits,
+            );
+        }
+        w.finish(); // bit padding
+    }
+
+    compressed.extend_from_slice(&[0xFF; TAIL_SIZE]);
+    assert_eq!(compressed.len(), expected_size);
+
+    compressed
+}
+
+#[derive(Clone, Debug)]
+pub struct Reader<'a> {
+    base_bits: u8,
+    base_mask: u64,
+    delta_bits: u8,
+    delta_mask: u64,
+    chunk_len_log2: u8,
+    chunk_len_mask: usize,
+    chunk_size_bytes: usize,
+    compressed: &'a [u8],
+    len: usize,
+}
+
+#[derive(Error, Debug)]
+#[error("decompression error: {0}")]
+pub struct DecompressionError(String);
+
+impl<'a> Reader<'a> {
+    pub fn new(
+        parameters: Parameters,
+        bytes: &'a [u8],
+    ) -> Result<(Self, &'a [u8]), DecompressionError> {
+        // Safety checks: the `get()` method doesn't perform bounds checking,
+        // so we need to be extra cautious here, including checking for
+        // overflows.
+        if !parameters.valid() {
+            return Err(DecompressionError("invalid parameters".to_string()));
+        }
+        let total_size_bytes = parameters
+            .total_chunks_size_bytes()
+            .and_then(|size| size.checked_add(TAIL_SIZE))
+            .ok_or_else(|| DecompressionError("invalid parameters".to_string()))?;
+
+        let (compressed, bytes) = bytes.split_at_checked(total_size_bytes).ok_or_else(|| {
+            DecompressionError(format!(
+                "insufficient length (compressed data, expected {total_size_bytes} bytes, got {})",
+                bytes.len(),
+            ))
+        })?;
+
+        let result = Self {
+            base_bits: parameters.base_bits,
+            base_mask: make_bitmask(parameters.base_bits),
+            delta_bits: parameters.delta_bits,
+            delta_mask: make_bitmask(parameters.delta_bits),
+            chunk_len_log2: parameters.chunk_len_log2,
+            chunk_len_mask: make_bitmask(parameters.chunk_len_log2),
+            chunk_size_bytes: parameters.chunk_size_bytes().unwrap(),
+            compressed,
+            len: parameters.length.get() as usize,
+        };
+
+        // Safety checks: the `get()` method doesn't perform bounds checking.
+        // The assertions below ensure that the `compressed` slice holds enough
+        // bytes for any index reachable by `get()`.
+        if let Some(max_index) = result.len.checked_sub(1) {
+            let chunk_offset = (max_index >> result.chunk_len_log2) * result.chunk_size_bytes;
+            // *base*
+            assert!(chunk_offset + size_of::<u64>() <= result.compressed.len());
+
+            let max_value_index = result.chunk_len_mask;
+            if max_value_index > 0 {
+                let delta_offset_bits =
+                    result.base_bits as usize + (max_value_index - 1) * result.delta_bits as usize;
+                // *delta*
+                assert!(
+                    chunk_offset + delta_offset_bits / u8::BITS as usize + size_of::<u64>()
+                        <= result.compressed.len()
+                );
+            }
+        }
+
+        Ok((result, bytes))
+    }
+
+    /// Parameters used to compress the data.
+    #[allow(dead_code)]
+    pub fn parameters(&self) -> Parameters {
+        Parameters {
+            length: U64::new(self.len as u64),
+            base_bits: self.base_bits,
+            delta_bits: self.delta_bits,
+            chunk_len_log2: self.chunk_len_log2,
+        }
+    }
+
+    /// The number of values in the decompressed data.
+    #[inline]
+    #[allow(dead_code)]
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Get the value at the given index.
+    #[inline]
+    pub fn get(&self, index: usize) -> Option<u64> {
+        if index >= self.len {
+            return None;
+        }
+
+        let chunk_offset = (index >> self.chunk_len_log2) * self.chunk_size_bytes;
+        let value_index = index & self.chunk_len_mask;
+        let chunk_ptr = self.compressed.as_ptr().wrapping_add(chunk_offset);
+        // SAFETY: see the *base* comment in `new()`.
+        let base = unsafe { read_u64_le(chunk_ptr) } & self.base_mask;
+        if value_index == 0 {
+            return Some(base);
+        }
+        let delta_offset_bits =
+            self.base_bits as usize + (value_index - 1) * self.delta_bits as usize;
+        // SAFETY: see the *delta* comment in `new()`.
+        let delta = (unsafe { read_u64_le(chunk_ptr.add(delta_offset_bits / u8::BITS as usize)) }
+            >> (delta_offset_bits % u8::BITS as usize))
+            & self.delta_mask;
+        Some(base + delta)
+    }
+}
+
+#[inline(always)]
+unsafe fn read_u64_le(ptr: *const u8) -> u64 {
+    unsafe { u64::from_le(ptr.cast::<u64>().read_unaligned()) }
+}
+
+/// Compression parameters. Required for decompression.
+#[derive(Clone, Copy, Debug, FromBytes, Immutable, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub struct Parameters {
+    /// Amount of values in the decompressed data.
+    pub length: U64,
+    /// Amount of bits to store base values.
+    pub base_bits: u8,
+    /// Amount of bits to store delta values.
+    pub delta_bits: u8,
+    /// Log2 of the amount of values in a chunk.
+    pub chunk_len_log2: u8,
+}
+
+impl Parameters {
+    /// Check if the parameters are valid.
+    fn valid(self) -> bool {
+        u32::from(self.base_bits) <= u64::BITS
+            && DELTA_BITS_RANGE.contains(&self.delta_bits)
+            && self.chunk_len_log2 <= MAX_CHUNK_LEN_LOG2
+    }
+
+    /// Size of a single chunk in bytes.
+    /// Returns `None` on overflow: see safety comments in [`Reader::new()`].
+    #[deny(clippy::arithmetic_side_effects, reason = "extra cautious for safety")]
+    fn chunk_size_bytes(self) -> Option<usize> {
+        let bits = (self.base_bits as usize).checked_add(
+            (self.delta_bits as usize).checked_mul(make_bitmask::<usize>(self.chunk_len_log2))?,
+        )?;
+        Some(bits.div_ceil(u8::BITS as usize))
+    }
+
+    /// Size of the compressed data, without the tail.
+    /// Returns `None` on overflow: see safety comments in [`Reader::new()`].
+    #[deny(clippy::arithmetic_side_effects, reason = "extra cautious for safety")]
+    fn total_chunks_size_bytes(self) -> Option<usize> {
+        let chunks_count = (self.length.get() as usize).div_ceil(1 << self.chunk_len_log2);
+        chunks_count.checked_mul(self.chunk_size_bytes()?)
+    }
+
+    /// Find the best compression parameters for the given values.
+    fn find_best(values: &[u64]) -> Self {
+        Self::try_all(values)
+            .min_by_key(|parameters| parameters.total_chunks_size_bytes())
+            .unwrap()
+    }
+
+    /// Generate all possible compression parameters for the given values.
+    fn try_all(values: &[u64]) -> impl Iterator<Item = Parameters> + use<'_> {
+        let last_value = values.last().copied().unwrap_or(0);
+        (0..=MAX_CHUNK_LEN_LOG2)
+            .map(move |chunk_len_log2| {
+                let mut delta_bits = *DELTA_BITS_RANGE.start();
+                for chunk in values.chunks(1 << chunk_len_log2) {
+                    delta_bits = delta_bits.max(packed_bits(chunk.last().unwrap() - chunk[0]));
+                }
+                Parameters {
+                    length: U64::new(values.len() as u64),
+                    base_bits: packed_bits(last_value).max(1),
+                    delta_bits,
+                    chunk_len_log2,
+                }
+            })
+            .filter(|parameters| DELTA_BITS_RANGE.contains(&parameters.delta_bits))
+    }
+}
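Usage of this module is straightforward; a hedged sketch (not part of the patch; the `use` path and crate name are assumptions):

```rust
// Compress a sorted array, then read individual values back without
// decompressing the whole buffer.
use databend_storages_common_index::hnsw_index::common::bitpacking_ordered;

fn main() {
    // Sorted, roughly uniform values (e.g. cumulative link offsets).
    let values: Vec<u64> = (0..1000u64).map(|i| i * 3).collect();
    let (compressed, parameters) = bitpacking_ordered::compress(&values);

    // `new` returns the reader plus whatever bytes follow the compressed blob.
    let (reader, rest) = bitpacking_ordered::Reader::new(parameters, &compressed).unwrap();
    assert!(rest.is_empty());
    assert_eq!(reader.get(500), Some(1500));
    assert_eq!(reader.get(1000), None); // out of range
}
```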
diff --git a/src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs b/src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs
new file mode 100644
index 0000000000000..27f55cf46c8da
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/fixed_length_priority_queue.rs
@@ -0,0 +1,112 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Reverse;
+use std::collections::BinaryHeap;
+use std::num::NonZeroUsize;
+use std::vec::IntoIter as VecIntoIter;
+
+use bytemuck::TransparentWrapper as _;
+use bytemuck::TransparentWrapperAlloc as _;
+use serde::Deserialize;
+use serde::Serialize;
+
+/// To avoid excessive memory allocation, FixedLengthPriorityQueue
+/// imposes a reasonable limit on the allocation size. If the limit
+/// is extremely large, we treat it as if no limit was set and
+/// delay allocation, assuming that the results will fit within a
+/// predefined threshold.
+const LARGEST_REASONABLE_ALLOCATION_SIZE: usize = 1_048_576;
+
+/// A container that forgets all but the top N elements
+///
+/// This is a MinHeap by default - it will keep the largest elements and pop
+/// the smallest.
+#[derive(Deserialize, Serialize, Clone, Debug)]
+pub struct FixedLengthPriorityQueue<T: Ord> {
+    heap: BinaryHeap<Reverse<T>>,
+    length: NonZeroUsize,
+}
+
+impl<T: Ord> Default for FixedLengthPriorityQueue<T> {
+    fn default() -> Self {
+        Self::new(1)
+    }
+}
+
+impl<T: Ord> FixedLengthPriorityQueue<T> {
+    /// Creates a new queue with the given length
+    /// Panics if length is 0
+    pub fn new(length: usize) -> Self {
+        let heap = BinaryHeap::with_capacity(
+            length
+                .saturating_add(1)
+                .min(LARGEST_REASONABLE_ALLOCATION_SIZE),
+        );
+        let length = NonZeroUsize::new(length).expect("length must be greater than zero");
+        FixedLengthPriorityQueue::<T> { heap, length }
+    }
+
+    /// Pushes a value into the priority queue.
+    ///
+    /// If the queue is full, replaces the smallest value and returns it.
+    pub fn push(&mut self, value: T) -> Option<T> {
+        if self.heap.len() < self.length.into() {
+            self.heap.push(Reverse(value));
+            return None;
+        }
+
+        let mut x = self.heap.peek_mut().unwrap();
+        let mut value = Reverse(value);
+        if x.0 < value.0 {
+            std::mem::swap(&mut *x, &mut value);
+        }
+        Some(value.0)
+    }
+
+    /// Consumes the [`FixedLengthPriorityQueue`] and returns a vector
+    /// in sorted (descending) order.
+    pub fn into_sorted_vec(self) -> Vec<T> {
+        Reverse::peel_vec(self.heap.into_sorted_vec())
+    }
+
+    /// Returns an iterator over the elements in the queue, in arbitrary order.
+    pub fn iter_unsorted(&self) -> std::slice::Iter<'_, T> {
+        Reverse::peel_slice(self.heap.as_slice()).iter()
+    }
+
+    /// Returns an iterator over the elements in the queue
+    /// in sorted (descending) order.
+    pub fn into_iter_sorted(self) -> VecIntoIter<T> {
+        self.into_sorted_vec().into_iter()
+    }
+
+    /// Returns the smallest element of the queue,
+    /// if there is any.
+    pub fn top(&self) -> Option<&T> {
+        self.heap.peek().map(|x| &x.0)
+    }
+
+    /// Returns the actual length of the queue
+    #[allow(dead_code)]
+    pub fn len(&self) -> usize {
+        self.heap.len()
+    }
+
+    /// Checks if the queue is empty
+    #[allow(dead_code)]
+    pub fn is_empty(&self) -> bool {
+        self.heap.is_empty()
+    }
+}
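A small usage sketch of the queue (not part of the patch; the `use` path is an assumption):

```rust
// The queue keeps the N largest items; once full, `push` evicts and returns
// the smallest of the currently kept items.
use databend_storages_common_index::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;

fn main() {
    let mut top3 = FixedLengthPriorityQueue::new(3);
    for score in [5, 1, 9, 3, 7] {
        let _evicted = top3.push(score); // returns Some(evicted) when full
    }
    assert_eq!(top3.top(), Some(&5)); // smallest of the kept elements
    assert_eq!(top3.into_sorted_vec(), vec![9, 7, 5]); // descending
}
```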
diff --git a/src/query/storages/common/index/src/hnsw_index/common/mod.rs b/src/query/storages/common/index/src/hnsw_index/common/mod.rs
new file mode 100644
index 0000000000000..6ec58d62c8199
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/mod.rs
@@ -0,0 +1,22 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod bitpacking;
+pub mod bitpacking_links;
+pub mod bitpacking_ordered;
+pub mod fixed_length_priority_queue;
+pub mod types;
+pub mod utils;
+pub mod zeros;
diff --git a/src/query/storages/common/index/src/hnsw_index/common/types.rs b/src/query/storages/common/index/src/hnsw_index/common/types.rs
new file mode 100644
index 0000000000000..65e05aa4e28fd
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/types.rs
@@ -0,0 +1,48 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Ordering;
+
+use ordered_float::OrderedFloat;
+use zerocopy::FromBytes;
+use zerocopy::Immutable;
+use zerocopy::IntoBytes;
+use zerocopy::KnownLayout;
+
+/// Type of vector matching score
+pub type ScoreType = f32;
+/// Type of point index inside a segment
+pub type PointOffsetType = u32;
+
+#[derive(Copy, Clone, PartialEq, Debug, Default, FromBytes, IntoBytes, KnownLayout, Immutable)]
+#[repr(C)]
+pub struct ScoredPointOffset {
+    pub idx: PointOffsetType,
+    pub score: ScoreType,
+}
+
+impl Eq for ScoredPointOffset {}
+
+impl Ord for ScoredPointOffset {
+    fn cmp(&self, other: &Self) -> Ordering {
+        OrderedFloat(self.score).cmp(&OrderedFloat(other.score))
+    }
+}
+
+impl PartialOrd for ScoredPointOffset {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/common/utils.rs b/src/query/storages/common/index/src/hnsw_index/common/utils.rs
new file mode 100644
index 0000000000000..db1f75e1cdb48
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/utils.rs
@@ -0,0 +1,31 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::Ordering;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+
+pub fn rev_range(a: usize, b: usize) -> impl Iterator<Item = usize> {
+    (b + 1..=a).rev()
+}
+
+pub fn check_process_stopped(stopped: &AtomicBool) -> Result<()> {
+    if stopped.load(Ordering::Relaxed) {
+        return Err(ErrorCode::Internal("check process stopped error"));
+    }
+    Ok(())
+}
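Before the next file, a tiny standalone demonstration (not part of the patch) of `rev_range`, which the layer search below uses to walk HNSW levels top-down; the helper is inlined so the snippet is self-contained:

```rust
// `rev_range(a, b)` iterates from `a` down to `b + 1`, exclusive of `b`.
fn rev_range(a: usize, b: usize) -> impl Iterator<Item = usize> {
    (b + 1..=a).rev()
}

fn main() {
    // Descend from the entry level (5) towards the target level (0),
    // visiting levels 5, 4, 3, 2, 1 but stopping before 0.
    let levels: Vec<usize> = rev_range(5, 0).collect();
    assert_eq!(levels, [5, 4, 3, 2, 1]);
}
```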
diff --git a/src/query/storages/common/index/src/hnsw_index/common/zeros.rs b/src/query/storages/common/index/src/hnsw_index/common/zeros.rs
new file mode 100644
index 0000000000000..040830e06dbf9
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/common/zeros.rs
@@ -0,0 +1,33 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::io::Result;
+use std::io::Write;
+
+static ZEROS: [u8; 8096] = [0u8; 8096];
+
+pub trait WriteZerosExt {
+    /// Write `len` zeros to the writer.
+    fn write_zeros(&mut self, len: usize) -> Result<()>;
+}
+
+impl<W: Write> WriteZerosExt for W {
+    fn write_zeros(&mut self, mut len: usize) -> Result<()> {
+        while len > 0 {
+            len -= self.write(&ZEROS[..ZEROS.len().min(len)])?;
+        }
+        Ok(())
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/entry_points.rs b/src/query/storages/common/index/src/hnsw_index/entry_points.rs
new file mode 100644
index 0000000000000..cb9970583f867
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/entry_points.rs
@@ -0,0 +1,162 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Ordering;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+use crate::hnsw_index::common::types::PointOffsetType;
+
+#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)]
+pub struct EntryPoint {
+    pub point_id: PointOffsetType,
+    pub level: usize,
+}
+
+impl Eq for EntryPoint {}
+
+impl PartialOrd for EntryPoint {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for EntryPoint {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.level.cmp(&other.level)
+    }
+}
+
+#[derive(Deserialize, Serialize, Clone, Debug)]
+pub struct EntryPoints {
+    entry_points: Vec<EntryPoint>,
+    extra_entry_points: FixedLengthPriorityQueue<EntryPoint>,
+}
+
+impl EntryPoints {
+    pub fn new(extra_entry_points: usize) -> Self {
+        EntryPoints {
+            entry_points: vec![],
+            extra_entry_points: FixedLengthPriorityQueue::new(extra_entry_points),
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn merge_from_other(&mut self, mut other: EntryPoints) {
+        self.entry_points.append(&mut other.entry_points);
+        // Do not merge `extra_entry_points` to prevent duplications
+    }
+
+    pub fn new_point<F>(
+        &mut self,
+        new_point: PointOffsetType,
+        level: usize,
+        checker: F,
+    ) -> Option<EntryPoint>
+    where
+        F: Fn(PointOffsetType) -> bool,
+    {
+        // There are three cases:
+        // - There is a proper entry point for the new point at the same or a higher level - return it
+        // - The new point is higher than any alternative - return the next best thing
+        // - There are no points or alternatives - return None
+
+        for i in 0..self.entry_points.len() {
+            let candidate = &self.entry_points[i];
+
+            if !checker(candidate.point_id) {
+                continue; // Candidate does not fulfil the filtering condition, hence it does not "exist"
+            }
+            // Found a candidate
+            return if candidate.level >= level {
+                // A good enough entry point exists.
+                // Return it, and also try to save the given point if required
+                self.extra_entry_points.push(EntryPoint {
+                    point_id: new_point,
+                    level,
+                });
+                Some(candidate.clone())
+            } else {
+                // The new point is better than the existing one
+                let entry = self.entry_points[i].clone();
+                self.entry_points[i] = EntryPoint {
+                    point_id: new_point,
+                    level,
+                };
+                self.extra_entry_points.push(entry.clone());
+                Some(entry)
+            };
+        }
+        // No entry points found. Create a new one and return None.
+        let new_entry = EntryPoint {
+            point_id: new_point,
+            level,
+        };
+        self.entry_points.push(new_entry);
+        None
+    }
+
+    /// Find the highest `EntryPoint` which satisfies the filtering condition of `checker`
+    pub fn get_entry_point<F>(&self, checker: F) -> Option<EntryPoint>
+    where F: Fn(PointOffsetType) -> bool {
+        self.entry_points
+            .iter()
+            .find(|entry| checker(entry.point_id))
+            .cloned()
+            .or_else(|| {
+                // Searching for at least some entry point
+                self.extra_entry_points
+                    .iter_unsorted()
+                    .filter(|entry| checker(entry.point_id))
+                    .cloned()
+                    .max_by_key(|ep| ep.level)
+            })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rand::thread_rng;
+    use rand::Rng;
+
+    use super::*;
+
+    #[test]
+    fn test_entry_points() {
+        let mut points = EntryPoints::new(10);
+
+        let mut rng = thread_rng();
+
+        for i in 0..1000 {
+            let level = rng.gen_range(0..10000);
+            points.new_point(i, level, |_x| true);
+        }
+
+        assert_eq!(points.entry_points.len(), 1);
+        assert_eq!(points.extra_entry_points.len(), 10);
+
+        assert!(points.entry_points[0].level > 1);
+
+        for i in 1000..2000 {
+            let level = rng.gen_range(0..10000);
+            points.new_point(i, level, |x| x % 5 == i % 5);
+        }
+
+        assert_eq!(points.entry_points.len(), 5);
+        assert_eq!(points.extra_entry_points.len(), 10);
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_layers.rs b/src/query/storages/common/index/src/hnsw_index/graph_layers.rs
new file mode 100644
index 0000000000000..bc5e31f53a224
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_layers.rs
@@ -0,0 +1,291 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::borrow::Cow;
+use std::cmp::max;
+use std::sync::atomic::AtomicBool;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use itertools::Itertools;
+use serde::Deserialize;
+use serde::Serialize;
+
+use super::entry_points::EntryPoint;
+use super::graph_links::GraphLinks;
+use super::graph_links::GraphLinksFormat;
+use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::types::ScoredPointOffset;
+use crate::hnsw_index::common::utils::check_process_stopped;
+use crate::hnsw_index::common::utils::rev_range;
+use crate::hnsw_index::entry_points::EntryPoints;
+use crate::hnsw_index::graph_links::GraphLinksSerializer;
+use crate::hnsw_index::point_scorer::FilteredScorer;
+use crate::hnsw_index::search_context::SearchContext;
+use crate::hnsw_index::visited_pool::VisitedListHandle;
+use crate::hnsw_index::visited_pool::VisitedPool;
+
+pub type LinkContainer = Vec<PointOffsetType>;
+#[allow(dead_code)]
+pub type LayersContainer = Vec<LinkContainer>;
+
+/// Contents of the `graph.bin` file.
+#[derive(Deserialize, Serialize, Debug)]
+pub(super) struct GraphLayerData<'a> {
+    pub(super) m: usize,
+    pub(super) m0: usize,
+    pub(super) ef_construct: usize,
+    pub(super) entry_points: Cow<'a, EntryPoints>,
+}
+
+#[derive(Debug)]
+pub struct GraphLayers {
+    pub(super) m: usize,
+    pub(super) m0: usize,
+    pub(super) links: GraphLinks,
+    pub(super) entry_points: EntryPoints,
+    pub(super) visited_pool: VisitedPool,
+}
+
+pub trait GraphLayersBase {
+    fn get_visited_list_from_pool(&self) -> VisitedListHandle;
+
+    fn links_map<F>(&self, point_id: PointOffsetType, level: usize, f: F)
+    where F: FnMut(PointOffsetType);
+
+    /// Get M based on current level
+    fn get_m(&self, level: usize) -> usize;
+
+    /// Greedy search for closest points within a single graph layer
+    fn _search_on_level(
+        &self,
+        searcher: &mut SearchContext,
+        level: usize,
+        visited_list: &mut VisitedListHandle,
+        points_scorer: &mut FilteredScorer,
+        is_stopped: &AtomicBool,
+    ) -> Result<()> {
+        let limit = self.get_m(level);
+        let mut points_ids: Vec<PointOffsetType> = Vec::with_capacity(2 * limit);
+
+        while let Some(candidate) = searcher.candidates.pop() {
+            check_process_stopped(is_stopped)?;
+
+            if candidate.score < searcher.lower_bound() {
+                break;
+            }
+
+            points_ids.clear();
+            self.links_map(candidate.idx, level, |link| {
+                if !visited_list.check(link) {
+                    points_ids.push(link);
+                }
+            });
+
+            let scores = points_scorer.score_points(&mut points_ids, limit);
+            scores.iter().copied().for_each(|score_point| {
+                searcher.process_candidate(score_point);
+                visited_list.check_and_update_visited(score_point.idx);
+            });
+        }
+
+        Ok(())
+    }
+
+    fn search_on_level(
+        &self,
+        level_entry: ScoredPointOffset,
+        level: usize,
+        ef: usize,
+        points_scorer: &mut FilteredScorer,
+        is_stopped: &AtomicBool,
+    ) -> Result<FixedLengthPriorityQueue<ScoredPointOffset>> {
+        let mut visited_list = self.get_visited_list_from_pool();
+        visited_list.check_and_update_visited(level_entry.idx);
+        let mut search_context = SearchContext::new(level_entry, ef);
+
+        self._search_on_level(
+            &mut search_context,
+            level,
+            &mut visited_list,
+            points_scorer,
+            is_stopped,
+        )?;
+        Ok(search_context.nearest)
+    }
+
+    /// Greedy searches for entry point of level `target_level`.
+    /// Beam size is 1.
+    fn search_entry(
+        &self,
+        entry_point: PointOffsetType,
+        top_level: usize,
+        target_level: usize,
+        points_scorer: &mut FilteredScorer,
+        is_stopped: &AtomicBool,
+    ) -> Result<ScoredPointOffset> {
+        let mut links: Vec<PointOffsetType> = Vec::with_capacity(2 * self.get_m(0));
+
+        let mut current_point = ScoredPointOffset {
+            idx: entry_point,
+            score: points_scorer.score_point(entry_point),
+        };
+        for level in rev_range(top_level, target_level) {
+            check_process_stopped(is_stopped)?;
+
+            let limit = self.get_m(level);
+
+            let mut changed = true;
+            while changed {
+                changed = false;
+
+                links.clear();
+                self.links_map(current_point.idx, level, |link| {
+                    links.push(link);
+                });
+
+                let scores = points_scorer.score_points(&mut links, limit);
+                scores.iter().copied().for_each(|score_point| {
+                    if score_point.score > current_point.score {
+                        changed = true;
+                        current_point = score_point;
+                    }
+                });
+            }
+        }
+        Ok(current_point)
+    }
+}
+
+impl GraphLayersBase for GraphLayers {
+    fn get_visited_list_from_pool(&self) -> VisitedListHandle {
+        self.visited_pool.get(self.links.num_points())
+    }
+
+    fn links_map<F>(&self, point_id: PointOffsetType, level: usize, f: F)
+    where F: FnMut(PointOffsetType) {
+        self.links.links(point_id, level).for_each(f);
+    }
+
+    fn get_m(&self, level: usize) -> usize {
+        if level == 0 {
+            self.m0
+        } else {
+            self.m
+        }
+    }
+}
+
+/// Object contains links between nodes for HNSW search
+///
+/// Assume all scores are similarities. Larger score = closer points
+impl GraphLayers {
+    /// Returns the highest level this point is included in
+    pub fn point_level(&self, point_id: PointOffsetType) -> usize {
+        self.links.point_level(point_id)
+    }
+
+    fn get_entry_point(
+        &self,
+        points_scorer: &FilteredScorer,
+        custom_entry_points: Option<&[PointOffsetType]>,
+    ) -> Option<EntryPoint> {
+        // Try to get it from custom entry points
+        custom_entry_points
+            .and_then(|custom_entry_points| {
+                custom_entry_points
+                    .iter()
+                    .filter(|&&point_id| points_scorer.check_vector(point_id))
+                    .map(|&point_id| {
+                        let level = self.point_level(point_id);
+                        EntryPoint { point_id, level }
+                    })
+                    .max_by_key(|ep| ep.level)
+            })
+            .or_else(|| {
+                // Otherwise use normal entry points
+                self.entry_points
+                    .get_entry_point(|point_id| points_scorer.check_vector(point_id))
+            })
+    }
+
+    pub fn search(
+        &self,
+        top: usize,
+        ef: usize,
+        mut points_scorer: FilteredScorer,
+        custom_entry_points: Option<&[PointOffsetType]>,
+        is_stopped: &AtomicBool,
+    ) -> Result<Vec<ScoredPointOffset>> {
+        let Some(entry_point) = self.get_entry_point(&points_scorer, custom_entry_points) else {
+            return Ok(Vec::default());
+        };
+
+        let zero_level_entry = self.search_entry(
+            entry_point.point_id,
+            entry_point.level,
+            0,
+            &mut points_scorer,
+            is_stopped,
+        )?;
+        let nearest = self.search_on_level(
+            zero_level_entry,
+            0,
+            max(top, ef),
+            &mut points_scorer,
+            is_stopped,
+        )?;
+        Ok(nearest.into_iter_sorted().take(top).collect_vec())
+    }
+
+    #[allow(dead_code)]
+    pub fn num_points(&self) -> usize {
+        self.links.num_points()
+    }
+}
+
+impl GraphLayers {
+    pub fn open(links_slice: &[u8], data_slice: &[u8]) -> Result<Self> {
+        let (graph_data, _): (GraphLayerData, _) =
+            bincode::serde::decode_from_slice(data_slice, bincode::config::standard()).map_err(
+                |e| ErrorCode::StorageOther(format!("failed to decode graph layer data {:?}", e)),
+            )?;
+
+        let graph_links = GraphLinks::load(links_slice)?;
+        Ok(Self {
+            m: graph_data.m,
+            m0: graph_data.m0,
+            links: graph_links,
+            entry_points: graph_data.entry_points.into_owned(),
+            visited_pool: VisitedPool::new(),
+        })
+    }
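The greedy descent in `search_entry` is the heart of the upper-layer search. A toy, self-contained sketch of the same loop (not part of the patch; all names here are hypothetical):

```rust
// Beam-width-1 greedy search on one layer: keep moving to any strictly
// better-scoring neighbor until no neighbor improves on the current point.
fn greedy_step(neighbors: &[Vec<usize>], score: impl Fn(usize) -> f32, start: usize) -> usize {
    let mut current = start;
    let mut changed = true;
    while changed {
        changed = false;
        for &n in &neighbors[current] {
            if score(n) > score(current) {
                current = n; // larger score = closer, as in the patch
                changed = true;
            }
        }
    }
    current // local maximum on this layer; entry point for the layer below
}

fn main() {
    // Chain 0-1-2-3 where the query is closest to point 3.
    let neighbors = vec![vec![1], vec![0, 2], vec![1, 3], vec![2]];
    let score = |p: usize| p as f32; // stand-in for vector similarity
    assert_eq!(greedy_step(&neighbors, score, 0), 3);
}
```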
+    #[allow(dead_code)]
+    pub fn compress_ram(&mut self) {
+        assert_eq!(self.links.format(), GraphLinksFormat::Plain);
+        let dummy = GraphLinksSerializer::new(Vec::new(), GraphLinksFormat::Plain, 0, 0)
+            .to_graph_links_ram();
+        let links = std::mem::replace(&mut self.links, dummy);
+        self.links = GraphLinksSerializer::new(
+            links.into_edges(),
+            GraphLinksFormat::Compressed,
+            self.m,
+            self.m0,
+        )
+        .to_graph_links_ram();
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs b/src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs
new file mode 100644
index 0000000000000..c226dcc146746
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_layers_builder.rs
@@ -0,0 +1,571 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::borrow::Cow;
+use std::cmp::max;
+use std::cmp::min;
+use std::collections::BinaryHeap;
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::AtomicUsize;
+
+use bitvec::prelude::BitVec;
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use parking_lot::Mutex;
+use parking_lot::MutexGuard;
+use parking_lot::RwLock;
+use rand::distributions::Uniform;
+use rand::Rng;
+
+use super::graph_layers::GraphLayerData;
+use super::graph_links::GraphLinksFormat;
+use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::types::ScoreType;
+use crate::hnsw_index::common::types::ScoredPointOffset;
+use crate::hnsw_index::entry_points::EntryPoints;
+use crate::hnsw_index::graph_layers::GraphLayers;
+use crate::hnsw_index::graph_layers::GraphLayersBase;
+use crate::hnsw_index::graph_layers::LinkContainer;
+use crate::hnsw_index::graph_links::GraphLinksSerializer;
+use crate::hnsw_index::point_scorer::FilteredScorer;
+use crate::hnsw_index::search_context::SearchContext;
+use crate::hnsw_index::visited_pool::VisitedListHandle;
+use crate::hnsw_index::visited_pool::VisitedPool;
+
+pub type LockedLinkContainer = RwLock<LinkContainer>;
+pub type LockedLayersContainer = Vec<LockedLinkContainer>;
+
+/// Same as `GraphLayers`, but allows to build in parallel
+/// Convertible to `GraphLayers`
+pub struct GraphLayersBuilder {
+    max_level: AtomicUsize,
+    m: usize,
+    m0: usize,
+    ef_construct: usize,
+    // Factor of level probability
+    level_factor: f64,
+    // Exclude points according to "not closer than base" heuristic?
+    use_heuristic: bool,
+    links_layers: Vec<LockedLayersContainer>,
+    entry_points: Mutex<EntryPoints>,
+
+    // Fields used on construction phase only
+    visited_pool: VisitedPool,
+
+    // List of bool flags, which defines if the point is already indexed or not
+    ready_list: RwLock<BitVec>,
+}
+
+impl GraphLayersBase for GraphLayersBuilder {
+    fn get_visited_list_from_pool(&self) -> VisitedListHandle {
+        self.visited_pool.get(self.num_points())
+    }
+
+    fn links_map<F>(&self, point_id: PointOffsetType, level: usize, mut f: F)
+    where F: FnMut(PointOffsetType) {
+        let links = self.links_layers[point_id as usize][level].read();
+        let ready_list = self.ready_list.read();
+        for link in links.iter() {
+            if ready_list[*link as usize] {
+                f(*link);
+            }
+        }
+    }
+
+    fn get_m(&self, level: usize) -> usize {
+        if level == 0 {
+            self.m0
+        } else {
+            self.m
+        }
+    }
+}
+
+impl GraphLayersBuilder {
+    #[allow(dead_code)]
+    pub fn get_entry_points(&self) -> MutexGuard<EntryPoints> {
+        self.entry_points.lock()
+    }
+
+    pub fn into_graph_data(self, format: GraphLinksFormat) -> Result<(Vec<u8>, Vec<u8>)> {
+        let serializer =
+            Self::links_layers_to_serializer(self.links_layers, format, self.m, self.m0);
+        let mut links_buf = Vec::new();
+        serializer.serialize_to_writer(&mut links_buf)?;
+
+        let entry_points = self.entry_points.into_inner();
+        let data = GraphLayerData {
+            m: self.m,
+            m0: self.m0,
+            ef_construct: self.ef_construct,
+            entry_points: Cow::Borrowed(&entry_points),
+        };
+
+        let data_buf =
+            bincode::serde::encode_to_vec(data, bincode::config::standard()).map_err(|e| {
+                ErrorCode::StorageOther(format!("failed to encode graph layer data {:?}", e))
+            })?;
+
+        Ok((links_buf, data_buf))
+    }
+
+    #[allow(dead_code)]
+    pub fn into_graph_layers_ram(self, format: GraphLinksFormat) -> GraphLayers {
+        GraphLayers {
+            m: self.m,
+            m0: self.m0,
+            links: Self::links_layers_to_serializer(self.links_layers, format, self.m, self.m0)
+                .to_graph_links_ram(),
+            entry_points: self.entry_points.into_inner(),
+            visited_pool: self.visited_pool,
+        }
+    }
+
+    fn links_layers_to_serializer(
+        link_layers: Vec<LockedLayersContainer>,
+        format: GraphLinksFormat,
+        m: usize,
+        m0: usize,
+    ) -> GraphLinksSerializer {
+        let edges = link_layers
+            .into_iter()
+            .map(|l| l.into_iter().map(|l| l.into_inner()).collect())
+            .collect();
+        GraphLinksSerializer::new(edges, format, m, m0)
+    }
+
+    pub fn new_with_params(
+        num_vectors: usize, // Initial number of points in index
+        m: usize,           // Expected M for non-first layer
+        m0: usize,          // Expected M for first layer
+        ef_construct: usize,
+        entry_points_num: usize, // Depends on number of points
+        use_heuristic: bool,
+        reserve: bool,
+    ) -> Self {
+        let links_layers = std::iter::repeat_with(|| {
+            vec![RwLock::new(if reserve {
+                Vec::with_capacity(m0)
+            } else {
+                vec![]
+            })]
+        })
+        .take(num_vectors)
+        .collect();
+
+        let ready_list = RwLock::new(BitVec::repeat(false, num_vectors));
+
+        Self {
+            max_level: AtomicUsize::new(0),
+            m,
+            m0,
+            ef_construct,
+            level_factor: 1.0 / (max(m, 2) as f64).ln(),
+            use_heuristic,
+            links_layers,
+            entry_points: Mutex::new(EntryPoints::new(entry_points_num)),
+            visited_pool: VisitedPool::new(),
+            ready_list,
+        }
+    }
+
+    pub fn new(
+        num_vectors: usize, // Initial number of points in index
+        m: usize,           // Expected M for non-first layer
+        m0: usize,          // Expected M for first layer
+        ef_construct: usize,
+        entry_points_num: usize, // Depends on number of points
+        use_heuristic: bool,
+    ) -> Self {
+        Self::new_with_params(
+            num_vectors,
+            m,
+            m0,
+            ef_construct,
+            entry_points_num,
+            use_heuristic,
+            true,
+        )
+    }
+    #[allow(dead_code)]
+    pub fn merge_from_other(&mut self, other: GraphLayersBuilder) {
+        self.max_level = AtomicUsize::new(max(
+            self.max_level.load(std::sync::atomic::Ordering::Relaxed),
+            other.max_level.load(std::sync::atomic::Ordering::Relaxed),
+        ));
+        let mut visited_list = self.visited_pool.get(self.num_points());
+        if other.links_layers.len() > self.links_layers.len() {
+            self.links_layers
+                .resize_with(other.links_layers.len(), Vec::new);
+        }
+        for (point_id, layers) in other.links_layers.into_iter().enumerate() {
+            let current_layers = &mut self.links_layers[point_id];
+            for (level, other_links) in layers.into_iter().enumerate() {
+                if current_layers.len() <= level {
+                    current_layers.push(other_links);
+                } else {
+                    let other_links = other_links.into_inner();
+                    visited_list.next_iteration();
+                    let mut current_links = current_layers[level].write();
+                    current_links.iter().copied().for_each(|x| {
+                        visited_list.check_and_update_visited(x);
+                    });
+                    for other_link in other_links
+                        .into_iter()
+                        .filter(|x| !visited_list.check_and_update_visited(*x))
+                    {
+                        current_links.push(other_link);
+                    }
+                }
+            }
+        }
+        self.entry_points
+            .lock()
+            .merge_from_other(other.entry_points.into_inner());
+    }
+
+    fn num_points(&self) -> usize {
+        self.links_layers.len()
+    }
+
+    /// Generate a random level for a new point, according to a geometric distribution
+    pub fn get_random_layer<R>(&self, rng: &mut R) -> usize
+    where R: Rng + ?Sized {
+        // let distribution = Uniform::new(0.0, 1.0).unwrap();
+        let distribution = Uniform::new(0.0, 1.0);
+        let sample: f64 = rng.sample(distribution);
+        let picked_level = -sample.ln() * self.level_factor;
+        picked_level.round() as usize
+    }
+
+    pub(crate) fn get_point_level(&self, point_id: PointOffsetType) -> usize {
+        self.links_layers[point_id as usize].len() - 1
+    }
+
+    pub fn set_levels(&mut self, point_id: PointOffsetType, level: usize) {
+        if self.links_layers.len() <= point_id as usize {
+            while self.links_layers.len() <= point_id as usize {
+                self.links_layers.push(vec![]);
+            }
+        }
+        let point_layers = &mut self.links_layers[point_id as usize];
+        while point_layers.len() <= level {
+            let links = Vec::with_capacity(self.m);
+            point_layers.push(RwLock::new(links));
+        }
+        self.max_level
+            .fetch_max(level, std::sync::atomic::Ordering::Relaxed);
+    }
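The sampler above yields a geometric-like level distribution; a standalone sketch (not part of the patch) that reproduces the same formula and shows the fall-off empirically:

```rust
// With m = 16, level_factor = 1 / ln(16), so P(level >= k) = 16^(0.5 - k)
// for k >= 1: each level above the first is roughly 16x rarer.
use rand::Rng;

fn main() {
    let m = 16.0_f64;
    let level_factor = 1.0 / m.ln();
    let mut rng = rand::thread_rng();
    let mut histogram = [0usize; 8];
    for _ in 0..100_000 {
        let sample: f64 = rng.gen_range(0.0..1.0);
        let level = (-sample.ln() * level_factor).round() as usize;
        histogram[level.min(7)] += 1;
    }
    // Counts fall off geometrically, e.g. roughly 75k, 23k, 1.5k, 90, ...
    println!("{histogram:?}");
}
```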
+    /// Connect a new point to `links`, so that `links` contains only the closest points
+    fn connect_new_point<F>(
+        links: &mut LinkContainer,
+        new_point_id: PointOffsetType,
+        target_point_id: PointOffsetType,
+        level_m: usize,
+        mut score_internal: F,
+    ) where
+        F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
+    {
+        // ToDo: binary search here? (most likely not worth it)
+        let new_to_target = score_internal(target_point_id, new_point_id);
+
+        let mut id_to_insert = links.len();
+        for (i, &item) in links.iter().enumerate() {
+            let target_to_link = score_internal(target_point_id, item);
+            if target_to_link < new_to_target {
+                id_to_insert = i;
+                break;
+            }
+        }
+
+        if links.len() < level_m {
+            links.insert(id_to_insert, new_point_id);
+        } else if id_to_insert != links.len() {
+            links.pop();
+            links.insert(id_to_insert, new_point_id);
+        }
+    }
+
+    ///
+    fn select_candidate_with_heuristic_from_sorted<F>(
+        candidates: impl Iterator<Item = ScoredPointOffset>,
+        m: usize,
+        mut score_internal: F,
+    ) -> Vec<PointOffsetType>
+    where
+        F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
+    {
+        let mut result_list = Vec::with_capacity(m);
+        for current_closest in candidates {
+            if result_list.len() >= m {
+                break;
+            }
+            let mut is_good = true;
+            for &selected_point in &result_list {
+                let dist_to_already_selected = score_internal(current_closest.idx, selected_point);
+                if dist_to_already_selected > current_closest.score {
+                    is_good = false;
+                    break;
+                }
+            }
+            if is_good {
+                result_list.push(current_closest.idx);
+            }
+        }
+
+        result_list
+    }
+
+    ///
+    pub(crate) fn select_candidates_with_heuristic<F>(
+        candidates: FixedLengthPriorityQueue<ScoredPointOffset>,
+        m: usize,
+        score_internal: F,
+    ) -> Vec<PointOffsetType>
+    where
+        F: FnMut(PointOffsetType, PointOffsetType) -> ScoreType,
+    {
+        let closest_iter = candidates.into_iter_sorted();
+        Self::select_candidate_with_heuristic_from_sorted(closest_iter, m, score_internal)
+    }
+
+    pub fn link_new_point(&self, point_id: PointOffsetType, mut points_scorer: FilteredScorer) {
+        // Check if there is a suitable entry point
+        // - its level is higher or equal
+        // - it satisfies filters
+
+        let level = self.get_point_level(point_id);
+
+        let entry_point_opt = self
+            .entry_points
+            .lock()
+            .get_entry_point(|point_id| points_scorer.check_vector(point_id));
+        if let Some(entry_point) = entry_point_opt {
+            let mut level_entry = if entry_point.level > level {
+                // The entry point is higher than the new point.
+                // Let's find the closest one on the same level.
+
+                // greedy search for a single closest point
+                self.search_entry(
+                    entry_point.point_id,
+                    entry_point.level,
+                    level,
+                    &mut points_scorer,
+                    &AtomicBool::new(false),
+                )
+                .unwrap()
+            } else {
+                ScoredPointOffset {
+                    idx: entry_point.point_id,
+                    score: points_scorer.score_internal(point_id, entry_point.point_id),
+                }
+            };
+            // minimal common level for entry points
+            let linking_level = min(level, entry_point.level);
+
+            for curr_level in (0..=linking_level).rev() {
+                level_entry = self.link_new_point_on_level(
+                    point_id,
+                    curr_level,
+                    &mut points_scorer,
+                    level_entry,
+                );
+            }
+        } else {
+            // New point is a new empty entry (for this filter, at least)
+            // We can't do much here, so just quit
+        }
+        let was_ready = self.ready_list.write().replace(point_id as usize, true);
+        debug_assert!(!was_ready, "Point {point_id} was already marked as ready");
+        self.entry_points
+            .lock()
+            .new_point(point_id, level, |point_id| {
+                points_scorer.check_vector(point_id)
+            });
+    }
+
+    /// Add a new point using pre-existing links.
+    /// Mutually exclusive with [`Self::link_new_point`].
+    #[allow(dead_code)]
+    pub fn add_new_point(&self, point_id: PointOffsetType, levels: Vec<Vec<PointOffsetType>>) {
+        let level = self.get_point_level(point_id);
+        debug_assert_eq!(levels.len(), level + 1);
+
+        for (level, neighbours) in levels.iter().enumerate() {
+            let mut links = self.links_layers[point_id as usize][level].write();
+            links.clear();
+            links.extend_from_slice(neighbours);
+        }
+
+        let was_ready = self.ready_list.write().replace(point_id as usize, true);
+        debug_assert!(!was_ready);
+        self.entry_points
+            .lock()
+            .new_point(point_id, level, |_| true);
+    }
+
+    /// Link a new point on a specific level.
+    /// Returns an entry point for the level below.
+    fn link_new_point_on_level(
+        &self,
+        point_id: PointOffsetType,
+        curr_level: usize,
+        points_scorer: &mut FilteredScorer,
+        mut level_entry: ScoredPointOffset,
+    ) -> ScoredPointOffset {
+        let mut visited_list = self.get_visited_list_from_pool();
+
+        visited_list.check_and_update_visited(level_entry.idx);
+
+        let mut search_context = SearchContext::new(level_entry, self.ef_construct);
+
+        self._search_on_level(
+            &mut search_context,
+            curr_level,
+            &mut visited_list,
+            points_scorer,
+            &AtomicBool::new(false),
+        )
+        .unwrap();
+
+        if let Some(the_nearest) = search_context.nearest.iter_unsorted().max() {
+            level_entry = *the_nearest;
+        }
+
+        if self.use_heuristic {
+            self.link_with_heuristic(
+                point_id,
+                curr_level,
+                &visited_list,
+                points_scorer,
+                search_context,
+            );
+        } else {
+            self.link_without_heuristic(point_id, curr_level, points_scorer, search_context);
+        }
+
+        level_entry
+    }
+
+    fn link_with_heuristic(
+        &self,
+        point_id: PointOffsetType,
+        curr_level: usize,
+        visited_list: &VisitedListHandle,
+        points_scorer: &FilteredScorer,
+        mut search_context: SearchContext,
+    ) {
+        let level_m = self.get_m(curr_level);
+        let scorer = |a, b| points_scorer.score_internal(a, b);
+
+        let selected_nearest = {
+            let mut existing_links = self.links_layers[point_id as usize][curr_level].write();
+            {
+                let ready_list = self.ready_list.read();
+                for &existing_link in existing_links.iter() {
+                    if !visited_list.check(existing_link) && ready_list[existing_link as usize] {
+                        search_context.process_candidate(ScoredPointOffset {
+                            idx: existing_link,
+                            score: points_scorer.score_point(existing_link),
+                        });
+                    }
+                }
+            }
+
+            let selected_nearest =
+                Self::select_candidates_with_heuristic(search_context.nearest, level_m, scorer);
+            existing_links.clone_from(&selected_nearest);
+            selected_nearest
+        };
+
+        for &other_point in &selected_nearest {
+            let mut other_point_links = self.links_layers[other_point as usize][curr_level].write();
+            if other_point_links.len() < level_m {
+                // If the linked point still lacks neighbours
+                other_point_links.push(point_id);
+            } else {
+                let mut candidates = BinaryHeap::with_capacity(level_m + 1);
+                candidates.push(ScoredPointOffset {
+                    idx: point_id,
+                    score: points_scorer.score_internal(point_id, other_point),
+                });
+                for other_point_link in other_point_links.iter().take(level_m).copied() {
+                    candidates.push(ScoredPointOffset {
+                        idx: other_point_link,
+                        score: points_scorer.score_internal(other_point_link, other_point),
+                    });
+                }
+                let selected_candidates = Self::select_candidate_with_heuristic_from_sorted(
+                    candidates.into_sorted_vec().into_iter().rev(),
+                    level_m,
+                    scorer,
+                );
+                other_point_links.clear(); // this does not free memory, which is good
+                for selected in selected_candidates.iter().copied() {
+                    other_point_links.push(selected);
+                }
+            }
+        }
+    }
+
+    fn link_without_heuristic(
+        &self,
+        point_id: PointOffsetType,
curr_level: usize, + points_scorer: &FilteredScorer, + search_context: SearchContext, + ) { + let level_m = self.get_m(curr_level); + let scorer = |a, b| points_scorer.score_internal(a, b); + for nearest_point in search_context.nearest.iter_unsorted() { + { + let mut links = self.links_layers[point_id as usize][curr_level].write(); + Self::connect_new_point(&mut links, nearest_point.idx, point_id, level_m, scorer); + } + + { + let mut links = self.links_layers[nearest_point.idx as usize][curr_level].write(); + Self::connect_new_point(&mut links, point_id, nearest_point.idx, level_m, scorer); + } + } + } + + /// This function returns average number of links per node in HNSW graph + /// on specified level. + /// + /// Useful for: + /// - estimating memory consumption + /// - percolation threshold estimation + /// - debugging + #[allow(dead_code)] + pub fn get_average_connectivity_on_level(&self, level: usize) -> f32 { + let mut sum = 0; + let mut count = 0; + for links in self.links_layers.iter() { + if links.len() > level { + sum += links[level].read().len(); + count += 1; + } + } + if count == 0 { + 0.0 + } else { + sum as f32 / count as f32 + } + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links.rs b/src/query/storages/common/index/src/hnsw_index/graph_links.rs new file mode 100644 index 0000000000000..7574de694a412 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/graph_links.rs @@ -0,0 +1,130 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_exception::Result; + +use crate::hnsw_index::common::types::PointOffsetType; + +mod header; +mod serializer; +mod view; + +pub use serializer::GraphLinksSerializer; +use view::CompressionInfo; +use view::GraphLinksView; +pub use view::LinksIterator; + +// Links data for whole graph layers. +// +// sorted +// points: points: +// points to lvl 012345 142350 +// 0 -> 0 +// 1 -> 4 lvl4: 7 lvl4: 7 +// 2 -> 2 lvl3: Z Y lvl3: ZY +// 3 -> 2 lvl2: abcd lvl2: adbc +// 4 -> 3 lvl1: ABCDE lvl1: ADBCE +// 5 -> 1 lvl0: 123456 lvl0: 123456 <- lvl 0 is not sorted +// +// +// lvl offset: 6 11 15 17 +// │ │ │ │ +// │ │ │ │ +// ▼ ▼ ▼ ▼ +// indexes: 012345 6789A BCDE FG H +// +// flatten: 123456 ADBCE adbc ZY 7 +// ▲ ▲ ▲ ▲ ▲ ▲ ▲ +// │ │ │ │ │ │ │ +// │ │ │ │ │ │ │ +// │ │ │ │ │ │ │ +// reindex: 142350 142350 142350 142350 (same for each level) +// +// +// for lvl > 0: +// links offset = level_offsets[level] + offsets[reindex[point_id]] + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum GraphLinksFormat { + #[allow(dead_code)] + Plain, + Compressed, +} + +self_cell::self_cell! 
{
+    pub struct GraphLinks {
+        owner: Vec<u8>,
+        #[covariant]
+        dependent: GraphLinksView,
+    }
+
+    impl {Debug}
+}
+
+impl GraphLinks {
+    pub fn load(data: &[u8]) -> Result<Self> {
+        let format = GraphLinksFormat::Compressed;
+        Self::try_new(data.to_vec(), |x| GraphLinksView::load(x, format))
+    }
+
+    fn view(&self) -> &GraphLinksView {
+        self.borrow_dependent()
+    }
+
+    pub fn format(&self) -> GraphLinksFormat {
+        match self.view().compression {
+            CompressionInfo::Uncompressed { .. } => GraphLinksFormat::Plain,
+            CompressionInfo::Compressed { .. } => GraphLinksFormat::Compressed,
+        }
+    }
+
+    pub fn num_points(&self) -> usize {
+        self.view().reindex.len()
+    }
+
+    #[allow(dead_code)]
+    pub fn for_each_link(
+        &self,
+        point_id: PointOffsetType,
+        level: usize,
+        f: impl FnMut(PointOffsetType),
+    ) {
+        self.links(point_id, level).for_each(f);
+    }
+
+    #[inline]
+    pub fn links(&self, point_id: PointOffsetType, level: usize) -> LinksIterator {
+        self.view().links(point_id, level)
+    }
+
+    pub fn point_level(&self, point_id: PointOffsetType) -> usize {
+        self.view().point_level(point_id)
+    }
+
+    /// Convert the graph links to a vector of edges, suitable for passing into
+    /// [`GraphLinksSerializer::new`] or using in tests.
+    pub fn into_edges(self) -> Vec<Vec<Vec<PointOffsetType>>> {
+        let mut edges = Vec::with_capacity(self.num_points());
+        for point_id in 0..self.num_points() {
+            let num_levels = self.point_level(point_id as PointOffsetType) + 1;
+            let mut levels = Vec::with_capacity(num_levels);
+            for level in 0..num_levels {
+                levels.push(self.links(point_id as PointOffsetType, level).collect());
+            }
+            edges.push(levels);
+        }
+        edges
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links/header.rs b/src/query/storages/common/index/src/hnsw_index/graph_links/header.rs
new file mode 100644
index 0000000000000..3a538a75e9543
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_links/header.rs
@@ -0,0 +1,52 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use zerocopy::little_endian::U64 as LittleU64;
+use zerocopy::FromBytes;
+use zerocopy::Immutable;
+use zerocopy::IntoBytes;
+use zerocopy::KnownLayout;
+
+use crate::hnsw_index::common::bitpacking_ordered;
+
+/// File header for the plain format.
+#[derive(FromBytes, Immutable, IntoBytes, KnownLayout)]
+#[repr(C)]
+pub(super) struct HeaderPlain {
+    pub(super) point_count: u64,
+    pub(super) levels_count: u64,
+    pub(super) total_links_count: u64,
+    pub(super) total_offset_count: u64,
+    /// Either 0 or 4.
+    pub(super) offsets_padding_bytes: u64,
+    pub(super) zero_padding: [u8; 24],
+}
+
+/// File header for the compressed format.
+#[derive(FromBytes, Immutable, IntoBytes, KnownLayout)]
+#[repr(C, align(8))]
+pub(super) struct HeaderCompressed {
+    pub(super) point_count: LittleU64,
+    /// Should be [`HEADER_VERSION_COMPRESSED`].
+    pub(super) version: LittleU64,
+    pub(super) levels_count: LittleU64,
+    pub(super) total_links_bytes: LittleU64,
+    pub(super) offsets_parameters: bitpacking_ordered::Parameters,
+    pub(super) m: LittleU64,
+    pub(super) m0: LittleU64,
+    pub(super) zero_padding: [u8; 5],
+}
+
+pub(super) const HEADER_VERSION_COMPRESSED: u64 = 0xFFFF_FFFF_FFFF_FF01;
diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs b/src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs
new file mode 100644
index 0000000000000..0942eeeab942a
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/graph_links/serializer.rs
@@ -0,0 +1,233 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::Reverse;
+use std::io::Write;
+use std::mem::size_of;
+use std::mem::take;
+
+use itertools::Either;
+use zerocopy::little_endian::U64 as LittleU64;
+use zerocopy::IntoBytes as AsBytes;
+
+use super::header::HeaderCompressed;
+use super::header::HeaderPlain;
+use super::header::HEADER_VERSION_COMPRESSED;
+use super::GraphLinks;
+use super::GraphLinksFormat;
+use crate::hnsw_index::common::bitpacking::packed_bits;
+use crate::hnsw_index::common::bitpacking_links::pack_links;
+use crate::hnsw_index::common::bitpacking_links::MIN_BITS_PER_VALUE;
+use crate::hnsw_index::common::bitpacking_ordered;
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::zeros::WriteZerosExt;
+use crate::hnsw_index::graph_links::GraphLinksView;
+
+pub struct GraphLinksSerializer {
+    m: usize,
+    m0: usize,
+    links: Vec<u8>,
+    kind: Kind,
+    reindex: Vec<PointOffsetType>,
+    level_offsets: Vec<u64>,
+}
+
+enum Kind {
+    Uncompressed {
+        offsets_padding: usize,
+        offsets: Vec<u64>,
+    },
+    Compressed {
+        compressed_offsets: Vec<u8>,
+        offsets_parameters: bitpacking_ordered::Parameters,
+    },
+}
+
+impl GraphLinksSerializer {
+    pub fn new(
+        mut edges: Vec<Vec<Vec<PointOffsetType>>>,
+        format: GraphLinksFormat,
+        m: usize,
+        m0: usize,
+    ) -> Self {
+        // create map from index in `offsets` to point_id
+        let mut back_index: Vec<usize> = (0..edges.len()).collect();
+        // sort by max layer and use this map to build `Self.reindex`
+        back_index.sort_unstable_by_key(|&i| Reverse(edges[i].len()));
+
+        // `reindex` is map from point id to index in `Self.offsets`
+        let mut reindex = vec![0; back_index.len()];
+        for i in 0..back_index.len() {
+            reindex[back_index[i]] = i as PointOffsetType;
+        }
+
+        let levels_count = back_index
+            .first()
+            .map_or(0, |&point_id| edges[point_id].len());
+        let mut point_count_by_level = vec![0; levels_count];
+        for point in &edges {
+            point_count_by_level[point.len() - 1] += 1;
+        }
+
+        let mut total_offsets_len = 0;
+        let mut level_offsets = Vec::with_capacity(levels_count);
+        let mut suffix_sum = point_count_by_level.iter().sum::<u64>();
+        for &value in point_count_by_level.iter() {
+            level_offsets.push(total_offsets_len);
+            total_offsets_len += suffix_sum;
+            suffix_sum -= value;
+        }
+        total_offsets_len += 1;
+
+        let mut links = Vec::new();
+        let mut
offsets = Vec::with_capacity(total_offsets_len as usize);
+        offsets.push(0);
+        let bits_per_unsorted = packed_bits(u32::try_from(edges.len().saturating_sub(1)).unwrap())
+            .max(MIN_BITS_PER_VALUE);
+
+        for level in 0..levels_count {
+            let count = point_count_by_level.iter().skip(level).sum::<u64>() as usize;
+            let (sorted_count, iter) = match level {
+                0 => (m0, Either::Left(0..count)),
+                _ => (m, Either::Right(back_index[..count].iter().copied())),
+            };
+            iter.for_each(|id| {
+                let raw_links = take(&mut edges[id][level]);
+                match format {
+                    GraphLinksFormat::Compressed => {
+                        pack_links(&mut links, raw_links, bits_per_unsorted, sorted_count);
+                        offsets.push(links.len() as u64);
+                    }
+                    GraphLinksFormat::Plain => {
+                        links.extend_from_slice(raw_links.as_bytes());
+                        offsets.push((links.len() as u64) / size_of::<PointOffsetType>() as u64);
+                    }
+                }
+            });
+        }
+
+        let kind = match format {
+            GraphLinksFormat::Compressed => {
+                let (compressed_offsets, offsets_parameters) =
+                    bitpacking_ordered::compress(&offsets);
+                Kind::Compressed {
+                    compressed_offsets,
+                    offsets_parameters,
+                }
+            }
+            GraphLinksFormat::Plain => {
+                let len = links.len() + reindex.as_bytes().len();
+                Kind::Uncompressed {
+                    offsets_padding: len.next_multiple_of(size_of::<u64>()) - len,
+                    offsets,
+                }
+            }
+        };
+
+        Self {
+            m,
+            m0,
+            links,
+            kind,
+            reindex,
+            level_offsets,
+        }
+    }
+
+    pub fn to_graph_links_ram(&self) -> GraphLinks {
+        let format = match &self.kind {
+            Kind::Uncompressed { .. } => GraphLinksFormat::Plain,
+            Kind::Compressed { .. } => GraphLinksFormat::Compressed,
+        };
+
+        let size = self.level_offsets.as_bytes().len()
+            + self.reindex.as_bytes().len()
+            + self.links.len()
+            + (match &self.kind {
+                Kind::Uncompressed {
+                    offsets_padding: padding,
+                    offsets,
+                } => size_of::<HeaderPlain>() + padding + offsets.as_bytes().len(),
+                Kind::Compressed {
+                    compressed_offsets,
+                    offsets_parameters: _,
+                } => size_of::<HeaderCompressed>() + compressed_offsets.len(),
+            });
+
+        let mut data = Vec::with_capacity(size);
+        // Unwrap should be safe, as `impl Write` for `Vec<u8>` never fails.
+        self.serialize_to_writer(&mut data).unwrap();
+        debug_assert_eq!(data.len(), size);
+        // Unwrap should be safe as we just created the data.
+ GraphLinks::try_new(data, |x| GraphLinksView::load(x, format)).unwrap() + } + + pub(crate) fn serialize_to_writer(&self, writer: &mut impl Write) -> std::io::Result<()> { + match &self.kind { + Kind::Uncompressed { + offsets_padding, + offsets, + } => { + let header = HeaderPlain { + point_count: self.reindex.len() as u64, + levels_count: self.level_offsets.len() as u64, + total_links_count: self.links.len() as u64 + / size_of::() as u64, + total_offset_count: offsets.len() as u64, + offsets_padding_bytes: *offsets_padding as u64, + zero_padding: [0; 24], + }; + writer.write_all(header.as_bytes())?; + } + Kind::Compressed { + compressed_offsets: _, + offsets_parameters, + } => { + let header = HeaderCompressed { + version: HEADER_VERSION_COMPRESSED.into(), + point_count: LittleU64::new(self.reindex.len() as u64), + total_links_bytes: LittleU64::new(self.links.len() as u64), + offsets_parameters: *offsets_parameters, + levels_count: LittleU64::new(self.level_offsets.len() as u64), + m: LittleU64::new(self.m as u64), + m0: LittleU64::new(self.m0 as u64), + zero_padding: [0; 5], + }; + writer.write_all(header.as_bytes())?; + } + } + + writer.write_all(self.level_offsets.as_bytes())?; + writer.write_all(self.reindex.as_bytes())?; + writer.write_all(&self.links)?; + match &self.kind { + Kind::Uncompressed { + offsets_padding: padding, + offsets, + } => { + writer.write_zeros(*padding)?; + writer.write_all(offsets.as_bytes())?; + } + Kind::Compressed { + compressed_offsets, + offsets_parameters: _, + } => { + writer.write_all(compressed_offsets)?; + } + } + + Ok(()) + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/graph_links/view.rs b/src/query/storages/common/index/src/hnsw_index/graph_links/view.rs new file mode 100644 index 0000000000000..e466c18d54fb8 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/graph_links/view.rs @@ -0,0 +1,189 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::iter::Copied; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use itertools::Either; +use itertools::Itertools as _; +use zerocopy::native_endian::U64 as NativeU64; +use zerocopy::FromBytes; +use zerocopy::Immutable; + +use super::header::HeaderCompressed; +use super::header::HeaderPlain; +use super::header::HEADER_VERSION_COMPRESSED; +use super::GraphLinksFormat; +use crate::hnsw_index::common::bitpacking::packed_bits; +use crate::hnsw_index::common::bitpacking_links::iterate_packed_links; +use crate::hnsw_index::common::bitpacking_links::PackedLinksIterator; +use crate::hnsw_index::common::bitpacking_links::MIN_BITS_PER_VALUE; +use crate::hnsw_index::common::bitpacking_ordered; +use crate::hnsw_index::common::types::PointOffsetType; + +/// An (almost) zero-copy, non-owning view into serialized graph links stored +/// as a `&[u8]` slice. 
+#[derive(Debug)]
+pub(super) struct GraphLinksView<'a> {
+    pub(super) reindex: &'a [PointOffsetType],
+    pub(super) compression: CompressionInfo<'a>,
+    /// Level offsets, copied into RAM for faster access.
+    /// Has at least two elements:
+    /// - [`super::GraphLinksSerializer`] always writes `0` as the first element.
+    /// - Additional element is added during deserialization.
+    pub(super) level_offsets: Vec<u64>,
+}
+
+/// An iterator type returned by [`GraphLinksView::links`].
+pub type LinksIterator<'a> =
+    Either<Copied<std::slice::Iter<'a, PointOffsetType>>, PackedLinksIterator<'a>>;
+
+#[derive(Debug)]
+pub(super) enum CompressionInfo<'a> {
+    Uncompressed {
+        links: &'a [u32],
+        offsets: &'a [NativeU64],
+    },
+    Compressed {
+        compressed_links: &'a [u8],
+        offsets: bitpacking_ordered::Reader<'a>,
+        m: usize,
+        m0: usize,
+        bits_per_unsorted: u8,
+    },
+}
+
+impl GraphLinksView<'_> {
+    pub(super) fn load(data: &[u8], format: GraphLinksFormat) -> Result<GraphLinksView> {
+        match format {
+            GraphLinksFormat::Compressed => Self::load_compressed(data),
+            GraphLinksFormat::Plain => Self::load_plain(data),
+        }
+    }
+
+    fn load_plain(data: &[u8]) -> Result<GraphLinksView> {
+        let (header, data) =
+            HeaderPlain::ref_from_prefix(data).map_err(|_| error_insufficient_size())?;
+        let (level_offsets, data) =
+            read_level_offsets(data, header.levels_count, header.total_offset_count)?;
+        let (reindex, data) = get_slice::<PointOffsetType>(data, header.point_count)?;
+        let (links, data) = get_slice::<u32>(data, header.total_links_count)?;
+        let (_, data) = get_slice::<u8>(data, header.offsets_padding_bytes)?;
+        let (offsets, _bytes) = get_slice::<NativeU64>(data, header.total_offset_count)?;
+        Ok(GraphLinksView {
+            reindex,
+            compression: CompressionInfo::Uncompressed { links, offsets },
+            level_offsets,
+        })
+    }
+
+    fn load_compressed(data: &[u8]) -> Result<GraphLinksView> {
+        let (header, data) =
+            HeaderCompressed::ref_from_prefix(data).map_err(|_| error_insufficient_size())?;
+        debug_assert_eq!(header.version.get(), HEADER_VERSION_COMPRESSED);
+        let (level_offsets, data) = read_level_offsets(
+            data,
+            header.levels_count.get(),
+            header.offsets_parameters.length.get(),
+        )?;
+        let (reindex, data) = get_slice::<PointOffsetType>(data, header.point_count.get())?;
+        let (compressed_links, data) = get_slice::<u8>(data, header.total_links_bytes.get())?;
+        let (offsets, _bytes) = bitpacking_ordered::Reader::new(header.offsets_parameters, data)
+            .map_err(|e| ErrorCode::Internal(format!("Can't create decompressor: {e}")))?;
+        Ok(GraphLinksView {
+            reindex,
+            compression: CompressionInfo::Compressed {
+                compressed_links,
+                offsets,
+                m: header.m.get() as usize,
+                m0: header.m0.get() as usize,
+                bits_per_unsorted: MIN_BITS_PER_VALUE.max(packed_bits(
+                    u32::try_from(header.point_count.get().saturating_sub(1))
+                        .map_err(|_| ErrorCode::Internal("Too many points in GraphLinks file"))?,
+                )),
+            },
+            level_offsets,
+        })
+    }
+
+    pub(super) fn links(&self, point_id: PointOffsetType, level: usize) -> LinksIterator {
+        let idx = if level == 0 {
+            point_id as usize
+        } else {
+            self.level_offsets[level] as usize + self.reindex[point_id as usize] as usize
+        };
+
+        match self.compression {
+            CompressionInfo::Uncompressed { links, offsets } => {
+                let links_range = offsets[idx].get() as usize..offsets[idx + 1].get() as usize;
+                Either::Left(links[links_range].iter().copied())
+            }
+            CompressionInfo::Compressed {
+                compressed_links,
+                ref offsets,
+                m,
+                m0,
+                bits_per_unsorted,
+            } => {
+                let links_range =
+                    offsets.get(idx).unwrap() as usize..offsets.get(idx + 1).unwrap() as usize;
+                Either::Right(iterate_packed_links(
+                    &compressed_links[links_range],
+                    bits_per_unsorted,
+                    if
level == 0 { m0 } else { m },
+                ))
+            }
+        }
+    }
+
+    pub(super) fn point_level(&self, point_id: PointOffsetType) -> usize {
+        let reindexed_point_id = u64::from(self.reindex[point_id as usize]);
+        for (level, (&a, &b)) in self
+            .level_offsets
+            .iter()
+            .skip(1)
+            .tuple_windows()
+            .enumerate()
+        {
+            if reindexed_point_id >= b - a {
+                return level;
+            }
+        }
+        // See the doc comment on `level_offsets`.
+        self.level_offsets.len() - 2
+    }
+}
+
+fn read_level_offsets(
+    bytes: &[u8],
+    levels_count: u64,
+    total_offset_count: u64,
+) -> Result<(Vec<u64>, &[u8])> {
+    let (level_offsets, bytes) = get_slice::<u64>(bytes, levels_count)?;
+    let mut result = Vec::with_capacity(level_offsets.len() + 1);
+    result.extend_from_slice(level_offsets);
+    result.push(total_offset_count.checked_sub(1).ok_or_else(|| {
+        ErrorCode::Internal("Total offset count should be at least 1 in GraphLinks file")
+    })?);
+    Ok((result, bytes))
+}
+
+fn get_slice<T: FromBytes + Immutable>(data: &[u8], length: u64) -> Result<(&[T], &[u8])> {
+    <[T]>::ref_from_prefix_with_elems(data, length as usize).map_err(|_| error_insufficient_size())
+}
+
+fn error_insufficient_size() -> ErrorCode {
+    ErrorCode::Internal("Insufficient file size for GraphLinks file")
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/hnsw.rs b/src/query/storages/common/index/src/hnsw_index/hnsw.rs
new file mode 100644
index 0000000000000..801809d981c1b
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/hnsw.rs
@@ -0,0 +1,374 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
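Taken together, `GraphLinksSerializer` packs the per-point, per-level adjacency lists into the byte layout sketched in the diagram near the top of `graph_links.rs`, and `GraphLinks`/`GraphLinksView` parse it back. A minimal round-trip sketch (not part of the patch; it relies on the crate-internal `serialize_to_writer` and on the compressed format that `GraphLinks::load` assumes):

    use databend_common_exception::Result;

    fn graph_links_round_trip() -> Result<()> {
        // Three points, one level each; neighbours are PointOffsetType (u32) ids.
        let edges: Vec<Vec<Vec<u32>>> = vec![
            vec![vec![1, 2]], // point 0, level 0
            vec![vec![0]],    // point 1, level 0
            vec![vec![0]],    // point 2, level 0
        ];
        // m = 8 and m0 = 16 are arbitrary example parameters.
        let serializer = GraphLinksSerializer::new(edges, GraphLinksFormat::Compressed, 8, 16);
        let mut bytes = Vec::new();
        serializer
            .serialize_to_writer(&mut bytes)
            .expect("writing to a Vec<u8> never fails");

        let links = GraphLinks::load(&bytes)?;
        assert_eq!(links.num_points(), 3);
        assert_eq!(links.point_level(0), 0);
        // Links may come back reordered, since they are re-packed during
        // serialization; only the set of neighbours is preserved.
        let neighbours: Vec<u32> = links.links(0, 0).collect();
        assert_eq!(neighbours.len(), 2);
        Ok(())
    }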
+
+use std::sync::atomic::AtomicBool;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use databend_common_expression::types::Buffer;
+use databend_common_expression::types::DataType;
+use databend_common_expression::types::VectorColumn;
+use databend_common_expression::types::VectorColumnBuilder;
+use databend_common_expression::types::VectorScalar;
+use databend_common_expression::types::F32;
+use databend_common_expression::BlockEntry;
+use databend_common_expression::Column;
+use databend_common_expression::ColumnId;
+use databend_common_expression::Scalar;
+use databend_common_expression::TableDataType;
+use databend_common_expression::TableField;
+use log::error;
+use rand::thread_rng;
+use rayon::iter::IntoParallelIterator;
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+use crate::hnsw_index::common::types::PointOffsetType;
+use crate::hnsw_index::common::types::ScoredPointOffset;
+use crate::hnsw_index::common::utils::check_process_stopped;
+use crate::hnsw_index::graph_layers::GraphLayers;
+use crate::hnsw_index::graph_layers_builder::GraphLayersBuilder;
+use crate::hnsw_index::graph_links::GraphLinksFormat;
+use crate::hnsw_index::point_scorer::FilteredScorer;
+use crate::hnsw_index::point_scorer::OriginalRawScorer;
+use crate::hnsw_index::point_scorer::QuantizedRawScorer;
+use crate::hnsw_index::point_scorer::RawScorer;
+use crate::hnsw_index::quantization::encoded_vectors::EncodedVectors;
+use crate::hnsw_index::quantization::DistanceType;
+use crate::hnsw_index::quantization::EncodedVectorsU8;
+use crate::hnsw_index::quantization::VectorParameters;
+
+pub const SINGLE_THREADED_HNSW_BUILD_THRESHOLD: usize = 256;
+
+pub struct HNSWIndex {
+    graph_layers: GraphLayers,
+    distance_type: DistanceType,
+    encoded_vectors: EncodedVectorsU8<Vec<u8>>,
+}
+
+impl HNSWIndex {
+    pub fn open(
+        distance_type: DistanceType,
+        dim: usize,
+        count: usize,
+        binary_columns: Vec<Column>,
+    ) -> Result<Self> {
+        let graph_links = unsafe { binary_columns[0].as_binary().unwrap().index_unchecked(0) };
+        let graph_data = unsafe { binary_columns[1].as_binary().unwrap().index_unchecked(0) };
+        let encoded_meta = unsafe { binary_columns[2].as_binary().unwrap().index_unchecked(0) };
+        let encoded_data = unsafe { binary_columns[3].as_binary().unwrap().index_unchecked(0) };
+
+        let graph_layers = GraphLayers::open(graph_links, graph_data)?;
+
+        let invert = match distance_type {
+            DistanceType::Dot => false,
+            DistanceType::L1 | DistanceType::L2 => true,
+        };
+
+        let params = VectorParameters {
+            dim,
+            count,
+            distance_type,
+            invert,
+        };
+
+        let encoded_vectors: EncodedVectorsU8<Vec<u8>> =
+            EncodedVectorsU8::load(encoded_data, encoded_meta, &params)?;
+
+        Ok(Self {
+            graph_layers,
+            distance_type,
+            encoded_vectors,
+        })
+    }
+
+    pub fn search(&self, limit: usize, query_values: &[f32]) -> Result<Vec<ScoredPointOffset>> {
+        let query_encode = self.encoded_vectors.encode_query(query_values);
+
+        let raw_scorer = RawScorer::Quantized(QuantizedRawScorer {
+            query: query_encode,
+            vector: &self.encoded_vectors,
+        });
+
+        // `ef` bounds the candidate set maintained during the search: larger
+        // values give higher precision, smaller values give faster searches.
+        // Four times the limit is a reasonably balanced choice.
+ let ef = limit * 4; + let query_filter_scorer = FilteredScorer::new(&raw_scorer); + + let is_stopped = AtomicBool::new(false); + let values = self + .graph_layers + .search(limit, ef, query_filter_scorer, None, &is_stopped)?; + + let values = Self::postprocess_score(self.distance_type, values); + Ok(values) + } + + pub fn generate_scores( + &self, + row_nums: u32, + query_values: &[f32], + ) -> Result> { + let query_encode = self.encoded_vectors.encode_query(query_values); + + let raw_scorer = RawScorer::Quantized(QuantizedRawScorer { + query: query_encode, + vector: &self.encoded_vectors, + }); + + let mut values = Vec::with_capacity(row_nums as usize); + for idx in 0..row_nums { + let score = raw_scorer.score_point(idx); + values.push(ScoredPointOffset { idx, score }); + } + + let values = Self::postprocess_score(self.distance_type, values); + Ok(values) + } + + pub fn build( + m: usize, + ef_construct: usize, + column_id: ColumnId, + column: Column, + distance_type: DistanceType, + ) -> Result<(Vec, Vec)> { + let m0 = m * 2; + let entry_points_num = 2; + let use_heuristic = true; + let num_vectors = column.len(); + + let column = column.remove_nullable(); + let vector_column = column.as_vector().unwrap(); + let vector_column = preprocess(distance_type, vector_column.clone()); + + let mut rng = thread_rng(); + let mut graph_layers_builder = GraphLayersBuilder::new( + num_vectors, + m, + m0, + ef_construct, + entry_points_num, + use_heuristic, + ); + + for i in 0..column.len() { + let vector_id = i as PointOffsetType; + let level = graph_layers_builder.get_random_layer(&mut rng); + graph_layers_builder.set_levels(vector_id, level); + } + + let parallelism = match std::thread::available_parallelism() { + Ok(degree) => degree.get(), + Err(e) => { + error!( + "failed to detect the number of parallelism: {}, fallback to 8", + e + ); + 8 + } + }; + + let pool = ThreadPoolBuilder::new() + .thread_name(|index| format!("hnsw-build-{}", index)) + .num_threads(parallelism) + .build() + .expect("failed to build hnsw build thread pool"); + + let first_few_num = std::cmp::min(SINGLE_THREADED_HNSW_BUILD_THRESHOLD, column.len()); + let left_num = if column.len() > first_few_num { + column.len() - first_few_num + } else { + 0 + }; + + let mut first_few_ids = Vec::with_capacity(first_few_num); + let mut ids = Vec::with_capacity(left_num); + for i in 0..first_few_num { + first_few_ids.push(i); + } + for i in first_few_num..column.len() { + ids.push(i); + } + + let (data, dim) = vector_column.as_float32().unwrap(); + let data = unsafe { std::mem::transmute::, Buffer>(data.clone()) }; + + let stopped = AtomicBool::new(false); + + let mut index_fields = Vec::with_capacity(4); + let mut index_columns = Vec::with_capacity(4); + + let insert_point = |vector_id| { + check_process_stopped(&stopped)?; + + let raw_scorer = RawScorer::Original(OriginalRawScorer { + distance_type, + index: vector_id, + column: &column, + }); + let points_scorer = FilteredScorer::new(&raw_scorer); + graph_layers_builder.link_new_point(vector_id as PointOffsetType, points_scorer); + + Ok::<_, ErrorCode>(()) + }; + + for vector_id in first_few_ids { + insert_point(vector_id)?; + } + if !ids.is_empty() { + pool.install(|| ids.into_par_iter().try_for_each(insert_point))?; + } + + // let graph_layers = graph_layers_builder.into_graph_layers_ram(GraphLinksFormat::Compressed); + let (graph_links, graph_data) = + graph_layers_builder.into_graph_data(GraphLinksFormat::Compressed)?; + + index_columns.push(BlockEntry::new_const_column( + 
DataType::Binary, + Scalar::Binary(graph_links), + 1, + )); + index_columns.push(BlockEntry::new_const_column( + DataType::Binary, + Scalar::Binary(graph_data), + 1, + )); + + index_fields.push(TableField::new( + &format!("{}-{}_graph_links", column_id, distance_type), + TableDataType::Binary, + )); + index_fields.push(TableField::new( + &format!("{}-{}_graph_data", column_id, distance_type), + TableDataType::Binary, + )); + + // Set invert parameter to query the closest vector (the most similar vector). + // For Dot distances: invert = false (because a larger dot product means more similar) + // For L1 distances: invert = true (because a smaller Manhattan distance means more similar) + // For L2 distances: invert = true (because a smaller Euclidean distance means more similar) + let invert = match distance_type { + DistanceType::Dot => false, + DistanceType::L1 | DistanceType::L2 => true, + }; + + let params = VectorParameters { + dim: *dim, + count: column.len(), + distance_type, + invert, + }; + + let builder = Vec::new(); + let encoded_vectors = EncodedVectorsU8::encode( + (0..params.count).map(|i| &data.as_ref()[i * params.dim..(i + 1) * params.dim]), + builder, + ¶ms, + None, + &stopped, + )?; + + let encoded_meta = encoded_vectors.build_meta()?; + let encoded_data = encoded_vectors.build_data()?; + + index_columns.push(BlockEntry::new_const_column( + DataType::Binary, + Scalar::Binary(encoded_meta), + 1, + )); + index_columns.push(BlockEntry::new_const_column( + DataType::Binary, + Scalar::Binary(encoded_data), + 1, + )); + index_fields.push(TableField::new( + &format!("{}-{}_encoded_u8_meta", column_id, distance_type), + TableDataType::Binary, + )); + index_fields.push(TableField::new( + &format!("{}-{}_encoded_u8_data", column_id, distance_type), + TableDataType::Binary, + )); + + Ok((index_fields, index_columns)) + } + + pub fn preprocess_query(distance_type: DistanceType, query_values: Vec) -> Vec { + match distance_type { + DistanceType::Dot => cosine_preprocess(query_values), + DistanceType::L1 | DistanceType::L2 => query_values, + } + } + + fn postprocess_score( + distance_type: DistanceType, + mut values: Vec, + ) -> Vec { + match distance_type { + DistanceType::L1 => { + for value in &mut values { + value.score = value.score.abs(); + } + } + DistanceType::L2 => { + for value in &mut values { + value.score = value.score.abs().sqrt(); + } + } + DistanceType::Dot => { + for value in &mut values { + value.score = (1.0_f32 - value.score).abs(); + } + } + } + values + } +} + +fn preprocess(distance_type: DistanceType, column: VectorColumn) -> VectorColumn { + match distance_type { + DistanceType::Dot => { + let ty = column.data_type(); + let len = column.len(); + let mut builder = VectorColumnBuilder::with_capacity(&ty, len); + for scalar in column.iter() { + let val = scalar.as_float32().unwrap(); + let val = unsafe { std::mem::transmute::, Vec>(val.to_vec()) }; + + let new_val = cosine_preprocess(val); + let new_val = unsafe { std::mem::transmute::, Vec>(new_val) }; + let new_scalar = VectorScalar::Float32(new_val); + builder.push(&new_scalar.as_ref()); + } + builder.build() + } + DistanceType::L1 | DistanceType::L2 => column, + } +} + +fn is_length_zero_or_normalized(length: f32) -> bool { + length < f32::EPSILON || (length - 1.0).abs() <= 1.0e-6 +} + +fn cosine_preprocess(vector: Vec) -> Vec { + let mut length: f32 = vector.iter().map(|x| x * x).sum(); + if is_length_zero_or_normalized(length) { + return vector; + } + length = length.sqrt(); + vector.iter().map(|x| x / 
length).collect()
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/mod.rs b/src/query/storages/common/index/src/hnsw_index/mod.rs
new file mode 100644
index 0000000000000..243e824d566a0
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/mod.rs
@@ -0,0 +1,91 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod common;
+mod entry_points;
+mod graph_layers;
+mod graph_layers_builder;
+mod graph_links;
+mod hnsw;
+mod point_scorer;
+mod quantization;
+mod search_context;
+mod visited_pool;
+
+pub use common::fixed_length_priority_queue::FixedLengthPriorityQueue;
+pub use common::types::ScoredPointOffset;
+use databend_common_exception::ErrorCode;
+use databend_storages_common_table_meta::meta::SingleColumnMeta;
+pub use hnsw::HNSWIndex;
+use parquet::format::FileMetaData;
+pub use quantization::DistanceType;
+
+#[derive(Clone)]
+pub struct VectorIndexMeta {
+    pub columns: Vec<(String, SingleColumnMeta)>,
+}
+
+#[derive(Clone, Debug)]
+pub struct VectorIndexFile {
+    pub name: String,
+    pub data: Vec<u8>,
+}
+
+impl VectorIndexFile {
+    pub fn create(name: String, data: Vec<u8>) -> Self {
+        Self { name, data }
+    }
+}
+
+impl TryFrom<FileMetaData> for VectorIndexMeta {
+    type Error = ErrorCode;
+
+    fn try_from(mut meta: FileMetaData) -> std::result::Result<Self, Self::Error> {
+        let rg = meta.row_groups.remove(0);
+        let mut col_metas = Vec::with_capacity(rg.columns.len());
+        for x in &rg.columns {
+            match &x.meta_data {
+                Some(chunk_meta) => {
+                    let col_start =
+                        if let Some(dict_page_offset) = chunk_meta.dictionary_page_offset {
+                            dict_page_offset
+                        } else {
+                            chunk_meta.data_page_offset
+                        };
+                    let col_len = chunk_meta.total_compressed_size;
+                    assert!(
+                        col_start >= 0 && col_len >= 0,
+                        "column start and length should not be negative"
+                    );
+                    let num_values = chunk_meta.num_values as u64;
+                    let res = SingleColumnMeta {
+                        offset: col_start as u64,
+                        len: col_len as u64,
+                        num_values,
+                    };
+                    let column_name = chunk_meta.path_in_schema[0].to_owned();
+                    col_metas.push((column_name, res));
+                }
+                None => {
+                    panic!(
+                        "expecting chunk meta data while converting ThriftFileMetaData to VectorIndexMeta"
+                    )
+                }
+            }
+        }
+        col_metas.shrink_to_fit();
+        Ok(Self { columns: col_metas })
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/point_scorer.rs b/src/query/storages/common/index/src/hnsw_index/point_scorer.rs
new file mode 100644
index 0000000000000..36ecc2e96a360
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/point_scorer.rs
@@ -0,0 +1,174 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_expression::types::VectorScalarRef; +use databend_common_expression::types::F32; +use databend_common_expression::Column; +use databend_common_expression::ScalarRef; + +use crate::hnsw_index::common::types::PointOffsetType; +use crate::hnsw_index::common::types::ScoreType; +use crate::hnsw_index::common::types::ScoredPointOffset; +use crate::hnsw_index::quantization::encoded_vectors::EncodedVectors; +use crate::hnsw_index::quantization::EncodedQueryU8; +use crate::hnsw_index::quantization::EncodedVectorsU8; +use crate::DistanceType; + +pub enum RawScorer<'a> { + Original(OriginalRawScorer<'a>), + Quantized(QuantizedRawScorer<'a>), +} + +pub struct OriginalRawScorer<'a> { + pub distance_type: DistanceType, + pub index: usize, + pub column: &'a Column, +} + +pub struct QuantizedRawScorer<'a> { + pub query: EncodedQueryU8, + pub vector: &'a EncodedVectorsU8>, +} + +impl RawScorer<'_> { + pub fn score_point(&self, point_id: PointOffsetType) -> ScoreType { + match self { + RawScorer::Original(original) => { + let self_val = unsafe { original.column.index_unchecked(original.index) }; + let point_val = unsafe { original.column.index_unchecked(point_id as usize) }; + calculate_score(original.distance_type, self_val, point_val) + } + RawScorer::Quantized(quantized) => { + quantized.vector.score_point(&quantized.query, point_id) + } + } + } + + pub fn score_internal(&self, point_a: PointOffsetType, point_b: PointOffsetType) -> ScoreType { + match self { + RawScorer::Original(original) => { + let point_a_val = unsafe { original.column.index_unchecked(point_a as usize) }; + let point_b_val = unsafe { original.column.index_unchecked(point_b as usize) }; + calculate_score(original.distance_type, point_a_val, point_b_val) + } + RawScorer::Quantized(quantized) => quantized.vector.score_internal(point_a, point_b), + } + } +} + +pub struct FilteredScorer<'a> { + raw_scorer: &'a RawScorer<'a>, + points_buffer: Vec, +} + +impl<'a> FilteredScorer<'a> { + pub fn new(raw_scorer: &'a RawScorer) -> Self { + FilteredScorer { + raw_scorer, + points_buffer: Vec::new(), + } + } + + pub fn check_vector(&self, _point_id: PointOffsetType) -> bool { + true + } + + /// Method filters and calculates scores for the given slice of points IDs + /// + /// For performance reasons this function mutates input values. + /// For result slice allocation this function mutates self. + /// + /// # Arguments + /// + /// * `point_ids` - list of points to score. *Warn*: This input will be wrecked during the execution. + /// * `limit` - limits the number of points to process after filtering. 
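+    /// If `limit` is 0, no truncation is applied and the whole `point_ids`
+    /// slice is scored.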
+ pub fn score_points( + &mut self, + point_ids: &mut [PointOffsetType], + limit: usize, + ) -> &[ScoredPointOffset] { + if limit == 0 { + self.points_buffer + .resize_with(point_ids.len(), ScoredPointOffset::default); + } else { + self.points_buffer + .resize_with(limit, ScoredPointOffset::default); + } + let mut size: usize = 0; + for point_id in point_ids.iter().copied() { + let score = self.score_point(point_id); + self.points_buffer[size] = ScoredPointOffset { + idx: point_id, + score, + }; + + size += 1; + if size == self.points_buffer.len() { + break; + } + } + &self.points_buffer[0..size] + } + + pub fn score_point(&self, point_id: PointOffsetType) -> ScoreType { + self.raw_scorer.score_point(point_id) + } + + pub fn score_internal(&self, point_a: PointOffsetType, point_b: PointOffsetType) -> ScoreType { + self.raw_scorer.score_internal(point_a, point_b) + } +} + +fn calculate_score(distance_type: DistanceType, lhs: ScalarRef, rhs: ScalarRef) -> f32 { + match (lhs, rhs) { + ( + ScalarRef::Vector(VectorScalarRef::Int8(lhs)), + ScalarRef::Vector(VectorScalarRef::Int8(rhs)), + ) => { + let l: Vec<_> = lhs.iter().map(|v| *v as f32).collect(); + let r: Vec<_> = rhs.iter().map(|v| *v as f32).collect(); + match distance_type { + DistanceType::Dot => dot_similarity(&l, &r), + DistanceType::L1 => manhattan_similarity(&l, &r), + DistanceType::L2 => euclid_similarity(&l, &r), + } + } + ( + ScalarRef::Vector(VectorScalarRef::Float32(lhs)), + ScalarRef::Vector(VectorScalarRef::Float32(rhs)), + ) => { + let l = unsafe { std::mem::transmute::<&[F32], &[f32]>(lhs) }; + let r = unsafe { std::mem::transmute::<&[F32], &[f32]>(rhs) }; + match distance_type { + DistanceType::Dot => dot_similarity(l, r), + DistanceType::L1 => manhattan_similarity(l, r), + DistanceType::L2 => euclid_similarity(l, r), + } + } + (_, _) => 0.0, + } +} + +pub fn dot_similarity(v1: &[f32], v2: &[f32]) -> f32 { + v1.iter().zip(v2).map(|(a, b)| a * b).sum() +} + +fn euclid_similarity(v1: &[f32], v2: &[f32]) -> f32 { + -v1.iter().zip(v2).map(|(a, b)| (a - b).powi(2)).sum::() +} + +fn manhattan_similarity(v1: &[f32], v2: &[f32]) -> f32 { + -v1.iter().zip(v2).map(|(a, b)| (a - b).abs()).sum::() +} diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs new file mode 100644 index 0000000000000..31d7caeef61f1 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_storage.rs @@ -0,0 +1,68 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
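One detail of `point_scorer.rs` worth calling out: the L1 and L2 "similarities" are negated distances, so a single larger-is-better ordering works for all three metrics, and `HNSWIndex::postprocess_score` later maps the scores back to plain distances. A standalone sketch of that convention (the helper is renamed so it does not clash with the patch):

    // Mirrors euclid_similarity above: negating the squared distance makes
    // "closer" compare as "greater", which is what the heap-based search expects.
    fn l2_similarity(a: &[f32], b: &[f32]) -> f32 {
        -a.iter().zip(b).map(|(x, y)| (x - y).powi(2)).sum::<f32>()
    }

    fn main() {
        let q = [0.0_f32, 0.0];
        let near = [1.0_f32, 0.0];
        let far = [3.0_f32, 4.0];
        let (s_near, s_far) = (l2_similarity(&q, &near), l2_similarity(&q, &far));
        // The nearer point gets the larger score: -1.0 > -25.0.
        assert!(s_near > s_far);
        // Recovering the Euclidean distance, as postprocess_score does for L2:
        assert_eq!(s_far.abs().sqrt(), 5.0);
    }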
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+
+pub trait EncodedStorage {
+    fn get_vector_data(&self, index: usize, vector_size: usize) -> &[u8];
+
+    fn from_slice(slice: &[u8], quantized_vector_size: usize, vectors_count: usize) -> Result<Self>
+    where Self: Sized;
+
+    fn to_vec(&self) -> Result<Vec<u8>>;
+}
+
+pub trait EncodedStorageBuilder<TStorage: EncodedStorage> {
+    fn build(self) -> TStorage;
+
+    fn push_vector_data(&mut self, other: &[u8]);
+}
+
+impl EncodedStorage for Vec<u8> {
+    fn get_vector_data(&self, index: usize, vector_size: usize) -> &[u8] {
+        &self[vector_size * index..vector_size * (index + 1)]
+    }
+
+    fn from_slice(
+        slice: &[u8],
+        quantized_vector_size: usize,
+        vectors_count: usize,
+    ) -> Result<Self> {
+        let expected_size = quantized_vector_size * vectors_count;
+        if slice.len() == expected_size {
+            Ok(slice.to_vec())
+        } else {
+            Err(ErrorCode::Internal(format!(
+                "Loaded storage size {} is not equal to expected size {expected_size}",
+                slice.len()
+            )))
+        }
+    }
+
+    fn to_vec(&self) -> Result<Vec<u8>> {
+        Ok(self.clone())
+    }
+}
+
+impl EncodedStorageBuilder<Vec<u8>> for Vec<u8> {
+    fn build(self) -> Vec<u8> {
+        self
+    }
+
+    fn push_vector_data(&mut self, other: &[u8]) {
+        self.extend_from_slice(other);
+    }
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs
new file mode 100644
index 0000000000000..4620866d928fb
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors.rs
@@ -0,0 +1,96 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
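To make the trait pair above concrete: `Vec<u8>` serves as both the builder and the storage, so encoded vectors are simply concatenated fixed-size byte slices. A small usage sketch (it assumes both traits are in scope):

    fn storage_sketch() {
        // Vec<u8> implements both EncodedStorageBuilder<Vec<u8>> and EncodedStorage.
        let mut builder: Vec<u8> = Vec::new();
        builder.push_vector_data(&[1, 2, 3, 4]); // vector 0 (4 bytes per vector)
        builder.push_vector_data(&[5, 6, 7, 8]); // vector 1
        let storage: Vec<u8> = builder.build();

        // get_vector_data(index, vector_size) slices out one encoded vector.
        assert_eq!(storage.get_vector_data(1, 4), &[5, 6, 7, 8]);

        // from_slice validates that the byte length matches count * size.
        let reloaded = Vec::<u8>::from_slice(&storage, 4, 2).unwrap();
        assert_eq!(reloaded.len(), 8);
    }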
+
+use std::fmt::Display;
+use std::fmt::Formatter;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use serde::Deserialize;
+use serde::Serialize;
+
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DistanceType {
+    Dot,
+    L1,
+    L2,
+}
+
+impl Display for DistanceType {
+    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+        match self {
+            DistanceType::Dot => write!(f, "dot"),
+            DistanceType::L1 => write!(f, "l1"),
+            DistanceType::L2 => write!(f, "l2"),
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Clone)]
+pub struct VectorParameters {
+    pub dim: usize,
+    pub count: usize,
+    pub distance_type: DistanceType,
+    pub invert: bool,
+}
+
+pub trait EncodedVectors<TEncodedQuery>: Sized {
+    fn build_data(&self) -> Result<Vec<u8>>;
+
+    fn build_meta(&self) -> Result<Vec<u8>>;
+
+    fn load(data: &[u8], meta: &[u8], vector_parameters: &VectorParameters) -> Result<Self>;
+
+    fn encode_query(&self, query: &[f32]) -> TEncodedQuery;
+
+    fn score_point(&self, query: &TEncodedQuery, i: u32) -> f32;
+
+    fn score_internal(&self, i: u32, j: u32) -> f32;
+}
+
+impl DistanceType {
+    pub fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
+        match self {
+            DistanceType::Dot => a.iter().zip(b).map(|(a, b)| a * b).sum(),
+            DistanceType::L1 => a.iter().zip(b).map(|(a, b)| (a - b).abs()).sum(),
+            DistanceType::L2 => a.iter().zip(b).map(|(a, b)| (a - b) * (a - b)).sum(),
+        }
+    }
+}
+
+pub(crate) fn validate_vector_parameters<'a>(
+    data: impl Iterator<Item = impl AsRef<[f32]> + 'a> + Clone,
+    vector_parameters: &VectorParameters,
+) -> Result<()> {
+    let mut count = 0;
+    for vector in data {
+        let vector = vector.as_ref();
+        if vector.len() != vector_parameters.dim {
+            return Err(ErrorCode::BadArguments(format!(
+                "Vector length {} does not match vector parameters dim {}",
+                vector.len(),
+                vector_parameters.dim
+            )));
+        }
+        count += 1;
+    }
+    if count != vector_parameters.count {
+        return Err(ErrorCode::BadArguments(format!(
+            "Vector count {} does not match vector parameters count {}",
+            count, vector_parameters.count
+        )));
+    }
+    Ok(())
+}
diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs
new file mode 100644
index 0000000000000..652d689571462
--- /dev/null
+++ b/src/query/storages/common/index/src/hnsw_index/quantization/encoded_vectors_u8.rs
@@ -0,0 +1,364 @@
+// Copyright Qdrant
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
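A quick numeric illustration of `DistanceType::distance` and the `invert` flag recorded in `VectorParameters` (dot products grow with similarity, while L1/L2 shrink, hence the inversion applied elsewhere in the index):

    fn distance_sketch() {
        let a = [1.0_f32, 2.0, 3.0];
        let b = [1.0_f32, 2.0, 4.0];

        // Dot product: larger means more similar, so invert = false.
        assert_eq!(DistanceType::Dot.distance(&a, &b), 17.0);
        // L1 (Manhattan): sum of absolute differences, invert = true.
        assert_eq!(DistanceType::L1.distance(&a, &b), 1.0);
        // L2 here is the *squared* Euclidean distance; the final sqrt is
        // applied later, in HNSWIndex::postprocess_score.
        assert_eq!(DistanceType::L2.distance(&a, &b), 1.0);
    }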
+ +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use serde::Deserialize; +use serde::Serialize; + +use crate::hnsw_index::quantization::encoded_storage::EncodedStorage; +use crate::hnsw_index::quantization::encoded_storage::EncodedStorageBuilder; +use crate::hnsw_index::quantization::encoded_vectors::validate_vector_parameters; +use crate::hnsw_index::quantization::encoded_vectors::DistanceType; +use crate::hnsw_index::quantization::encoded_vectors::EncodedVectors; +use crate::hnsw_index::quantization::encoded_vectors::VectorParameters; +use crate::hnsw_index::quantization::quantile::find_min_max_from_iter; +use crate::hnsw_index::quantization::quantile::find_quantile_interval; + +pub const ALIGNMENT: usize = 16; + +pub struct EncodedVectorsU8 { + encoded_vectors: TStorage, + metadata: Metadata, +} + +pub struct EncodedQueryU8 { + offset: f32, + encoded_query: Vec, +} + +#[derive(Serialize, Deserialize)] +struct Metadata { + actual_dim: usize, + alpha: f32, + offset: f32, + multiplier: f32, + vector_parameters: VectorParameters, +} + +impl EncodedVectorsU8 { + pub fn encode<'a>( + orig_data: impl Iterator + 'a> + Clone, + mut storage_builder: impl EncodedStorageBuilder, + vector_parameters: &VectorParameters, + quantile: Option, + stopped: &AtomicBool, + ) -> Result { + let actual_dim = Self::get_actual_dim(vector_parameters); + + if vector_parameters.count == 0 { + return Ok(EncodedVectorsU8 { + encoded_vectors: storage_builder.build(), + metadata: Metadata { + actual_dim, + alpha: 0.0, + offset: 0.0, + multiplier: 0.0, + vector_parameters: vector_parameters.clone(), + }, + }); + } + + debug_assert!(validate_vector_parameters(orig_data.clone(), vector_parameters).is_ok()); + let (alpha, offset) = Self::find_alpha_offset_size_dim(orig_data.clone()); + let (alpha, offset) = if let Some(quantile) = quantile { + if let Some((min, max)) = find_quantile_interval( + orig_data.clone(), + vector_parameters.dim, + vector_parameters.count, + quantile, + ) { + Self::alpha_offset_from_min_max(min, max) + } else { + (alpha, offset) + } + } else { + (alpha, offset) + }; + + for vector in orig_data { + if stopped.load(Ordering::Relaxed) { + return Err(ErrorCode::Internal("check process stopped error")); + } + + let mut encoded_vector = Vec::with_capacity(actual_dim + std::mem::size_of::()); + encoded_vector.extend_from_slice(&f32::default().to_ne_bytes()); + for &value in vector.as_ref() { + let encoded = Self::f32_to_u8(value, alpha, offset); + encoded_vector.push(encoded); + } + if vector_parameters.dim % ALIGNMENT != 0 { + for _ in 0..(ALIGNMENT - vector_parameters.dim % ALIGNMENT) { + let placeholder = match vector_parameters.distance_type { + DistanceType::Dot => 0.0, + DistanceType::L1 | DistanceType::L2 => offset, + }; + let encoded = Self::f32_to_u8(placeholder, alpha, offset); + encoded_vector.push(encoded); + } + } + let vector_offset = match vector_parameters.distance_type { + DistanceType::Dot => { + actual_dim as f32 * offset * offset + + encoded_vector.iter().map(|&x| f32::from(x)).sum::() * alpha * offset + } + DistanceType::L1 => 0.0, + DistanceType::L2 => { + actual_dim as f32 * offset * offset + + encoded_vector + .iter() + .map(|&x| f32::from(x) * f32::from(x)) + .sum::() + * alpha + * alpha + } + }; + let vector_offset = if vector_parameters.invert { + -vector_offset + } else { + vector_offset + }; + encoded_vector[0..std::mem::size_of::()] + 
.copy_from_slice(&vector_offset.to_ne_bytes()); + storage_builder.push_vector_data(&encoded_vector); + } + let multiplier = match vector_parameters.distance_type { + DistanceType::Dot => alpha * alpha, + DistanceType::L1 => alpha, + DistanceType::L2 => -2.0 * alpha * alpha, + }; + let multiplier = if vector_parameters.invert { + -multiplier + } else { + multiplier + }; + + Ok(EncodedVectorsU8 { + encoded_vectors: storage_builder.build(), + metadata: Metadata { + actual_dim, + alpha, + offset, + multiplier, + vector_parameters: vector_parameters.clone(), + }, + }) + } + + pub fn score_point_simple(&self, query: &EncodedQueryU8, i: u32) -> f32 { + let (vector_offset, v_ptr) = self.get_vec_ptr(i); + + let score = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot | DistanceType::L2 => impl_score_dot( + query.encoded_query.as_ptr(), + v_ptr, + self.metadata.actual_dim, + ), + DistanceType::L1 => impl_score_l1( + query.encoded_query.as_ptr(), + v_ptr, + self.metadata.actual_dim, + ), + }; + + self.metadata.multiplier * score as f32 + query.offset + vector_offset + } + + fn find_alpha_offset_size_dim<'a>( + orig_data: impl Iterator + 'a> + Clone, + ) -> (f32, f32) { + let (min, max) = find_min_max_from_iter(orig_data); + Self::alpha_offset_from_min_max(min, max) + } + + fn alpha_offset_from_min_max(min: f32, max: f32) -> (f32, f32) { + let alpha = (max - min) / 127.0; + let offset = min; + (alpha, offset) + } + + fn f32_to_u8(i: f32, alpha: f32, offset: f32) -> u8 { + let i = (i - offset) / alpha; + i.clamp(0.0, 127.0) as u8 + } + + #[inline] + fn get_vec_ptr(&self, i: u32) -> (f32, *const u8) { + unsafe { + let vector_data_size = self.metadata.actual_dim + std::mem::size_of::(); + let v_ptr = self + .encoded_vectors + .get_vector_data(i as usize, vector_data_size) + .as_ptr(); + let vector_offset = *v_ptr.cast::(); + (vector_offset, v_ptr.add(std::mem::size_of::())) + } + } + + #[allow(dead_code)] + pub fn get_quantized_vector(&self, i: u32) -> (f32, &[u8]) { + let (offset, v_ptr) = self.get_vec_ptr(i); + let vector_data_size = self.metadata.actual_dim; + (offset, unsafe { + std::slice::from_raw_parts(v_ptr, vector_data_size) + }) + } + + pub fn get_quantized_vector_size(vector_parameters: &VectorParameters) -> usize { + let actual_dim = Self::get_actual_dim(vector_parameters); + actual_dim + std::mem::size_of::() + } + + #[allow(dead_code)] + pub fn get_multiplier(&self) -> f32 { + self.metadata.multiplier + } + + #[allow(dead_code)] + pub fn get_diff(&self) -> f32 { + let diff = self.metadata.actual_dim as f32 * self.metadata.offset * self.metadata.offset; + if self.metadata.vector_parameters.invert { + -diff + } else { + diff + } + } + + pub fn get_actual_dim(vector_parameters: &VectorParameters) -> usize { + vector_parameters.dim + (ALIGNMENT - vector_parameters.dim % ALIGNMENT) % ALIGNMENT + } + + #[allow(dead_code)] + pub fn vectors_count(&self) -> usize { + self.metadata.vector_parameters.count + } +} + +impl EncodedVectors for EncodedVectorsU8 { + fn build_data(&self) -> Result> { + self.encoded_vectors.to_vec() + } + + fn build_meta(&self) -> Result> { + let mut buf = Vec::new(); + serde_json::to_writer(&mut buf, &self.metadata)?; + Ok(buf) + } + + fn load(data: &[u8], meta: &[u8], vector_parameters: &VectorParameters) -> Result { + let contents = std::str::from_utf8(meta)?; + let metadata: Metadata = serde_json::from_str(contents)?; + let quantized_vector_size = Self::get_quantized_vector_size(vector_parameters); + let encoded_vectors = + 
TStorage::from_slice(data, quantized_vector_size, vector_parameters.count)?; + let result = Self { + encoded_vectors, + metadata, + }; + Ok(result) + } + + fn encode_query(&self, query: &[f32]) -> EncodedQueryU8 { + let dim = query.len(); + let mut query: Vec<_> = query + .iter() + .map(|&v| Self::f32_to_u8(v, self.metadata.alpha, self.metadata.offset)) + .collect(); + if dim % ALIGNMENT != 0 { + for _ in 0..(ALIGNMENT - dim % ALIGNMENT) { + let placeholder = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot => 0.0, + DistanceType::L1 | DistanceType::L2 => self.metadata.offset, + }; + let encoded = + Self::f32_to_u8(placeholder, self.metadata.alpha, self.metadata.offset); + query.push(encoded); + } + } + let offset = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot => { + query.iter().map(|&x| f32::from(x)).sum::() + * self.metadata.alpha + * self.metadata.offset + } + DistanceType::L1 => 0.0, + DistanceType::L2 => { + query + .iter() + .map(|&x| f32::from(x) * f32::from(x)) + .sum::() + * self.metadata.alpha + * self.metadata.alpha + } + }; + let offset = if self.metadata.vector_parameters.invert { + -offset + } else { + offset + }; + EncodedQueryU8 { + offset, + encoded_query: query, + } + } + + fn score_point(&self, query: &EncodedQueryU8, i: u32) -> f32 { + self.score_point_simple(query, i) + } + + fn score_internal(&self, i: u32, j: u32) -> f32 { + let (query_offset, q_ptr) = self.get_vec_ptr(i); + let (vector_offset, v_ptr) = self.get_vec_ptr(j); + let diff = self.metadata.actual_dim as f32 * self.metadata.offset * self.metadata.offset; + let diff = if self.metadata.vector_parameters.invert { + -diff + } else { + diff + }; + let offset = query_offset + vector_offset - diff; + + let score = match self.metadata.vector_parameters.distance_type { + DistanceType::Dot | DistanceType::L2 => { + impl_score_dot(q_ptr, v_ptr, self.metadata.actual_dim) + } + DistanceType::L1 => impl_score_l1(q_ptr, v_ptr, self.metadata.actual_dim), + }; + + self.metadata.multiplier * score as f32 + offset + } +} + +fn impl_score_dot(q_ptr: *const u8, v_ptr: *const u8, actual_dim: usize) -> i32 { + unsafe { + let mut score = 0i32; + for i in 0..actual_dim { + score += i32::from(*q_ptr.add(i)) * i32::from(*v_ptr.add(i)); + } + score + } +} + +fn impl_score_l1(q_ptr: *const u8, v_ptr: *const u8, actual_dim: usize) -> i32 { + unsafe { + let mut score = 0i32; + for i in 0..actual_dim { + score += i32::from(*q_ptr.add(i)).abs_diff(i32::from(*v_ptr.add(i))) as i32; + } + score + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/mod.rs b/src/query/storages/common/index/src/hnsw_index/quantization/mod.rs new file mode 100644 index 0000000000000..eca88c10d520e --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/quantization/mod.rs @@ -0,0 +1,24 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
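The u8 quantization above reduces to an affine code: with `alpha = (max - min) / 127` and `offset = min`, each component is stored as `(x - offset) / alpha`, truncated and clamped to `0..=127`, and scores computed over the codes are corrected back with the precomputed `multiplier` and per-vector offsets. A sketch of just the encode/decode arithmetic (standalone helpers, not the patch's API):

    // Standalone re-implementation of the f32 -> u8 mapping used by
    // EncodedVectorsU8::f32_to_u8, plus the inverse approximation.
    fn f32_to_u8(x: f32, alpha: f32, offset: f32) -> u8 {
        ((x - offset) / alpha).clamp(0.0, 127.0) as u8
    }

    fn u8_to_f32(code: u8, alpha: f32, offset: f32) -> f32 {
        code as f32 * alpha + offset
    }

    fn main() {
        let (min, max) = (-1.0_f32, 1.0_f32);
        let alpha = (max - min) / 127.0;
        let offset = min;

        let x = 0.5_f32;
        let code = f32_to_u8(x, alpha, offset);
        let x_approx = u8_to_f32(code, alpha, offset);
        // The round trip is lossy but stays within one quantization step.
        assert!((x - x_approx).abs() <= alpha);
    }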
+ +pub mod encoded_storage; +pub mod encoded_vectors; +pub mod encoded_vectors_u8; +pub mod quantile; + +pub use encoded_vectors::DistanceType; +pub use encoded_vectors::VectorParameters; +pub use encoded_vectors_u8::EncodedQueryU8; +pub use encoded_vectors_u8::EncodedVectorsU8; diff --git a/src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs b/src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs new file mode 100644 index 0000000000000..fca620aec2b11 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/quantization/quantile.rs @@ -0,0 +1,97 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use feistel_permutation_rs::DefaultBuildHasher; +use feistel_permutation_rs::Permutation; +use rand::rngs::SmallRng; +use rand::Rng; +use rand::SeedableRng; + +pub const QUANTILE_SAMPLE_SIZE: usize = 100_000; + +pub(crate) fn find_min_max_from_iter<'a>( + iter: impl Iterator + 'a> + Clone, +) -> (f32, f32) { + iter.fold((f32::MAX, f32::MIN), |(mut min, mut max), vector| { + for &value in vector.as_ref() { + if value < min { + min = value; + } + if value > max { + max = value; + } + } + (min, max) + }) +} + +pub(crate) fn find_quantile_interval<'a>( + vector_data: impl Iterator + 'a> + Clone, + dim: usize, + count: usize, + quantile: f32, +) -> Option<(f32, f32)> { + if count < 127 || quantile >= 1.0 { + return None; + } + + let slice_size = std::cmp::min(count, QUANTILE_SAMPLE_SIZE); + let mut rng = SmallRng::from_entropy(); + let seed: u64 = rng.gen(); + let permutor = Permutation::new(count as u64, seed, DefaultBuildHasher::new()); + let mut selected_vectors: Vec = permutor + .iter() + .map(|i| i as usize) + .take(slice_size) + .collect(); + + selected_vectors.sort_unstable(); + + let mut data_slice = Vec::with_capacity(slice_size * dim); + let mut selected_index: usize = 0; + for (vector_index, vector_data) in vector_data.into_iter().enumerate() { + if vector_index == selected_vectors[selected_index] { + data_slice.extend_from_slice(vector_data.as_ref()); + selected_index += 1; + if selected_index == slice_size { + break; + } + } + } + + let data_slice_len = data_slice.len(); + if data_slice_len < 4 { + return None; + } + + let cut_index = std::cmp::min( + (data_slice_len - 1) / 2, + (slice_size as f32 * (1.0 - quantile) / 2.0) as usize, + ); + let cut_index = std::cmp::max(cut_index, 1); + let comparator = |a: &f32, b: &f32| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal); + let (selected_values, _, _) = + data_slice.select_nth_unstable_by(data_slice_len - cut_index, comparator); + let (_, _, selected_values) = selected_values.select_nth_unstable_by(cut_index, comparator); + + if selected_values.len() < 2 { + return None; + } + + let selected_values = [selected_values]; + Some(find_min_max_from_iter( + selected_values.iter().map(|v| &v[..]), + )) +} diff --git a/src/query/storages/common/index/src/hnsw_index/search_context.rs 
b/src/query/storages/common/index/src/hnsw_index/search_context.rs new file mode 100644 index 0000000000000..b78637ef3ea47 --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/search_context.rs @@ -0,0 +1,61 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BinaryHeap; +use std::iter::FromIterator; + +use num_traits::float::FloatCore; + +use crate::hnsw_index::common::fixed_length_priority_queue::FixedLengthPriorityQueue; +use crate::hnsw_index::common::types::ScoreType; +use crate::hnsw_index::common::types::ScoredPointOffset; + +/// Structure that holds the context of the search +pub struct SearchContext { + /// Overall nearest points found so far + pub nearest: FixedLengthPriorityQueue<ScoredPointOffset>, + /// Current candidates to process + pub candidates: BinaryHeap<ScoredPointOffset>, +} + +impl SearchContext { + pub fn new(entry_point: ScoredPointOffset, ef: usize) -> Self { + let mut nearest = FixedLengthPriorityQueue::new(ef); + nearest.push(entry_point); + SearchContext { + nearest, + candidates: BinaryHeap::from_iter([entry_point]), + } + } + + pub fn lower_bound(&self) -> ScoreType { + match self.nearest.top() { + None => ScoreType::min_value(), + Some(worst_of_the_best) => worst_of_the_best.score, + } + } + + /// Updates the search context with a new scored point. + /// If it is closer than the existing ones, it is also added to the candidates for further search. + pub fn process_candidate(&mut self, score_point: ScoredPointOffset) { + let was_added = match self.nearest.push(score_point) { + None => true, + Some(removed) => removed.idx != score_point.idx, + }; + if was_added { + self.candidates.push(score_point); + } + } +} diff --git a/src/query/storages/common/index/src/hnsw_index/visited_pool.rs b/src/query/storages/common/index/src/hnsw_index/visited_pool.rs new file mode 100644 index 0000000000000..57b5dd9097fba --- /dev/null +++ b/src/query/storages/common/index/src/hnsw_index/visited_pool.rs @@ -0,0 +1,180 @@ +// Copyright Qdrant +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Structures for a fast and thread-safe way to check if some points were visited or not + +use std::sync::LazyLock; + +use parking_lot::RwLock; + +use crate::hnsw_index::common::types::PointOffsetType; +
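Together, SearchContext above and the visited pool below implement the inner loop of HNSW's ef-search. An illustrative single-layer sketch, using only this module's API (the `score` and `links` closures stand in for the real scorer and adjacency lookup; higher score means closer, matching ScoredPointOffset's ordering):

    fn search_layer(
        entry: ScoredPointOffset,
        ef: usize,
        score: impl Fn(u32) -> f32,
        links: impl Fn(u32) -> Vec<u32>,
        visited: &mut VisitedListHandle,
    ) -> Vec<ScoredPointOffset> {
        visited.check_and_update_visited(entry.idx);
        let mut ctx = SearchContext::new(entry, ef);
        while let Some(candidate) = ctx.candidates.pop() {
            // No unexplored candidate can improve the current top-ef set.
            if candidate.score < ctx.lower_bound() {
                break;
            }
            for idx in links(candidate.idx) {
                // Skip neighbors already seen in this query iteration.
                if !visited.check_and_update_visited(idx) {
                    ctx.process_candidate(ScoredPointOffset { idx, score: score(idx) });
                }
            }
        }
        ctx.nearest.into_sorted_vec()
    }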
+/// Max number of pooled elements to preserve in memory. +/// Scaled according to the number of logical CPU cores to account for concurrent operations. +pub static POOL_KEEP_LIMIT: LazyLock<usize> = LazyLock::new(|| num_cpus::get().clamp(16, 128)); + +/// Visited list handle is the owner of a `VisitedList`, which is handed out by `VisitedPool` and returned back to it on drop +#[derive(Debug)] +pub struct VisitedListHandle<'a> { + pool: &'a VisitedPool, + visited_list: VisitedList, +} + +/// Visited list reuses the same memory to keep track of visited point ids across multiple consecutive queries +/// +/// It stores the sequence number of the last processed operation next to the point ID, which avoids reallocating memory +/// and lets the same counters be reused for multiple queries. +#[derive(Debug)] +struct VisitedList { + current_iter: u8, + visit_counters: Vec<u8>, +} + +impl Default for VisitedList { + fn default() -> Self { + VisitedList { + current_iter: 1, + visit_counters: vec![], + } + } +} + +impl VisitedList { + fn new(num_points: usize) -> Self { + VisitedList { + current_iter: 1, + visit_counters: vec![0; num_points], + } + } +} + +impl Drop for VisitedListHandle<'_> { + fn drop(&mut self) { + self.pool + .return_back(std::mem::take(&mut self.visited_list)); + } +} + +impl<'a> VisitedListHandle<'a> { + fn new(pool: &'a VisitedPool, data: VisitedList) -> Self { + VisitedListHandle { + pool, + visited_list: data, + } + } + + /// Return `true` if visited + pub fn check(&self, point_id: PointOffsetType) -> bool { + self.visited_list + .visit_counters + .get(point_id as usize) + .is_some_and(|x| *x == self.visited_list.current_iter) + } + + /// Updates the visited list. + /// Returns `true` if the point was visited before. + pub fn check_and_update_visited(&mut self, point_id: PointOffsetType) -> bool { + let idx = point_id as usize; + if idx >= self.visited_list.visit_counters.len() { + self.visited_list.visit_counters.resize(idx + 1, 0); + } + std::mem::replace( + &mut self.visited_list.visit_counters[idx], + self.visited_list.current_iter, + ) == self.visited_list.current_iter + } + + pub fn next_iteration(&mut self) { + self.visited_list.current_iter = self.visited_list.current_iter.wrapping_add(1); + if self.visited_list.current_iter == 0 { + self.visited_list.current_iter = 1; + self.visited_list.visit_counters.fill(0); + } + } + + fn resize(&mut self, num_points: usize) { + // `self.current_iter` is never 0, so it's safe to use 0 as a default + // value. + self.visited_list.visit_counters.resize(num_points, 0); + } +} + +/// Keeps a list of `VisitedList` which can be requested and released from multiple threads +/// +/// If there are more requests than lists, a new list is created, but only the defined maximum amount is kept. +#[derive(Debug)] +pub struct VisitedPool { + pool: RwLock<Vec<VisitedList>>, +} + +impl VisitedPool { + pub fn new() -> Self { + VisitedPool { + pool: RwLock::new(Vec::with_capacity(*POOL_KEEP_LIMIT)), + } + } + + pub fn get(&self, num_points: usize) -> VisitedListHandle { + // If there are more concurrent requests, a new temporary list is created dynamically. + // This limit is implemented to prevent memory leakage.
+ match self.pool.write().pop() { + None => VisitedListHandle::new(self, VisitedList::new(num_points)), + Some(data) => { + let mut visited_list = VisitedListHandle::new(self, data); + visited_list.resize(num_points); + visited_list.next_iteration(); + visited_list + } + } + } + + fn return_back(&self, data: VisitedList) { + let mut pool = self.pool.write(); + if pool.len() < *POOL_KEEP_LIMIT { + pool.push(data); + } + } +} + +impl Default for VisitedPool { + fn default() -> Self { + VisitedPool::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_visited_list() { + let pool = VisitedPool::new(); + let mut visited_list = pool.get(10); + + for _ in 0..2 { + assert!(!visited_list.check(0)); + assert!(!visited_list.check_and_update_visited(0)); + assert!(visited_list.check(0)); + + assert!(visited_list.check_and_update_visited(0)); + assert!(visited_list.check(0)); + + for _ in 0..(u8::MAX as usize * 2 + 10) { + visited_list.next_iteration(); + assert!(!visited_list.check(0)); + } + } + } +} diff --git a/src/query/storages/common/index/src/lib.rs b/src/query/storages/common/index/src/lib.rs index 50f57df6908f8..cd6cbb69a581f 100644 --- a/src/query/storages/common/index/src/lib.rs +++ b/src/query/storages/common/index/src/lib.rs @@ -19,6 +19,7 @@ mod bloom_index; mod eliminate_cast; pub mod filters; +mod hnsw_index; mod index; mod inverted_index; mod page_index; @@ -31,6 +32,12 @@ pub use bloom_index::BloomIndexResult; pub use bloom_index::FilterEvalResult; pub use bloom_index::NgramArgs; pub use eliminate_cast::eliminate_cast; +pub use hnsw_index::DistanceType; +pub use hnsw_index::FixedLengthPriorityQueue; +pub use hnsw_index::HNSWIndex; +pub use hnsw_index::ScoredPointOffset; +pub use hnsw_index::VectorIndexFile; +pub use hnsw_index::VectorIndexMeta; pub use index::Index; pub use inverted_index::extract_component_fields; pub use inverted_index::extract_fsts; diff --git a/src/query/storages/common/pruner/src/block_meta.rs b/src/query/storages/common/pruner/src/block_meta.rs index b95280edf1284..febc97e5bdaa4 100644 --- a/src/query/storages/common/pruner/src/block_meta.rs +++ b/src/query/storages/common/pruner/src/block_meta.rs @@ -43,6 +43,8 @@ pub struct BlockMetaIndex { pub snapshot_location: Option<String>, // The search matched rows and optional scores in the block. pub matched_rows: Option<Vec<(usize, Option<F32>)>>, + // The vector topn rows and scores in the block. + pub vector_scores: Option<Vec<(usize, F32)>>, // The optional meta of virtual columns. pub virtual_block_meta: Option<VirtualBlockMetaIndex>, } diff --git a/src/query/storages/common/pruner/src/lib.rs b/src/query/storages/common/pruner/src/lib.rs index 9ada3141465aa..4a8414caf935b 100644 --- a/src/query/storages/common/pruner/src/lib.rs +++ b/src/query/storages/common/pruner/src/lib.rs @@ -32,4 +32,4 @@ pub use page_pruner::PagePruner; pub use page_pruner::PagePrunerCreator; pub use range_pruner::RangePruner; pub use range_pruner::RangePrunerCreator; -pub use topn_pruner::TopNPrunner; +pub use topn_pruner::TopNPruner; diff --git a/src/query/storages/common/pruner/src/topn_pruner.rs b/src/query/storages/common/pruner/src/topn_pruner.rs index 93cf9cdb075bc..fcba2ebe7e344 100644 --- a/src/query/storages/common/pruner/src/topn_pruner.rs +++ b/src/query/storages/common/pruner/src/topn_pruner.rs @@ -26,13 +26,13 @@ use crate::BlockMetaIndex; /// TopN pruner. /// Pruning for order by x limit N.
#[derive(Clone)] -pub struct TopNPrunner { +pub struct TopNPruner { schema: TableSchemaRef, sort: Vec<(RemoteExpr, bool, bool)>, limit: usize, } -impl TopNPrunner { +impl TopNPruner { pub fn create( schema: TableSchemaRef, sort: Vec<(RemoteExpr, bool, bool)>, @@ -46,7 +46,7 @@ impl TopNPrunner { } } -impl TopNPrunner { +impl TopNPruner { pub fn prune( &self, metas: Vec<(BlockMetaIndex, Arc)>, diff --git a/src/query/storages/common/table_meta/src/meta/v2/segment.rs b/src/query/storages/common/table_meta/src/meta/v2/segment.rs index bd31c3bf80396..47c2127145d60 100644 --- a/src/query/storages/common/table_meta/src/meta/v2/segment.rs +++ b/src/query/storages/common/table_meta/src/meta/v2/segment.rs @@ -170,6 +170,8 @@ pub struct BlockMeta { pub bloom_filter_index_size: u64, pub inverted_index_size: Option, pub ngram_filter_index_size: Option, + pub vector_index_size: Option, + pub vector_index_location: Option, /// The block meta of virtual columns. pub virtual_block_meta: Option, pub compression: Compression, @@ -192,6 +194,8 @@ impl BlockMeta { bloom_filter_index_size: u64, inverted_index_size: Option, ngram_filter_index_size: Option, + vector_index_size: Option, + vector_index_location: Option, virtual_block_meta: Option, compression: Compression, create_on: Option>, @@ -208,6 +212,8 @@ impl BlockMeta { bloom_filter_index_size, inverted_index_size, ngram_filter_index_size, + vector_index_size, + vector_index_location, virtual_block_meta, compression, create_on, @@ -368,6 +374,8 @@ impl BlockMeta { bloom_filter_index_size: 0, compression: Compression::Lz4, inverted_index_size: None, + vector_index_size: None, + vector_index_location: None, virtual_block_meta: None, create_on: None, ngram_filter_index_size: None, @@ -394,6 +402,8 @@ impl BlockMeta { bloom_filter_index_size: s.bloom_filter_index_size, compression: s.compression, inverted_index_size: None, + vector_index_size: None, + vector_index_location: None, virtual_block_meta: None, create_on: None, ngram_filter_index_size: None, diff --git a/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs b/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs index 41207315d2d19..ccc22fdbdb600 100644 --- a/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs +++ b/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs @@ -63,6 +63,8 @@ impl From for crate::meta::BlockMeta { bloom_filter_index_size: value.bloom_filter_index_size, inverted_index_size: None, ngram_filter_index_size: None, + vector_index_size: None, + vector_index_location: None, virtual_block_meta: None, compression: value.compression.into(), create_on: None, diff --git a/src/query/storages/fuse/src/constants.rs b/src/query/storages/fuse/src/constants.rs index 890153cb5f15b..f16b4975939ad 100644 --- a/src/query/storages/fuse/src/constants.rs +++ b/src/query/storages/fuse/src/constants.rs @@ -36,6 +36,7 @@ pub const FUSE_TBL_LAST_SNAPSHOT_HINT_V2: &str = "last_snapshot_location_hint_v2 pub const FUSE_TBL_VIRTUAL_BLOCK_PREFIX: &str = "_vb"; pub const FUSE_TBL_AGG_INDEX_PREFIX: &str = "_i_a"; pub const FUSE_TBL_INVERTED_INDEX_PREFIX: &str = "_i_i"; +pub const FUSE_TBL_VECTOR_INDEX_PREFIX: &str = "_i_v"; pub const DEFAULT_ROW_PER_PAGE: usize = 8192; pub const DEFAULT_ROW_PER_INDEX: usize = 100000; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 96c17992f4141..f832b97be638c 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ 
b/src/query/storages/fuse/src/fuse_table.rs @@ -54,7 +54,7 @@ use databend_common_expression::TableSchema; use databend_common_expression::ORIGIN_BLOCK_ID_COL_NAME; use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COL_NAME; use databend_common_expression::ORIGIN_VERSION_COL_NAME; -use databend_common_expression::SEARCH_SCORE_COLUMN_ID; +use databend_common_expression::VECTOR_SCORE_COLUMN_ID; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_COMPRESSED_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_PER_SEGMENT; @@ -766,7 +766,7 @@ impl Table for FuseTable { } fn supported_internal_column(&self, column_id: ColumnId) -> bool { - column_id >= SEARCH_SCORE_COLUMN_ID + column_id >= VECTOR_SCORE_COLUMN_ID } fn support_column_projection(&self) -> bool { diff --git a/src/query/storages/fuse/src/io/locations.rs b/src/query/storages/fuse/src/io/locations.rs index faf266d9016fd..13272ee77706c 100644 --- a/src/query/storages/fuse/src/io/locations.rs +++ b/src/query/storages/fuse/src/io/locations.rs @@ -38,6 +38,7 @@ use crate::index::InvertedIndexFile; use crate::FUSE_TBL_AGG_INDEX_PREFIX; use crate::FUSE_TBL_INVERTED_INDEX_PREFIX; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT_V2; +use crate::FUSE_TBL_VECTOR_INDEX_PREFIX; use crate::FUSE_TBL_XOR_BLOOM_INDEX_PREFIX; static SNAPSHOT_V0: SnapshotVersion = SnapshotVersion::V0(PhantomData); static SNAPSHOT_V1: SnapshotVersion = SnapshotVersion::V1(PhantomData); @@ -63,6 +64,7 @@ pub struct TableMetaLocationGenerator { snapshot_location_prefix: String, agg_index_location_prefix: String, inverted_index_location_prefix: String, + vector_index_location_prefix: String, } impl TableMetaLocationGenerator { @@ -75,6 +77,7 @@ impl TableMetaLocationGenerator { let agg_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_AGG_INDEX_PREFIX); let inverted_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_INVERTED_INDEX_PREFIX); + let vector_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_VECTOR_INDEX_PREFIX); Self { prefix, block_location_prefix, @@ -83,6 +86,7 @@ impl TableMetaLocationGenerator { snapshot_location_prefix, agg_index_location_prefix, inverted_index_location_prefix, + vector_index_location_prefix, } } @@ -98,6 +102,10 @@ impl TableMetaLocationGenerator { &self.bloom_index_location_prefix } + pub fn block_vector_index_prefix(&self) -> &str { + &self.vector_index_location_prefix + } + pub fn segment_location_prefix(&self) -> &str { &self.segment_info_location_prefix } @@ -134,6 +142,19 @@ impl TableMetaLocationGenerator { ) } + pub fn block_vector_index_location(&self) -> Location { + let uuid = Uuid::now_v7(); + ( + format!( + "{}{}_v{}.parquet", + self.block_vector_index_prefix(), + uuid.as_simple(), + BlockFilter::VERSION, + ), + BlockFilter::VERSION, + ) + } + pub fn gen_segment_info_location( &self, table_meta_timestamps: TableMetaTimestamps, diff --git a/src/query/storages/fuse/src/io/mod.rs b/src/query/storages/fuse/src/io/mod.rs index 63b43a9ff785f..94d22a40bbb90 100644 --- a/src/query/storages/fuse/src/io/mod.rs +++ b/src/query/storages/fuse/src/io/mod.rs @@ -52,6 +52,7 @@ pub use write::InvertedIndexWriter; pub use write::MetaWriter; pub(crate) use write::StreamBlockBuilder; pub(crate) use write::StreamBlockProperties; +pub use write::VectorIndexBuilder; pub use write::VirtualColumnBuilder; pub use write::WriteSettings; pub use write::MAX_BLOCK_UNCOMPRESSED_SIZE; diff --git a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs 
b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs index f0ce5c5c789db..a07e089a42cca 100644 --- a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs +++ b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs @@ -27,6 +27,7 @@ use databend_storages_common_cache::LoadParams; use databend_storages_common_cache::Loader; use databend_storages_common_index::BloomIndexMeta; use databend_storages_common_index::InvertedIndexMeta; +use databend_storages_common_index::VectorIndexMeta; use databend_storages_common_table_meta::meta::CompactSegmentInfo; use databend_storages_common_table_meta::meta::SegmentInfoVersion; use databend_storages_common_table_meta::meta::SingleColumnMeta; @@ -51,6 +52,7 @@ pub type TableSnapshotReader = InMemoryCacheReader>; pub type InvertedIndexMetaReader = InMemoryCacheReader>; +pub type VectorIndexMetaReader = InMemoryCacheReader>; pub struct MetaReaders; @@ -100,6 +102,13 @@ impl MetaReaders { LoaderWrapper(dal), ) } + + pub fn vector_index_meta_reader(dal: Operator) -> VectorIndexMetaReader { + VectorIndexMetaReader::new( + CacheManager::instance().get_vector_index_meta_cache(), + LoaderWrapper(dal), + ) + } } // workaround for the orphan rules @@ -254,6 +263,24 @@ impl Loader for LoaderWrapper { } } +#[async_trait::async_trait] +impl Loader for LoaderWrapper { + #[async_backtrace::framed] + async fn load(&self, params: &LoadParams) -> Result { + // read the ThriftFileMetaData, omit unnecessary conversions + let meta = read_thrift_file_metadata(self.0.clone(), ¶ms.location, params.len_hint) + .await + .map_err(|err| { + ErrorCode::StorageOther(format!( + "read file meta failed, {}, {:?}", + params.location, err + )) + })?; + + VectorIndexMeta::try_from(meta) + } +} + pub async fn bytes_reader(op: &Operator, path: &str, len_hint: Option) -> Result { let reader = if let Some(len) = len_hint { op.read_with(path).range(0..len).await? diff --git a/src/query/storages/fuse/src/io/read/mod.rs b/src/query/storages/fuse/src/io/read/mod.rs index 4dbac4c46e853..391f23e33c6e9 100644 --- a/src/query/storages/fuse/src/io/read/mod.rs +++ b/src/query/storages/fuse/src/io/read/mod.rs @@ -20,6 +20,7 @@ pub mod meta; mod segment_reader; mod snapshot_history_reader; mod utils; +mod vector_index; mod virtual_column; pub use agg_index::AggIndexReader; @@ -38,5 +39,6 @@ pub use segment_reader::RowOrientedSegmentReader; pub use segment_reader::SegmentReader; pub use snapshot_history_reader::SnapshotHistoryReader; pub use utils::build_columns_meta; +pub use vector_index::VectorIndexReader; pub use virtual_column::VirtualBlockReadResult; pub use virtual_column::VirtualColumnReader; diff --git a/src/query/storages/fuse/src/io/read/vector_index/mod.rs b/src/query/storages/fuse/src/io/read/vector_index/mod.rs new file mode 100644 index 0000000000000..f62d5686f187c --- /dev/null +++ b/src/query/storages/fuse/src/io/read/vector_index/mod.rs @@ -0,0 +1,18 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
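The loader and reader below follow the same cached-reader pattern as the bloom and inverted indexes. A hedged sketch of driving the meta reader added above (imports elided; the commented return type is inferred from the loader code that follows):

    // Load (and cache) the footer metadata of one vector index parquet file.
    // `dal` is the table's opendal Operator, `location` the index file path.
    async fn read_vector_index_meta(
        dal: opendal::Operator,
        location: &str,
    ) -> databend_common_exception::Result<()> {
        let reader = MetaReaders::vector_index_meta_reader(dal);
        let params = LoadParams {
            location: location.to_string(),
            len_hint: None,  // no cached file length; the loader probes it
            ver: 0,          // vector index meta is not versioned yet
            put_cache: true, // populate the in-memory meta cache on a miss
        };
        let _meta /* : Arc<VectorIndexMeta> */ = reader.read(&params).await?;
        Ok(())
    }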
+ +mod vector_index_loader; +mod vector_index_reader; + +pub use vector_index_reader::VectorIndexReader; diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs new file mode 100644 index 0000000000000..53a220b622c80 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs @@ -0,0 +1,224 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::future::Future; +use std::sync::Arc; +use std::time::Instant; + +use arrow::datatypes::Field; +use arrow::datatypes::Fields; +use arrow::datatypes::Schema; +use bytes::Bytes; +use databend_common_base::runtime::GlobalIORuntime; +use databend_common_base::runtime::Runtime; +use databend_common_base::runtime::TrySpawn; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::Column; +use databend_common_metrics::storage::metrics_inc_block_vector_index_read_bytes; +use databend_common_metrics::storage::metrics_inc_block_vector_index_read_milliseconds; +use databend_storages_common_cache::CacheAccessor; +use databend_storages_common_cache::CacheManager; +use databend_storages_common_cache::LoadParams; +use databend_storages_common_index::VectorIndexMeta; +use databend_storages_common_io::MergeIOReader; +use databend_storages_common_io::ReadSettings; +use databend_storages_common_table_meta::table::TableCompression; +use opendal::Operator; +use parquet::arrow::arrow_reader::ParquetRecordBatchReader; +use parquet::arrow::parquet_to_arrow_field_levels; +use parquet::arrow::ArrowSchemaConverter; +use parquet::arrow::ProjectionMask; + +use crate::index::VectorIndexFile; +use crate::io::read::block::parquet::RowGroupImplBuilder; +use crate::io::MetaReaders; + +#[async_trait::async_trait] +trait InRuntime +where Self: Future +{ + async fn execute_in_runtime(self, runtime: &Runtime) -> Result; +} + +#[async_trait::async_trait] +impl InRuntime for T +where + T: Future + Send + 'static, + T::Output: Send + 'static, +{ + #[async_backtrace::framed] + async fn execute_in_runtime(self, runtime: &Runtime) -> Result { + runtime + .try_spawn(self, None)? + .await + .map_err(|e| ErrorCode::TokioError(format!("runtime join error. {}", e))) + } +} + +/// Loads vector index meta data +/// read data from cache, or populate cache items if possible +#[fastrace::trace] +pub(crate) async fn load_vector_index_meta( + dal: Operator, + path: &str, +) -> Result> { + let path_owned = path.to_owned(); + async move { + let reader = MetaReaders::vector_index_meta_reader(dal); + let version = 0; + + let load_params = LoadParams { + location: path_owned, + len_hint: None, + ver: version, + put_cache: true, + }; + + reader.read(&load_params).await + } + .execute_in_runtime(&GlobalIORuntime::instance()) + .await? 
+} + +/// load index column data +#[fastrace::trace] +pub(crate) async fn load_vector_index_files<'a>( + operator: Operator, + settings: &ReadSettings, + column_names: &'a [String], + location: &'a str, +) -> Result> { + let start = Instant::now(); + + // 1. load index meta + let vector_index_meta = load_vector_index_meta(operator.clone(), location).await?; + + // 2. build index schema + let vector_index_fields: Vec<_> = vector_index_meta + .columns + .iter() + .map(|col| Field::new(col.0.clone(), arrow::datatypes::DataType::Binary, false)) + .collect(); + let vector_index_schema = Schema::new(Fields::from(vector_index_fields)); + + let vector_index_schema_desc = + Arc::new(ArrowSchemaConverter::new().convert(&vector_index_schema)?); + + // 3. collect column metas that needed to build vector index + let vector_column_chunk_metas = &vector_index_meta.columns; + + let mut column_indices = Vec::with_capacity(column_names.len()); + for column_name in column_names { + let column_index = vector_index_schema.index_of(column_name)?; + column_indices.push(column_index); + } + + let projected_column_metas: Vec<_> = vector_column_chunk_metas + .iter() + .enumerate() + .filter(|(i, _)| column_indices.contains(i)) + .map(|(_, meta)| meta) + .collect(); + + // 4. read column data, first try to read from cache, + // if not exists, fetch from object storage + let mut ranges = Vec::new(); + let mut names_map = HashMap::new(); + let mut column_data = HashMap::new(); + let vector_index_file_cache = CacheManager::instance().get_vector_index_file_cache(); + for (i, (name, col_meta)) in column_indices + .iter() + .zip(projected_column_metas.into_iter()) + { + let cache_key = cache_key_of_column(location, name); + if let Some(cache_file) = vector_index_file_cache.get_sized(&cache_key, col_meta.len) { + column_data.insert(*i, cache_file); + continue; + } + + // if cache missed, prepare the ranges to be read + let col_range = col_meta.offset..(col_meta.offset + col_meta.len); + + ranges.push((*i as u32, col_range)); + names_map.insert(*i as u32, (name, cache_key)); + } + + if !ranges.is_empty() { + let merge_io_result = + MergeIOReader::merge_io_read(settings, operator.clone(), location, &ranges).await?; + + // merge column data fetched from object storage + for (i, (chunk_idx, range)) in &merge_io_result.columns_chunk_offsets { + let chunk = merge_io_result + .owner_memory + .get_chunk(*chunk_idx, &merge_io_result.block_path)?; + let data = chunk.slice(range.clone()); + + let (name, cache_key) = names_map.remove(i).unwrap(); + let file = VectorIndexFile::create(name.clone(), data.into()); + + // add index file to cache + vector_index_file_cache.insert(cache_key, file.clone()); + column_data.insert(*i as usize, Arc::new(file)); + } + } + + // 5. 
deserialize raw data to vector index data + let mut builder = + RowGroupImplBuilder::new(1, &vector_index_schema_desc, TableCompression::Zstd.into()); + + for (i, column_data) in column_data { + builder.add_column_chunk(i, Bytes::copy_from_slice(&column_data.data)); + } + let row_group = Box::new(builder.build()); + let field_levels = parquet_to_arrow_field_levels( + vector_index_schema_desc.as_ref(), + ProjectionMask::leaves(&vector_index_schema_desc, column_indices), + None, + )?; + let mut record_reader = ParquetRecordBatchReader::try_new_with_row_groups( + &field_levels, + row_group.as_ref(), + 1, + None, + )?; + let record = record_reader.next().unwrap()?; + assert!(record_reader.next().is_none()); + + let mut vector_bytes_len = 0; + let mut vector_columns = Vec::with_capacity(4); + for i in 0..record.num_columns() { + let vector_binary = record.column(i).clone(); + let column = Column::from_arrow_rs( + vector_binary, + &databend_common_expression::types::DataType::Binary, + )?; + vector_bytes_len += column.memory_size(); + vector_columns.push(column); + } + + // Perf. + { + metrics_inc_block_vector_index_read_bytes(vector_bytes_len as u64); + metrics_inc_block_vector_index_read_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(vector_columns) +} + +fn cache_key_of_column(index_path: &str, index_column_name: &str) -> String { + format!("{index_path}-{index_column_name}") +} diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs new file mode 100644 index 0000000000000..bfb084787a126 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs @@ -0,0 +1,112 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
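Next comes the reader that the pruner drives. As a usage sketch (names mirror the code below; `prune` runs a real HNSW search capped at `limit`, while `generate_scores` scores every row of the block):

    // Score one block's vector index file and keep its best `limit` rows.
    async fn topn_for_block(
        reader: &VectorIndexReader,
        limit: usize,
        row_count: usize,
        index_location: &str,
    ) -> databend_common_exception::Result<Vec<ScoredPointOffset>> {
        // Internally this loads the four index columns (graph links, graph
        // data, quantization meta and data), rebuilds the HNSW graph, and
        // searches it with the preprocessed query vector.
        reader.prune(limit, row_count, index_location).await
    }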
+ +use std::time::Instant; + +use databend_common_exception::Result; +use databend_common_metrics::storage::metrics_inc_block_vector_index_search_milliseconds; +use databend_storages_common_index::DistanceType; +use databend_storages_common_index::HNSWIndex; +use databend_storages_common_index::ScoredPointOffset; +use databend_storages_common_io::ReadSettings; +use opendal::Operator; + +use crate::io::read::vector_index::vector_index_loader::load_vector_index_files; + +#[derive(Clone)] +pub struct VectorIndexReader { + operator: Operator, + settings: ReadSettings, + dim: usize, + distance_type: DistanceType, + columns: Vec<String>, + query_values: Vec<f32>, +} + +impl VectorIndexReader { + pub fn create( + operator: Operator, + settings: ReadSettings, + distance_type: DistanceType, + columns: Vec<String>, + query_values: Vec<f32>, + ) -> Self { + let dim = query_values.len(); + let processed_query_values = HNSWIndex::preprocess_query(distance_type, query_values); + + Self { + operator, + settings, + dim, + distance_type, + columns, + query_values: processed_query_values, + } + } + + pub async fn prune( + &self, + limit: usize, + row_count: usize, + location: &str, + ) -> Result<Vec<ScoredPointOffset>> { + let start = Instant::now(); + + let binary_columns = load_vector_index_files( + self.operator.clone(), + &self.settings, + &self.columns, + location, + ) + .await?; + + let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; + + let res = hnsw_index.search(limit, &self.query_values)?; + + // Perf. + { + metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(res) + } + + pub async fn generate_scores( + &self, + row_count: usize, + location: &str, + ) -> Result<Vec<ScoredPointOffset>> { + let start = Instant::now(); + + let binary_columns = load_vector_index_files( + self.operator.clone(), + &self.settings, + &self.columns, + location, + ) + .await?; + + let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; + + let res = hnsw_index.generate_scores(row_count as u32, &self.query_values)?; + + // Perf.
+ { + metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(res) + } +} diff --git a/src/query/storages/fuse/src/io/write/block_writer.rs b/src/query/storages/fuse/src/io/write/block_writer.rs index c6593a248c8af..1f1b6a5579e25 100644 --- a/src/query/storages/fuse/src/io/write/block_writer.rs +++ b/src/query/storages/fuse/src/io/write/block_writer.rs @@ -32,6 +32,9 @@ use databend_common_metrics::storage::metrics_inc_block_index_write_nums; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_bytes; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_nums; +use databend_common_metrics::storage::metrics_inc_block_vector_index_write_bytes; +use databend_common_metrics::storage::metrics_inc_block_vector_index_write_milliseconds; +use databend_common_metrics::storage::metrics_inc_block_vector_index_write_nums; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_bytes; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_nums; @@ -52,6 +55,8 @@ use crate::io::write::virtual_column_builder::VirtualColumnBuilder; use crate::io::write::virtual_column_builder::VirtualColumnState; use crate::io::write::InvertedIndexBuilder; use crate::io::write::InvertedIndexState; +use crate::io::write::VectorIndexBuilder; +use crate::io::write::VectorIndexState; use crate::io::write::WriteSettings; use crate::io::BloomIndexState; use crate::io::TableMetaLocationGenerator; @@ -130,6 +135,7 @@ pub struct BlockSerialization { pub bloom_index_state: Option, pub inverted_index_states: Vec, pub virtual_column_state: Option, + pub vector_index_state: Option, } #[derive(Clone)] @@ -143,6 +149,7 @@ pub struct BlockBuilder { pub ngram_args: Vec, pub inverted_index_builders: Vec, pub virtual_column_builder: Option, + pub vector_index_builder: Option, pub table_meta_timestamps: TableMetaTimestamps, } @@ -177,6 +184,15 @@ impl BlockBuilder { )?; inverted_index_states.push(inverted_index_state); } + let vector_index_state = if let Some(ref vector_index_builder) = self.vector_index_builder { + let vector_index_location = self.meta_locations.block_vector_index_location(); + let mut vector_index_builder = vector_index_builder.clone(); + vector_index_builder.add_block(&data_block)?; + let vector_index_state = vector_index_builder.finalize(&vector_index_location)?; + Some(vector_index_state) + } else { + None + }; let virtual_column_state = if let Some(ref virtual_column_builder) = self.virtual_column_builder { @@ -225,6 +241,8 @@ impl BlockBuilder { .as_ref() .map(|v| v.ngram_size) .unwrap_or_default(), + vector_index_size: vector_index_state.as_ref().map(|v| v.size), + vector_index_location: vector_index_state.as_ref().map(|v| v.location.clone()), compression: self.write_settings.table_compression.into(), inverted_index_size, virtual_block_meta: None, @@ -237,6 +255,7 @@ impl BlockBuilder { bloom_index_state, inverted_index_states, virtual_column_state, + vector_index_state, }; Ok(serialized) } @@ -268,6 +287,7 @@ impl BlockWriter { Self::write_down_data_block(dal, serialized.block_raw_data, &block_meta.location.0).await?; Self::write_down_bloom_index_state(dal, serialized.bloom_index_state).await?; + Self::write_down_vector_index_state(dal, serialized.vector_index_state).await?; 
Self::write_down_inverted_index_state(dal, serialized.inverted_index_states).await?; Self::write_down_virtual_column_state(dal, serialized.virtual_column_state).await?; @@ -308,6 +328,24 @@ impl BlockWriter { Ok(()) } + pub async fn write_down_vector_index_state( + dal: &Operator, + vector_index_state: Option, + ) -> Result<()> { + if let Some(vector_index_state) = vector_index_state { + let start = Instant::now(); + + let location = &vector_index_state.location.0; + let index_size = vector_index_state.size; + write_data(vector_index_state.data, dal, location).await?; + + metrics_inc_block_vector_index_write_nums(1); + metrics_inc_block_vector_index_write_bytes(index_size); + metrics_inc_block_vector_index_write_milliseconds(start.elapsed().as_millis() as u64); + } + Ok(()) + } + pub async fn write_down_inverted_index_state( dal: &Operator, inverted_index_states: Vec, diff --git a/src/query/storages/fuse/src/io/write/mod.rs b/src/query/storages/fuse/src/io/write/mod.rs index b0af3633055dc..e7f3bfbe82c2f 100644 --- a/src/query/storages/fuse/src/io/write/mod.rs +++ b/src/query/storages/fuse/src/io/write/mod.rs @@ -17,6 +17,7 @@ mod bloom_index_writer; mod inverted_index_writer; mod meta_writer; mod stream; +mod vector_index_writer; mod virtual_column_builder; mod write_settings; @@ -37,6 +38,8 @@ pub use meta_writer::CachedMetaWriter; pub use meta_writer::MetaWriter; pub(crate) use stream::StreamBlockBuilder; pub(crate) use stream::StreamBlockProperties; +pub use vector_index_writer::VectorIndexBuilder; +pub(crate) use vector_index_writer::VectorIndexState; pub use virtual_column_builder::VirtualColumnBuilder; pub use write_settings::WriteSettings; pub use write_settings::MAX_BLOCK_UNCOMPRESSED_SIZE; diff --git a/src/query/storages/fuse/src/io/write/stream/block_builder.rs b/src/query/storages/fuse/src/io/write/stream/block_builder.rs index 7193d988952e8..73ee3c1ded29e 100644 --- a/src/query/storages/fuse/src/io/write/stream/block_builder.rs +++ b/src/query/storages/fuse/src/io/write/stream/block_builder.rs @@ -34,6 +34,7 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COLUMN_ID; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; +use databend_common_meta_app::schema::TableIndex; use databend_common_native::write::NativeWriter; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; @@ -59,6 +60,7 @@ use crate::io::BloomIndexState; use crate::io::InvertedIndexBuilder; use crate::io::InvertedIndexWriter; use crate::io::TableMetaLocationGenerator; +use crate::io::VectorIndexBuilder; use crate::io::VirtualColumnBuilder; use crate::io::WriteSettings; use crate::operations::column_parquet_metas; @@ -150,6 +152,7 @@ pub struct StreamBlockBuilder { inverted_index_writers: Vec, bloom_index_builder: BloomIndexBuilder, virtual_column_builder: Option, + vector_index_builder: Option, cluster_stats_state: ClusterStatisticsState, column_stats_state: ColumnStatisticsState, @@ -228,6 +231,11 @@ impl StreamBlockBuilder { } else { None }; + let vector_index_builder = VectorIndexBuilder::try_create( + properties.ctx.clone(), + &properties.table_indexes, + properties.source_schema.clone(), + ); let cluster_stats_state = ClusterStatisticsState::new(properties.cluster_stats_builder.clone()); @@ -240,6 +248,7 @@ impl StreamBlockBuilder { inverted_index_writers, bloom_index_builder, virtual_column_builder, + vector_index_builder, row_count: 0, block_size: 0, 
column_stats_state, @@ -278,7 +287,9 @@ impl StreamBlockBuilder { if let Some(ref mut virtual_column_builder) = self.virtual_column_builder { virtual_column_builder.add_block(&block)?; } - + if let Some(ref mut vector_index_builder) = self.vector_index_builder { + vector_index_builder.add_block(&block)?; + } self.row_count += block.num_rows(); self.block_size += block.estimate_block_size(); self.block_writer @@ -331,6 +342,17 @@ impl StreamBlockBuilder { } else { None }; + let vector_index_state = + if let Some(ref mut vector_index_builder) = self.vector_index_builder { + let vector_index_location = + self.properties.meta_locations.block_vector_index_location(); + let vector_index_state = vector_index_builder.finalize(&vector_index_location)?; + Some(vector_index_state) + } else { + None + }; + let vector_index_size = vector_index_state.as_ref().map(|v| v.size); + let vector_index_location = vector_index_state.as_ref().map(|v| v.location.clone()); let col_metas = self.block_writer.finish(&self.properties.source_schema)?; let block_raw_data = mem::take(self.block_writer.inner_mut()); @@ -361,6 +383,8 @@ impl StreamBlockBuilder { .unwrap_or_default(), compression: self.properties.write_settings.table_compression.into(), inverted_index_size, + vector_index_size, + vector_index_location, create_on: Some(Utc::now()), ngram_filter_index_size: None, virtual_block_meta: None, @@ -371,6 +395,7 @@ impl StreamBlockBuilder { bloom_index_state, inverted_index_states, virtual_column_state, + vector_index_state, }; Ok(serialized) } @@ -392,6 +417,7 @@ pub struct StreamBlockProperties { inverted_index_builders: Vec, table_meta_timestamps: TableMetaTimestamps, support_virtual_columns: bool, + table_indexes: BTreeMap, } impl StreamBlockProperties { @@ -448,6 +474,7 @@ impl StreamBlockProperties { } } let support_virtual_columns = table.support_virtual_columns(); + let table_indexes = table.table_info.meta.indexes.clone(); Ok(Arc::new(StreamBlockProperties { ctx, meta_locations: table.meta_location_generator().clone(), @@ -462,6 +489,7 @@ impl StreamBlockProperties { inverted_index_builders, table_meta_timestamps, support_virtual_columns, + table_indexes, })) } } diff --git a/src/query/storages/fuse/src/io/write/vector_index_writer.rs b/src/query/storages/fuse/src/io/write/vector_index_writer.rs new file mode 100644 index 0000000000000..ee21b769ae826 --- /dev/null +++ b/src/query/storages/fuse/src/io/write/vector_index_writer.rs @@ -0,0 +1,247 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
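The builder defined below buffers the indexed vector columns block by block and serializes all HNSW artifacts into one parquet file per block. A hedged lifecycle sketch (`ctx`, `table_indexes`, `schema`, `blocks` and `location` are assumed to be in scope; `try_create` returns None when the enterprise license check fails or no usable synchronous vector index is defined):

    if let Some(mut builder) = VectorIndexBuilder::try_create(ctx, &table_indexes, schema) {
        for block in &blocks {
            // Buffers the vector columns referenced by the index definitions.
            builder.add_block(block)?;
        }
        // Builds the HNSW graph(s) plus quantized storage and encodes them
        // as binary columns in a single zstd-compressed parquet file.
        let state = builder.finalize(&location)?;
        assert_eq!(state.size as usize, state.data.len());
    }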
+ +use std::collections::BTreeMap; +use std::collections::HashSet; +use std::sync::Arc; +use std::time::Instant; + +use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::Column; +use databend_common_expression::ColumnId; +use databend_common_expression::DataBlock; +use databend_common_expression::TableSchemaRef; +use databend_common_expression::TableSchemaRefExt; +use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; +use databend_common_license::license::Feature; +use databend_common_license::license_manager::LicenseManagerSwitch; +use databend_common_meta_app::schema::TableIndex; +use databend_common_meta_app::schema::TableIndexType; +use databend_common_metrics::storage::metrics_inc_block_vector_index_generate_milliseconds; +use databend_storages_common_blocks::blocks_to_parquet; +use databend_storages_common_index::DistanceType; +use databend_storages_common_index::HNSWIndex; +use databend_storages_common_table_meta::meta::Location; +use databend_storages_common_table_meta::table::TableCompression; + +#[derive(Debug, Clone)] +pub struct VectorIndexState { + pub location: Location, + pub size: u64, + pub data: Vec, +} + +#[derive(Debug, Clone)] +struct VectorIndexParam { + index_name: String, + index_version: String, + m: usize, + ef_construct: usize, + distances: Vec, +} + +#[derive(Clone)] +pub struct VectorIndexBuilder { + // Parameters for each vector index + index_params: Vec, + field_offsets: Vec>, + field_offsets_set: HashSet, + + // Collected vector columns + columns: BTreeMap>, +} + +impl VectorIndexBuilder { + pub fn try_create( + ctx: Arc, + table_indexes: &BTreeMap, + schema: TableSchemaRef, + ) -> Option { + LicenseManagerSwitch::instance() + .check_enterprise_enabled(ctx.get_license_key(), Feature::VectorIndex) + .ok()?; + + let mut index_params = Vec::with_capacity(table_indexes.len()); + let mut field_offsets = Vec::with_capacity(table_indexes.len()); + let mut field_offsets_set = HashSet::new(); + + for index in table_indexes.values() { + if !matches!(index.index_type, TableIndexType::Vector) { + continue; + } + if !index.sync_creation { + continue; + } + + let mut offsets = Vec::with_capacity(index.column_ids.len()); + for column_id in &index.column_ids { + for (offset, field) in schema.fields.iter().enumerate() { + if field.column_id() == *column_id { + offsets.push((offset, *column_id)); + break; + } + } + } + // ignore invalid index + if offsets.len() != index.column_ids.len() { + continue; + } + for (offset, _) in &offsets { + field_offsets_set.insert(*offset); + } + field_offsets.push(offsets); + + // Parse index parameters + let m = match index.options.get("m") { + Some(value) => value.parse::().unwrap_or(16), + None => 16, + }; + + let ef_construct = match index.options.get("ef_construct") { + Some(value) => value.parse::().unwrap_or(64), + None => 64, + }; + + let mut distances = Vec::new(); + match index.options.get("distance") { + Some(value) => { + let distance_types: Vec<&str> = value.split(',').collect(); + for distance_type in distance_types { + let distance = match distance_type { + "cosine" => DistanceType::Dot, + "l1" => DistanceType::L1, + "l2" => DistanceType::L2, + _ => continue, + }; + distances.push(distance); + } + } + None => continue, + }; + if distances.is_empty() { + continue; + } + let index_param = VectorIndexParam { + index_name: index.name.clone(), + index_version: index.version.clone(), + m, + 
ef_construct, + distances, + }; + index_params.push(index_param); + } + + let mut columns = BTreeMap::new(); + for offset in &field_offsets_set { + columns.insert(*offset, vec![]); + } + + if !field_offsets.is_empty() { + Some(VectorIndexBuilder { + index_params, + field_offsets, + field_offsets_set, + columns, + }) + } else { + None + } + } + + pub fn add_block(&mut self, block: &DataBlock) -> Result<()> { + for offset in &self.field_offsets_set { + let block_entry = block.get_by_offset(*offset); + let column = block_entry.to_column(); + + if let Some(columns) = self.columns.get_mut(offset) { + columns.push(column); + } else { + return Err(ErrorCode::Internal("Can't find vector column")); + } + } + Ok(()) + } + + #[async_backtrace::framed] + pub fn finalize(&mut self, location: &Location) -> Result { + let start = Instant::now(); + + let mut columns = BTreeMap::new(); + for offset in &self.field_offsets_set { + columns.insert(*offset, vec![]); + } + std::mem::swap(&mut self.columns, &mut columns); + + let mut concated_columns = BTreeMap::new(); + for (offset, columns) in columns.into_iter() { + let concated_column = Column::concat_columns(columns.into_iter())?; + concated_columns.insert(offset, concated_column); + } + + let mut index_fields = Vec::new(); + let mut index_columns = Vec::new(); + let mut metadata = BTreeMap::new(); + + for (field_offsets, index_param) in self.field_offsets.iter().zip(&self.index_params) { + for (offset, column_id) in field_offsets { + let Some(column) = concated_columns.get(offset) else { + return Err(ErrorCode::Internal("Can't find vector column")); + }; + for distance in &index_param.distances { + let (mut hnsw_index_fields, mut hnsw_index_columns) = HNSWIndex::build( + index_param.m, + index_param.ef_construct, + *column_id, + column.clone(), + *distance, + )?; + index_fields.append(&mut hnsw_index_fields); + index_columns.append(&mut hnsw_index_columns); + } + } + metadata.insert( + index_param.index_name.clone(), + index_param.index_version.clone(), + ); + } + + let index_schema = TableSchemaRefExt::create(index_fields); + let index_block = DataBlock::new(index_columns, 1); + + let mut data = Vec::with_capacity(DEFAULT_BLOCK_INDEX_BUFFER_SIZE); + let _ = blocks_to_parquet( + index_schema.as_ref(), + vec![index_block], + &mut data, + // Zstd has the best compression ratio + TableCompression::Zstd, + )?; + + let size = data.len() as u64; + let state = VectorIndexState { + location: location.clone(), + size, + data, + }; + + // Perf. 
+ { + metrics_inc_block_vector_index_generate_milliseconds(start.elapsed().as_millis() as u64); + } + + Ok(state) + } +} diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs index 81dfc6dccbe26..f13ed6701482d 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs @@ -41,6 +41,7 @@ use crate::io::create_inverted_index_builders; use crate::io::BlockBuilder; use crate::io::BlockSerialization; use crate::io::BlockWriter; +use crate::io::VectorIndexBuilder; use crate::io::VirtualColumnBuilder; use crate::operations::common::BlockMetaIndex; use crate::operations::common::MutationLogEntry; @@ -168,6 +169,11 @@ impl TransformSerializeBlock { } else { None }; + let vector_index_builder = VectorIndexBuilder::try_create( + ctx.clone(), + &table.table_info.meta.indexes, + source_schema.clone(), + ); let block_builder = BlockBuilder { ctx, @@ -179,6 +185,7 @@ impl TransformSerializeBlock { ngram_args, inverted_index_builders, virtual_column_builder, + vector_index_builder, table_meta_timestamps, }; Ok(TransformSerializeBlock { diff --git a/src/query/storages/fuse/src/operations/merge.rs b/src/query/storages/fuse/src/operations/merge.rs index 49a6e2b89f300..e149196075dcd 100644 --- a/src/query/storages/fuse/src/operations/merge.rs +++ b/src/query/storages/fuse/src/operations/merge.rs @@ -28,6 +28,7 @@ use super::merge_into::MatchedAggregator; use super::mutation::SegmentIndex; use crate::io::create_inverted_index_builders; use crate::io::BlockBuilder; +use crate::io::VectorIndexBuilder; use crate::statistics::ClusterStatsGenerator; use crate::FuseTable; @@ -97,6 +98,11 @@ impl FuseTable { &self.table_info.meta.schema, )?; let inverted_index_builders = create_inverted_index_builders(&self.table_info.meta); + let vector_index_builder = VectorIndexBuilder::try_create( + ctx.clone(), + &self.table_info.meta.indexes, + new_schema.clone(), + ); let block_builder = BlockBuilder { ctx: ctx.clone(), @@ -107,6 +113,7 @@ impl FuseTable { bloom_columns_map, ngram_args, inverted_index_builders, + vector_index_builder, // todo virtual_column_builder: None, table_meta_timestamps, diff --git a/src/query/storages/fuse/src/operations/read/util.rs b/src/query/storages/fuse/src/operations/read/util.rs index e93dfe503c82d..0eed2a1250614 100644 --- a/src/query/storages/fuse/src/operations/read/util.rs +++ b/src/query/storages/fuse/src/operations/read/util.rs @@ -77,6 +77,7 @@ pub(crate) fn add_data_block_meta( base_block_ids, inner: meta, matched_rows: block_meta.matched_rows.clone(), + vector_scores: block_meta.vector_scores.clone(), }; meta = Some(Box::new(internal_column_meta)); } diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index d68885f5335ac..b42078783cb85 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -51,7 +51,7 @@ use databend_storages_common_cache::CachedObject; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::NgramArgs; use databend_storages_common_pruner::BlockMetaIndex; -use databend_storages_common_pruner::TopNPrunner; +use databend_storages_common_pruner::TopNPruner; use 
databend_storages_common_table_meta::meta::column_oriented_segment::meta_name; use databend_storages_common_table_meta::meta::column_oriented_segment::stat_name; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOCK_SIZE; @@ -157,7 +157,15 @@ impl FuseTable { nodes_num = cluster.nodes.len(); } - if self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning) { + let has_vector_topn = if let Some(ref push_downs) = push_downs { + push_downs.vector_topn() + } else { + false + }; + + if (self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning)) + && !has_vector_topn + { let mut segments = Vec::with_capacity(segment_locs.len()); for (idx, segment_location) in segment_locs.into_iter().enumerate() { segments.push(FuseLazyPartInfo::create(idx, segment_location)) @@ -476,7 +484,7 @@ impl FuseTable { let push_down = push_down.as_ref().unwrap(); let limit = push_down.limit.unwrap(); let sort = push_down.order_by.clone(); - let topn_pruner = TopNPrunner::create(schema, sort, limit); + let topn_pruner = TopNPruner::create(schema, sort, limit); prune_pipeline.resize(1, false)?; prune_pipeline.add_transform(move |input, output| { TopNPruneTransform::create(input, output, topn_pruner.clone()) diff --git a/src/query/storages/fuse/src/pruning/block_pruner.rs b/src/query/storages/fuse/src/pruning/block_pruner.rs index a08e270689fc1..4db5d01d28d14 100644 --- a/src/query/storages/fuse/src/pruning/block_pruner.rs +++ b/src/query/storages/fuse/src/pruning/block_pruner.rs @@ -289,6 +289,7 @@ impl BlockPruner { segment_location: segment_location.location.0.clone(), snapshot_location: segment_location.snapshot_loc.clone(), matched_rows: prune_result.matched_rows.clone(), + vector_scores: None, virtual_block_meta: prune_result.virtual_block_meta.clone(), }, block, @@ -357,6 +358,7 @@ impl BlockPruner { segment_location: segment_location.location.0.clone(), snapshot_location: segment_location.snapshot_loc.clone(), matched_rows: None, + vector_scores: None, virtual_block_meta: None, }, block_meta.clone(), diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index 333092da29b09..ec907dea94603 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -41,7 +41,7 @@ use databend_storages_common_pruner::PagePruner; use databend_storages_common_pruner::PagePrunerCreator; use databend_storages_common_pruner::RangePruner; use databend_storages_common_pruner::RangePrunerCreator; -use databend_storages_common_pruner::TopNPrunner; +use databend_storages_common_pruner::TopNPruner; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterKey; use databend_storages_common_table_meta::meta::ColumnStatistics; @@ -64,6 +64,7 @@ use crate::pruning::BloomPrunerCreator; use crate::pruning::FusePruningStatistics; use crate::pruning::InvertedIndexPruner; use crate::pruning::SegmentLocation; +use crate::pruning::VectorIndexPruner; use crate::pruning::VirtualColumnPruner; const SMALL_DATASET_SAMPLE_THRESHOLD: usize = 100; @@ -447,7 +448,8 @@ impl FusePruner { // Todo:: for now, all operation (contains other mutation other than delete, like select,update etc.) // will get here, we can prevent other mutations like update and so on. // TopN pruner. 
- self.topn_pruning(metas) + let metas = self.topn_pruning(metas)?; + self.vector_pruning(metas).await } } @@ -516,7 +518,8 @@ impl FusePruner { let res = worker?; metas.extend(res); } - self.topn_pruning(metas) + let metas = self.topn_pruning(metas)?; + self.vector_pruning(metas).await } // topn pruner: @@ -535,12 +538,44 @@ impl FusePruner { let push_down = push_down.as_ref().unwrap(); let limit = push_down.limit.unwrap(); let sort = push_down.order_by.clone(); - let topn_pruner = TopNPrunner::create(schema, sort, limit); + let topn_pruner = TopNPruner::create(schema, sort, limit); return Ok(topn_pruner.prune(metas.clone()).unwrap_or(metas)); } Ok(metas) } + async fn vector_pruning( + &self, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + let push_down = self.push_down.clone(); + if push_down + .as_ref() + .filter(|p| p.vector_index.is_some()) + .is_some() + { + let schema = self.table_schema.clone(); + let push_down = push_down.as_ref().unwrap(); + let filters = push_down.filters.clone(); + let sort = push_down.order_by.clone(); + let limit = push_down.limit; + let vector_index = push_down.vector_index.clone().unwrap(); + + let vector_pruner = VectorIndexPruner::create( + self.pruning_ctx.ctx.clone(), + self.pruning_ctx.dal.clone(), + schema, + vector_index, + filters, + sort, + limit, + )?; + let pruned_metas = vector_pruner.prune(metas.clone()).await?; + return Ok(pruned_metas); + } + Ok(metas) + } + // Pruning stats. pub fn pruning_stats(&self) -> databend_common_catalog::plan::PruningStatistics { let stats = self.pruning_ctx.pruning_stats.clone(); diff --git a/src/query/storages/fuse/src/pruning/mod.rs b/src/query/storages/fuse/src/pruning/mod.rs index 3c90972b2df5a..650e11aba4c7b 100644 --- a/src/query/storages/fuse/src/pruning/mod.rs +++ b/src/query/storages/fuse/src/pruning/mod.rs @@ -21,6 +21,7 @@ mod inverted_index_pruner; mod pruner_location; mod pruning_statistics; mod segment_pruner; +mod vector_index_pruner; mod virtual_column_pruner; pub use block_pruner::BlockPruner; @@ -37,4 +38,5 @@ pub use pruner_location::create_segment_location_vector; pub use pruner_location::SegmentLocation; pub use pruning_statistics::FusePruningStatistics; pub use segment_pruner::SegmentPruner; +pub use vector_index_pruner::VectorIndexPruner; pub use virtual_column_pruner::VirtualColumnPruner; diff --git a/src/query/storages/fuse/src/pruning/vector_index_pruner.rs b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs new file mode 100644 index 0000000000000..6257048c84891 --- /dev/null +++ b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs @@ -0,0 +1,230 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
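The pruner below keeps the best `limit` rows across all blocks in a FixedLengthPriorityQueue of VectorScore values whose Ord is deliberately reversed: distances are "smaller is better", so the queue must evict the largest score when full. A standalone toy version of that inversion (not this crate's type):

    use std::cmp::Ordering;

    #[derive(PartialEq)]
    struct Score(f32);

    impl Eq for Score {}

    impl PartialOrd for Score {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    impl Ord for Score {
        // Compare other-to-self so that "greater" means "smaller distance",
        // letting a max-oriented fixed-length queue drop the worst matches.
        fn cmp(&self, other: &Self) -> Ordering {
            other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
        }
    }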
+ +use std::cmp::Ordering; +use std::collections::HashSet; +use std::sync::Arc; + +use databend_common_catalog::plan::Filters; +use databend_common_catalog::plan::VectorIndexInfo; +use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::types::F32; +use databend_common_expression::RemoteExpr; +use databend_common_expression::TableSchemaRef; +use databend_common_expression::VECTOR_SCORE_COL_NAME; +use databend_storages_common_index::DistanceType; +use databend_storages_common_index::FixedLengthPriorityQueue; +use databend_storages_common_io::ReadSettings; +use databend_storages_common_pruner::BlockMetaIndex; +use databend_storages_common_table_meta::meta::BlockMeta; +use opendal::Operator; + +use crate::io::read::VectorIndexReader; + +/// Vector index pruner. +#[derive(Clone)] +pub struct VectorIndexPruner { + ctx: Arc, + operator: Operator, + _schema: TableSchemaRef, + vector_index: VectorIndexInfo, + filters: Option, + sort: Vec<(RemoteExpr, bool, bool)>, + limit: Option, +} + +impl VectorIndexPruner { + pub fn create( + ctx: Arc, + operator: Operator, + schema: TableSchemaRef, + vector_index: VectorIndexInfo, + filters: Option, + sort: Vec<(RemoteExpr, bool, bool)>, + limit: Option, + ) -> Result { + Ok(Self { + ctx, + operator, + _schema: schema, + vector_index, + filters, + sort, + limit, + }) + } +} + +impl VectorIndexPruner { + pub async fn prune( + &self, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + let settings = ReadSettings::from_ctx(&self.ctx)?; + let distance_type = match self.vector_index.func_name.as_str() { + "cosine_distance" => DistanceType::Dot, + "l1_distance" => DistanceType::L1, + "l2_distance" => DistanceType::L2, + _ => unreachable!(), + }; + let columns = vec![ + format!( + "{}-{}_graph_links", + self.vector_index.column_id, distance_type + ), + format!( + "{}-{}_graph_data", + self.vector_index.column_id, distance_type + ), + format!( + "{}-{}_encoded_u8_meta", + self.vector_index.column_id, distance_type + ), + format!( + "{}-{}_encoded_u8_data", + self.vector_index.column_id, distance_type + ), + ]; + + let query_values = unsafe { + std::mem::transmute::, Vec>(self.vector_index.query_values.clone()) + }; + + let vector_reader = VectorIndexReader::create( + self.operator.clone(), + settings, + distance_type, + columns, + query_values, + ); + + // @TODO support filters + if self.filters.is_none() && !self.sort.is_empty() && self.limit.is_some() { + let (sort, asc, _nulls_first) = &self.sort[0]; + if let RemoteExpr::ColumnRef { id, .. 
} = sort { + if id == VECTOR_SCORE_COL_NAME && *asc { + let limit = self.limit.unwrap(); + return self + .vector_index_topn_prune(vector_reader, limit, metas) + .await; + } + } + } + + self.vector_index_prune(vector_reader, metas).await + } + + async fn vector_index_topn_prune( + &self, + vector_reader: VectorIndexReader, + limit: usize, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + let mut top_queue = FixedLengthPriorityQueue::new(limit); + + for (index, (_, block_meta)) in metas.iter().enumerate() { + let Some(location) = block_meta.vector_index_location.clone() else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + self.vector_index.index_name + ))); + }; + + let row_count = block_meta.row_count as usize; + let score_offsets = vector_reader.prune(limit, row_count, &location.0).await?; + + for score_offset in score_offsets { + let vector_score = VectorScore { + index, + row_idx: score_offset.idx, + score: F32::from(score_offset.score), + }; + top_queue.push(vector_score); + } + } + let top_scores = top_queue.into_sorted_vec(); + let top_indexes: HashSet = top_scores.iter().map(|s| s.index).collect(); + + let mut pruned_metas = Vec::with_capacity(top_indexes.len()); + for (index, (mut block_meta_index, block_meta)) in metas.into_iter().enumerate() { + if !top_indexes.contains(&index) { + continue; + } + let mut vector_scores = Vec::new(); + for top_score in &top_scores { + if top_score.index == index { + vector_scores.push((top_score.row_idx as usize, top_score.score)); + } + } + block_meta_index.vector_scores = Some(vector_scores); + pruned_metas.push((block_meta_index, block_meta)); + } + + Ok(pruned_metas) + } + + async fn vector_index_prune( + &self, + vector_reader: VectorIndexReader, + metas: Vec<(BlockMetaIndex, Arc)>, + ) -> Result)>> { + // can't use vector index topn to prune, only generate vector scores. + let mut new_metas = Vec::with_capacity(metas.len()); + for (mut block_meta_index, block_meta) in metas.into_iter() { + let Some(location) = block_meta.vector_index_location.clone() else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + self.vector_index.index_name + ))); + }; + + let row_count = block_meta.row_count as usize; + // use row_count as limit to generate scores for all rows. + let score_offsets = vector_reader + .generate_scores(row_count, &location.0) + .await?; + + let mut vector_scores = Vec::with_capacity(row_count); + for score_offset in &score_offsets { + vector_scores.push((score_offset.idx as usize, F32::from(score_offset.score))); + } + block_meta_index.vector_scores = Some(vector_scores); + new_metas.push((block_meta_index, block_meta)); + } + + Ok(new_metas) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct VectorScore { + index: usize, + row_idx: u32, + score: F32, +} + +impl Ord for VectorScore { + fn cmp(&self, other: &Self) -> Ordering { + // reverse order to keep lower score. 
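+        // Note: FixedLengthPriorityQueue keeps the `limit` greatest elements
+        // under this `Ord`. Comparing `other.score` to `self.score` reverses
+        // the natural order, so a smaller distance ranks as "greater" and the
+        // queue converges on the `limit` lowest-scoring (nearest) rows.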
+ other.score.cmp(&self.score) + } +} + +impl PartialOrd for VectorScore { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs b/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs index d7f5369a79f7e..d65b7473a835f 100644 --- a/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs +++ b/src/query/storages/fuse/src/pruning_pipeline/column_oriented_block_prune.rs @@ -237,6 +237,7 @@ impl AsyncSink for ColumnOrientedBlockPruneSink { segment_location: segment_location.location.0.clone(), snapshot_location: segment_location.snapshot_loc.clone(), matched_rows: None, + vector_scores: None, virtual_block_meta: None, }; diff --git a/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs b/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs index 69e9681187967..d79a9d4dd7b8c 100644 --- a/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs +++ b/src/query/storages/fuse/src/pruning_pipeline/topn_prune_transform.rs @@ -22,7 +22,7 @@ use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_transforms::BlockMetaAccumulatingTransform; use databend_common_pipeline_transforms::BlockMetaAccumulatingTransformer; use databend_storages_common_pruner::BlockMetaIndex; -use databend_storages_common_pruner::TopNPrunner; +use databend_storages_common_pruner::TopNPruner; use databend_storages_common_table_meta::meta::BlockMeta; use crate::pruning_pipeline::block_prune_result_meta::BlockPruneResult; @@ -30,7 +30,7 @@ use crate::pruning_pipeline::block_prune_result_meta::BlockPruneResult; // TopNPruneTransform is a processor that will accumulate the block meta and not push to // downstream until all data is received and pruned. 
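That contract deserves a concrete illustration before the struct itself. A reduced, framework-free sketch of the accumulate-then-emit shape (stand-in tuple types instead of `BlockMetaIndex`/`Arc<BlockMeta>`, and a toy ordering in place of the real top-n logic):

```rust
// Buffer every incoming batch; only prune and emit once the input stream is
// exhausted, because a correct global top-n needs to have seen every block.
struct AccumulatingPruner {
    metas: Vec<(usize, u64)>, // (block index, row count) stand-ins
    limit: usize,
}

impl AccumulatingPruner {
    // Mirrors `transform`: absorb one batch, emit nothing yet.
    fn transform(&mut self, batch: Vec<(usize, u64)>) {
        self.metas.extend(batch);
    }

    // Mirrors `on_finish`: prune once, after all input has arrived.
    fn on_finish(mut self) -> Vec<(usize, u64)> {
        self.metas.sort_by_key(|&(_, rows)| rows); // toy stand-in ordering
        self.metas.truncate(self.limit);
        self.metas
    }
}

fn main() {
    let mut pruner = AccumulatingPruner { metas: Vec::new(), limit: 2 };
    pruner.transform(vec![(0, 42), (1, 7)]);
    pruner.transform(vec![(2, 19)]);
    println!("{:?}", pruner.on_finish()); // [(1, 7), (2, 19)]
}
```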
pub struct TopNPruneTransform { - topn_pruner: TopNPrunner, + topn_pruner: TopNPruner, metas: Vec<(BlockMetaIndex, Arc)>, } @@ -51,7 +51,7 @@ impl TopNPruneTransform { pub fn create( input: Arc, output: Arc, - topn_pruner: TopNPrunner, + topn_pruner: TopNPruner, ) -> Result { Ok(ProcessorPtr::create( BlockMetaAccumulatingTransformer::create(input, output, TopNPruneTransform { diff --git a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs index 39cd32b9e79f7..3d915f0800e0c 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs @@ -99,6 +99,7 @@ static DELETES_FILE_PUSHDOWN_INFO: LazyLock = LazyLock::new(|| Pus agg_index: None, change_type: None, inverted_index: None, + vector_index: None, sample: None, }); diff --git a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test index 448295c44fa07..a9e43d7a43fd7 100644 --- a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test +++ b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test @@ -22,21 +22,24 @@ statement ok use test_vector_index statement ok -CREATE TABLE IF NOT EXISTS t(id Int, embedding Vector(128), VECTOR INDEX idx (embedding) m=4 ef_construct=6) Engine = Fuse +CREATE TABLE IF NOT EXISTS t(id Int, embedding Vector(8), VECTOR INDEX idx (embedding) m=10 ef_construct=40 distance='cosine') Engine = Fuse query TT SHOW CREATE TABLE t ---- -t CREATE TABLE t ( id INT NULL, embedding VECTOR(128) NULL, SYNC VECTOR INDEX idx (embedding) ef_construct = '6', m = '4' ) ENGINE=FUSE +t CREATE TABLE t ( id INT NULL, embedding VECTOR(8) NULL, SYNC VECTOR INDEX idx (embedding) distance = 'cosine', ef_construct = '40', m = '10' ) ENGINE=FUSE statement ok DROP VECTOR INDEX idx ON t; statement error -CREATE VECTOR INDEX idx2 ON t(embedding) m=0 ef_construct=5; +CREATE VECTOR INDEX idx2 ON t(embedding) m=10 ef_construct=5 + +statement error +CREATE VECTOR INDEX idx2 ON t(embedding) m=0 ef_construct=5 distance='cosine,l1,l2'; statement ok -CREATE VECTOR INDEX idx2 ON t(embedding) m=5 ef_construct=7; +CREATE VECTOR INDEX idx2 ON t(embedding) m=10 ef_construct=40 distance='cosine,l1,l2'; statement error DROP INVERTED INDEX idx2 ON t; @@ -44,7 +47,151 @@ DROP INVERTED INDEX idx2 ON t; query TT SHOW CREATE TABLE t ---- -t CREATE TABLE t ( id INT NULL, embedding VECTOR(128) NULL, SYNC VECTOR INDEX idx2 (embedding) ef_construct = '7', m = '5' ) ENGINE=FUSE +t CREATE TABLE t ( id INT NULL, embedding VECTOR(8) NULL, SYNC VECTOR INDEX idx2 (embedding) distance = 'cosine,l1,l2', ef_construct = '40', m = '10' ) ENGINE=FUSE + +statement ok +INSERT INTO t VALUES +(1, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]), +(2, [0.17790798, 0.0132427, 0.55352279, 0.49129727, 0.74246407, 0.97345777, 0.83489323, 0.86012174]), +(3, [0.2703968, 0.26768266, 0.96587005, 0.04760408, 0.92289409, 0.15799311, 0.86381163, 0.2922287]), +(4, [0.0810719, 0.27882267, 0.6015564, 0.34236571, 0.58889543, 0.83293431, 0.67012723, 0.76303241]) + +statement ok +INSERT INTO t VALUES +(5, [0.66399931, 0.35041433, 0.2159864, 0.89537508, 0.44577037, 0.57896497, 0.36630178, 0.33816571]), +(6, [0.32052319, 0.38567453, 0.62853221, 0.84816365, 0.15853234, 0.33207714, 0.7673085, 0.69513879]), +(7, 
[0.82590676, 0.35860656, 0.6277274, 0.95148122, 0.81893313, 0.91440945, 0.15803721, 0.5866869]), +(8, [0.42135513, 0.05637937, 0.88864157, 0.59217909, 0.98435169, 0.39234101, 0.41490889, 0.02760555]) + +statement ok +INSERT INTO t VALUES +(9, [0.61418788, 0.34545306, 0.14638622, 0.53249639, 0.09139293, 0.84940919, 0.105433, 0.4156201]), +(10, [0.21828953, 0.87852734, 0.64221122, 0.24536394, 0.81689593, 0.86341877, 0.7218334, 0.45028494]), +(11, [0.43279006, 0.45523681, 0.76060274, 0.66284758, 0.19131476, 0.13564463, 0.88712212, 0.93279565]), +(12, [0.79671359, 0.86079789, 0.94477631, 0.5116732, 0.29733205, 0.33645561, 0.41380333, 0.75909903]) + +statement ok +INSERT INTO t VALUES +(13, [0.94666755, 0.39522571, 0.39857241, 0.88080323, 0.53470771, 0.09486194, 0.17524627, 0.86497559]), +(14, [0.8397819, 0.37221789, 0.32885295, 0.20470829, 0.49838217, 0.00736057, 0.45418757, 0.6956924 ]), +(15, [0.13230447, 0.630588, 0.10812326, 0.21558228, 0.83768057, 0.48870546, 0.65021806, 0.31626541]), +(16, [0.2667851, 0.01529589, 0.98994706, 0.31870983, 0.31783372, 0.34863699, 0.30254189, 0.84441678]) + + +statement ok +CREATE TABLE IF NOT EXISTS t1(id Int, embedding Vector(8)) Engine = Fuse + +statement ok +INSERT INTO t1 SELECT id, embedding FROM t + + +query IF +SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +1 0.009774268 +10 0.033747792 +12 0.060161233 +11 0.14048636 +8 0.14554787 + +query IF +SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +1 0.0 +10 0.0592916 +12 0.085179806 +8 0.13477594 +3 0.13801938 + +query IF +SELECT id, cosine_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +4 0.043031156 +11 0.05496204 +10 0.056846976 +2 0.085320055 +16 0.111266375 + +query IF +SELECT id, cosine_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +4 0.031086385 +2 0.075579524 +10 0.09369081 +11 0.097252846 +16 0.105270445 + +query IF +SELECT id, l1_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +1 0.0 +10 1.5320582 +12 1.7202058 +8 1.770357 +15 1.8413826 + +query IF +SELECT id, l1_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +1 1.4901161e-7 +10 1.5290257 +12 1.7164081 +8 1.7851611 +15 1.8448958 + +query IF +SELECT id, l1_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +4 0.87704676 +6 1.559421 +16 1.6866446 +11 1.7874013 +10 1.8277186 + +query IF +SELECT id, l1_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +4 0.86807996 +6 1.571893 +16 1.6951541 +2 1.8405688 +11 
1.8465424 + +query IF +SELECT id, l2_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +1 0.052975703 +10 0.73442644 +12 0.84724534 +8 0.92271036 +3 0.9368646 + +query IF +SELECT id, l2_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +1 8.940697e-8 +10 0.63450795 +12 0.7636615 +8 0.91897535 +3 0.93300396 + +query IF +SELECT id, l2_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +4 0.41017252 +16 0.69951516 +6 0.73418504 +2 0.7379028 +11 0.8144757 + +query IF +SELECT id, l2_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.31591033, 0.48453478, 0.51902057, 0.74154714, 0.75059576]::vector(8)) AS similarity FROM t1 ORDER BY similarity ASC LIMIT 5; +---- +4 0.40161562 +16 0.7057761 +2 0.7328551 +6 0.73338425 +11 0.76073563 statement ok use default diff --git a/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test b/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test index a5e02dd865000..8256f5f6976a6 100644 --- a/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test +++ b/tests/sqllogictests/suites/query/functions/02_0063_function_vector.test @@ -8,3 +8,33 @@ query F select [1, 2] <-> [2, 3] as sim ---- 1.4142135 + +query FF +select cosine_distance([1.1,2.2,3], [1,1,1]), cosine_distance([1,2.2,3], [4,6,8]) +---- +0.062412794753543555 0.00699537571767439 + +query FF +select cosine_distance([1.1,2.2,3]::vector(3), [1,1,1]::vector(3)), cosine_distance([1,2.2,3]::vector(3), [4,6,8]::vector(3)) +---- +0.06241274 0.0069953203 + +query FF +select l1_distance([1.1,2.2,3], [1,1,1]), l1_distance([1,2.2,3], [4,6,8]) +---- +3.3000000000000003 11.8 + +query FF +select l1_distance([1.1,2.2,3]::vector(3), [1,1,1]::vector(3)), l1_distance([1,2.2,3]::vector(3), [4,6,8]::vector(3)) +---- +3.3000002 11.8 + +query FF +select l2_distance([1.1,2.2,3], [1,1,1]), l2_distance([1,2.2,3], [4,6,8]) +---- +2.3345235059857505 6.959885056522126 + +query FF +select l2_distance([1.1,2.2,3]::vector(3), [1,1,1]::vector(3)), l2_distance([1,2.2,3]::vector(3), [4,6,8]::vector(3)) +---- +2.3345234 6.959885 From 779dfdfb2d34c6672d653a50ffcbf221c63c2840 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 10 Jul 2025 13:57:11 +0800 Subject: [PATCH 2/4] support explain display vetor pruning, add write logs --- Cargo.lock | 1 - src/common/metrics/src/metrics/storage.rs | 24 ++++++++++ .../catalog/src/plan/pruning_statistics.rs | 6 +++ src/query/expression/src/evaluator.rs | 9 ++-- src/query/expression/src/type_check.rs | 5 ++- src/query/sql/src/executor/format.rs | 12 +++++ src/query/storages/common/index/Cargo.toml | 1 - .../fuse/src/io/write/vector_index_writer.rs | 45 ++++++++++++++++--- .../storages/fuse/src/pruning/fuse_pruner.rs | 31 +++++++++++++ .../fuse/src/pruning/pruning_statistics.rs | 24 ++++++++++ .../mode/cluster/memo/aggregate_property.test | 4 +- .../mode/cluster/memo/join_property.test | 8 ++-- .../mode/cluster/memo/mix_property.test | 2 +- 13 files changed, 154 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 551227dddbed7..312e321a41e5d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5486,7 +5486,6 @@ dependencies = [ 
"databend-common-exception", "databend-common-expression", "databend-common-functions", - "databend-common-vector", "databend-storages-common-table-meta", "divan", "fastrace", diff --git a/src/common/metrics/src/metrics/storage.rs b/src/common/metrics/src/metrics/storage.rs index b60a72acb416d..730e5ca093a7d 100644 --- a/src/common/metrics/src/metrics/storage.rs +++ b/src/common/metrics/src/metrics/storage.rs @@ -254,6 +254,14 @@ static BYTES_BLOCK_INVERTED_INDEX_PRUNING_BEFORE: LazyLock = LazyLock::new(|| register_counter("fuse_bytes_block_inverted_index_pruning_before")); static BYTES_BLOCK_INVERTED_INDEX_PRUNING_AFTER: LazyLock = LazyLock::new(|| register_counter("fuse_bytes_block_inverted_index_pruning_after")); +static BLOCKS_VECTOR_INDEX_PRUNING_BEFORE: LazyLock = + LazyLock::new(|| register_counter("fuse_blocks_vector_index_pruning_before")); +static BLOCKS_VECTOR_INDEX_PRUNING_AFTER: LazyLock = + LazyLock::new(|| register_counter("fuse_blocks_vector_index_pruning_after")); +static BYTES_BLOCK_VECTOR_INDEX_PRUNING_BEFORE: LazyLock = + LazyLock::new(|| register_counter("fuse_bytes_block_vector_index_pruning_before")); +static BYTES_BLOCK_VECTOR_INDEX_PRUNING_AFTER: LazyLock = + LazyLock::new(|| register_counter("fuse_bytes_block_vector_index_pruning_after")); static PRUNING_PREWHERE_NUMS: LazyLock = LazyLock::new(|| register_counter("fuse_pruning_prewhere_nums")); static PRUNING_MILLISECONDS: LazyLock = @@ -716,6 +724,22 @@ pub fn metrics_inc_bytes_block_inverted_index_pruning_after(c: u64) { BYTES_BLOCK_INVERTED_INDEX_PRUNING_AFTER.inc_by(c); } +pub fn metrics_inc_blocks_vector_index_pruning_before(c: u64) { + BLOCKS_VECTOR_INDEX_PRUNING_BEFORE.inc_by(c); +} + +pub fn metrics_inc_blocks_vector_index_pruning_after(c: u64) { + BLOCKS_VECTOR_INDEX_PRUNING_AFTER.inc_by(c); +} + +pub fn metrics_inc_bytes_block_vector_index_pruning_before(c: u64) { + BYTES_BLOCK_VECTOR_INDEX_PRUNING_BEFORE.inc_by(c); +} + +pub fn metrics_inc_bytes_block_vector_index_pruning_after(c: u64) { + BYTES_BLOCK_VECTOR_INDEX_PRUNING_AFTER.inc_by(c); +} + pub fn metrics_inc_pruning_prewhere_nums(c: u64) { PRUNING_PREWHERE_NUMS.inc_by(c); } diff --git a/src/query/catalog/src/plan/pruning_statistics.rs b/src/query/catalog/src/plan/pruning_statistics.rs index 9dee48cd5bbf4..13d59825143dc 100644 --- a/src/query/catalog/src/plan/pruning_statistics.rs +++ b/src/query/catalog/src/plan/pruning_statistics.rs @@ -29,6 +29,10 @@ pub struct PruningStatistics { /// Block inverted index filter pruning stats. pub blocks_inverted_index_pruning_before: usize, pub blocks_inverted_index_pruning_after: usize, + + /// Block vector index filter pruning stats. 
+ pub blocks_vector_index_pruning_before: usize, + pub blocks_vector_index_pruning_after: usize, } impl PruningStatistics { @@ -41,5 +45,7 @@ impl PruningStatistics { self.blocks_bloom_pruning_after += other.blocks_bloom_pruning_after; self.blocks_inverted_index_pruning_before += other.blocks_inverted_index_pruning_before; self.blocks_inverted_index_pruning_after += other.blocks_inverted_index_pruning_after; + self.blocks_vector_index_pruning_before += other.blocks_vector_index_pruning_before; + self.blocks_vector_index_pruning_after += other.blocks_vector_index_pruning_after; } } diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index b4ebe56e3c48f..fccb97ddc43c7 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -921,11 +921,13 @@ impl<'a> Evaluator<'a> { } } (DataType::Array(inner_src_ty), DataType::Vector(inner_dest_ty)) => { - if !matches!(&**inner_src_ty, DataType::Number(_) | DataType::Decimal(_)) - || matches!(inner_dest_ty, VectorDataType::Int8(_)) + if !matches!( + inner_src_ty.remove_nullable(), + DataType::Number(_) | DataType::Decimal(_) + ) || matches!(inner_dest_ty, VectorDataType::Int8(_)) { return Err(ErrorCode::BadArguments(format!( - "unable to cast type `{src_type}` to type `{dest_type}`" + "unable to cast type `{src_type}` to vector type `{dest_type}`" )) .set_span(span)); } @@ -971,6 +973,7 @@ impl<'a> Evaluator<'a> { ) .set_span(span)); } + let col = col.remove_nullable(); match col { Column::Number(num_col) => { for i in 0..dimension { diff --git a/src/query/expression/src/type_check.rs b/src/query/expression/src/type_check.rs index 2b60877f13d12..2df907c4ac17e 100755 --- a/src/query/expression/src/type_check.rs +++ b/src/query/expression/src/type_check.rs @@ -639,7 +639,10 @@ fn can_cast_to(src_ty: &DataType, dest_ty: &DataType) -> bool { true } (DataType::Array(fields_src_ty), DataType::Vector(_)) - if matches!(&**fields_src_ty, DataType::Number(_) | DataType::Decimal(_)) => + if matches!( + fields_src_ty.remove_nullable(), + DataType::Number(_) | DataType::Decimal(_) + ) => { true } diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 684607c500e4e..b825e831de176 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -1734,6 +1734,18 @@ fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec 0 { + if !blocks_pruning_description.is_empty() { + blocks_pruning_description += ", "; + } + blocks_pruning_description += &format!( + "vector pruning: {} to {}", + info.pruning_stats.blocks_vector_index_pruning_before, + info.pruning_stats.blocks_vector_index_pruning_after + ); + } + // Combine segment pruning and blocks pruning descriptions if any if info.pruning_stats.segments_range_pruning_before > 0 || !blocks_pruning_description.is_empty() diff --git a/src/query/storages/common/index/Cargo.toml b/src/query/storages/common/index/Cargo.toml index 7f230d41573a1..07cf10ba70de2 100644 --- a/src/query/storages/common/index/Cargo.toml +++ b/src/query/storages/common/index/Cargo.toml @@ -14,7 +14,6 @@ databend-common-ast = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } databend-common-functions = { workspace = true } -databend-common-vector = { workspace = true } databend-storages-common-table-meta = { workspace = true } anyerror = { workspace = true } diff --git a/src/query/storages/fuse/src/io/write/vector_index_writer.rs 
b/src/query/storages/fuse/src/io/write/vector_index_writer.rs index ee21b769ae826..8a69b985b8084 100644 --- a/src/query/storages/fuse/src/io/write/vector_index_writer.rs +++ b/src/query/storages/fuse/src/io/write/vector_index_writer.rs @@ -36,6 +36,11 @@ use databend_storages_common_index::DistanceType; use databend_storages_common_index::HNSWIndex; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::table::TableCompression; +use log::debug; +use log::info; + +const DEFAULT_M: usize = 16; +const DEFAULT_EF_CONSTRUCT: usize = 100; #[derive(Debug, Clone)] pub struct VectorIndexState { @@ -70,6 +75,11 @@ impl VectorIndexBuilder { table_indexes: &BTreeMap, schema: TableSchemaRef, ) -> Option { + info!( + "Starting vector index creation with {} table indexes", + table_indexes.len() + ); + LicenseManagerSwitch::instance() .check_enterprise_enabled(ctx.get_license_key(), Feature::VectorIndex) .ok()?; @@ -86,6 +96,7 @@ impl VectorIndexBuilder { continue; } + info!("Processing vector index: {}", index.name); let mut offsets = Vec::with_capacity(index.column_ids.len()); for column_id in &index.column_ids { for (offset, field) in schema.fields.iter().enumerate() { @@ -97,6 +108,10 @@ impl VectorIndexBuilder { } // ignore invalid index if offsets.len() != index.column_ids.len() { + debug!( + "Ignoring invalid vector index: {}, missing columns", + index.name + ); continue; } for (offset, _) in &offsets { @@ -106,13 +121,13 @@ impl VectorIndexBuilder { // Parse index parameters let m = match index.options.get("m") { - Some(value) => value.parse::().unwrap_or(16), - None => 16, + Some(value) => value.parse::().unwrap_or(DEFAULT_M), + None => DEFAULT_M, }; let ef_construct = match index.options.get("ef_construct") { - Some(value) => value.parse::().unwrap_or(64), - None => 64, + Some(value) => value.parse::().unwrap_or(DEFAULT_EF_CONSTRUCT), + None => DEFAULT_EF_CONSTRUCT, }; let mut distances = Vec::new(); @@ -132,8 +147,16 @@ impl VectorIndexBuilder { None => continue, }; if distances.is_empty() { + debug!( + "Ignoring vector index: {}, no valid distance types", + index.name + ); continue; } + info!( + "Added vector index parameters for {}: m={}, ef_construct={}, distances={:?}", + index.name, m, ef_construct, distances + ); let index_param = VectorIndexParam { index_name: index.name.clone(), index_version: index.version.clone(), @@ -162,6 +185,11 @@ impl VectorIndexBuilder { } pub fn add_block(&mut self, block: &DataBlock) -> Result<()> { + info!( + "Adding block with {} rows to vector index", + block.num_rows() + ); + for offset in &self.field_offsets_set { let block_entry = block.get_by_offset(*offset); let column = block_entry.to_column(); @@ -178,6 +206,7 @@ impl VectorIndexBuilder { #[async_backtrace::framed] pub fn finalize(&mut self, location: &Location) -> Result { let start = Instant::now(); + info!("Start build vector HNSW index for location: {}", location.0); let mut columns = BTreeMap::new(); for offset in &self.field_offsets_set { @@ -196,6 +225,7 @@ impl VectorIndexBuilder { let mut metadata = BTreeMap::new(); for (field_offsets, index_param) in self.field_offsets.iter().zip(&self.index_params) { + debug!("Building HNSW index for {}", index_param.index_name); for (offset, column_id) in field_offsets { let Some(column) = concated_columns.get(offset) else { return Err(ErrorCode::Internal("Can't find vector column")); @@ -238,9 +268,14 @@ impl VectorIndexBuilder { }; // Perf. 
+ let elapsed_ms = start.elapsed().as_millis() as u64; { - metrics_inc_block_vector_index_generate_milliseconds(start.elapsed().as_millis() as u64); + metrics_inc_block_vector_index_generate_milliseconds(elapsed_ms); } + info!( + "Finish build vector HNSW index: location={}, size={} bytes in {} ms", + location.0, size, elapsed_ms + ); Ok(state) } diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index ec907dea94603..703edeb55fe54 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -26,6 +26,10 @@ use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_expression::SEGMENT_NAME_COL_NAME; use databend_common_functions::BUILTIN_FUNCTIONS; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_before; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_before; use databend_common_sql::BloomIndexColumns; use databend_common_sql::DefaultExprBinder; use databend_storages_common_cache::CacheAccessor; @@ -570,7 +574,27 @@ impl FusePruner { sort, limit, )?; + + // Perf. + { + let block_size = metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_before(metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_before(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_before(metas.len() as u64); + } let pruned_metas = vector_pruner.prune(metas.clone()).await?; + + // Perf. + { + let block_size = pruned_metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_after(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + } return Ok(pruned_metas); } Ok(metas) @@ -594,6 +618,11 @@ impl FusePruner { let blocks_inverted_index_pruning_after = stats.get_blocks_inverted_index_pruning_after() as usize; + let blocks_vector_index_pruning_before = + stats.get_blocks_vector_index_pruning_before() as usize; + let blocks_vector_index_pruning_after = + stats.get_blocks_vector_index_pruning_after() as usize; + databend_common_catalog::plan::PruningStatistics { segments_range_pruning_before, segments_range_pruning_after, @@ -603,6 +632,8 @@ impl FusePruner { blocks_bloom_pruning_after, blocks_inverted_index_pruning_before, blocks_inverted_index_pruning_after, + blocks_vector_index_pruning_before, + blocks_vector_index_pruning_after, } } diff --git a/src/query/storages/fuse/src/pruning/pruning_statistics.rs b/src/query/storages/fuse/src/pruning/pruning_statistics.rs index 97838c30e2800..345baee8b22a4 100644 --- a/src/query/storages/fuse/src/pruning/pruning_statistics.rs +++ b/src/query/storages/fuse/src/pruning/pruning_statistics.rs @@ -32,6 +32,10 @@ pub struct FusePruningStatistics { /// Block inverted index filter pruning stats. pub blocks_inverted_index_pruning_before: AtomicU64, pub blocks_inverted_index_pruning_after: AtomicU64, + + /// Block vector index filter pruning stats. 
+ pub blocks_vector_index_pruning_before: AtomicU64, + pub blocks_vector_index_pruning_after: AtomicU64, } impl FusePruningStatistics { @@ -108,4 +112,24 @@ impl FusePruningStatistics { self.blocks_inverted_index_pruning_after .load(Ordering::Relaxed) } + + pub fn set_blocks_vector_index_pruning_before(&self, v: u64) { + self.blocks_vector_index_pruning_before + .fetch_add(v, Ordering::Relaxed); + } + + pub fn get_blocks_vector_index_pruning_before(&self) -> u64 { + self.blocks_vector_index_pruning_before + .load(Ordering::Relaxed) + } + + pub fn set_blocks_vector_index_pruning_after(&self, v: u64) { + self.blocks_vector_index_pruning_after + .fetch_add(v, Ordering::Relaxed); + } + + pub fn get_blocks_vector_index_pruning_after(&self) -> u64 { + self.blocks_vector_index_pruning_after + .load(Ordering::Relaxed) + } } diff --git a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test index 2e6590a9dfeb3..087095317314d 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test @@ -26,7 +26,7 @@ where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #8 -├── estimated memory: 8.44 KiB +├── estimated memory: 10.69 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -89,7 +89,7 @@ group by t_10.a, t_100.a ---- Memo ├── root group: #8 -├── estimated memory: 21.09 KiB +├── estimated memory: 26.72 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index 1e91a3baa62b6..18cc76ce805bc 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -25,7 +25,7 @@ select * from t_10, t_100, t_1000 where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #5 -├── estimated memory: 6.56 KiB +├── estimated memory: 8.31 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -73,7 +73,7 @@ select * from t_1000 left join t_10 on t_1000.a = t_10.a left join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 6.09 KiB +├── estimated memory: 7.72 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -119,7 +119,7 @@ select * from t_1000 right join t_10 on t_1000.a = t_10.a right join t_100 on t_ ---- Memo ├── root group: #5 -├── estimated memory: 5.16 KiB +├── estimated memory: 6.53 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -161,7 +161,7 @@ select * from t_1000 full join t_10 on t_1000.a = t_10.a full join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 5.16 KiB +├── estimated memory: 6.53 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test index b40dcef29861a..15274e1c00eed 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test @@ -29,7 +29,7 @@ limit 10 ---- Memo ├── root group: #10 -├── estimated memory: 
22.03 KiB +├── estimated memory: 27.91 KiB ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] From eeeda237c640f7fdb64ae974a83089407d5b27dc Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 11 Jul 2025 13:11:00 +0800 Subject: [PATCH 3/4] fuse_block add vector_index_size --- .../fuse/src/table_functions/fuse_block.rs | 7 +++++ .../09_0000_vector_index_base.test | 27 ++++++++++++++++++- .../mode/cluster/memo/join_property.test | 2 +- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/query/storages/fuse/src/table_functions/fuse_block.rs b/src/query/storages/fuse/src/table_functions/fuse_block.rs index 18950f522eff7..eaecb5425c809 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_block.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_block.rs @@ -67,6 +67,10 @@ impl TableMetaFunc for FuseBlock { "ngram_index_size", TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), ), + TableField::new( + "vector_index_size", + TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), + ), TableField::new( "virtual_column_size", TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), @@ -93,6 +97,7 @@ impl TableMetaFunc for FuseBlock { let mut bloom_filter_size = Vec::with_capacity(len); let mut inverted_index_size = Vec::with_capacity(len); let mut ngram_index_size = Vec::with_capacity(len); + let mut vector_index_size = Vec::with_capacity(len); let mut virtual_column_size = Vec::with_capacity(len); let segments_io = SegmentsIO::create(ctx.clone(), tbl.operator.clone(), tbl.schema()); @@ -122,6 +127,7 @@ impl TableMetaFunc for FuseBlock { bloom_filter_size.push(block.bloom_filter_index_size); inverted_index_size.push(block.inverted_index_size); ngram_index_size.push(block.ngram_filter_index_size); + vector_index_size.push(block.vector_index_size); virtual_column_size.push( block .virtual_block_meta @@ -149,6 +155,7 @@ impl TableMetaFunc for FuseBlock { UInt64Type::from_data(bloom_filter_size).into(), UInt64Type::from_opt_data(inverted_index_size).into(), UInt64Type::from_opt_data(ngram_index_size).into(), + UInt64Type::from_opt_data(vector_index_size).into(), UInt64Type::from_opt_data(virtual_column_size).into(), ], num_rows, diff --git a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test index a9e43d7a43fd7..19f6103aba605 100644 --- a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test +++ b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test @@ -77,13 +77,38 @@ INSERT INTO t VALUES (15, [0.13230447, 0.630588, 0.10812326, 0.21558228, 0.83768057, 0.48870546, 0.65021806, 0.31626541]), (16, [0.2667851, 0.01529589, 0.98994706, 0.31870983, 0.31783372, 0.34863699, 0.30254189, 0.84441678]) - statement ok CREATE TABLE IF NOT EXISTS t1(id Int, embedding Vector(8)) Engine = Fuse statement ok INSERT INTO t1 SELECT id, embedding FROM t +query T +EXPLAIN SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; +---- +RowFetch +├── output columns: [t._vector_score (#2), t._row_id (#3), t.id (#0)] +├── columns to fetch: [id] +├── estimated rows: 5.00 +└── Limit + ├── output columns: [t._vector_score (#2), t._row_id (#3)] + ├── limit: 5 + ├── offset: 0 + ├── 
estimated rows: 5.00 + └── Sort + ├── output columns: [t._vector_score (#2), t._row_id (#3)] + ├── sort keys: [_vector_score ASC NULLS LAST] + ├── estimated rows: 16.00 + └── TableScan + ├── table: default.test_vector_index.t + ├── output columns: [_vector_score (#2), _row_id (#3)] + ├── read rows: 12 + ├── read size: 0 + ├── partitions total: 4 + ├── partitions scanned: 3 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: 5] + └── estimated rows: 16.00 query IF SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity ASC LIMIT 5; diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index 18cc76ce805bc..3d49805b0577b 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -203,7 +203,7 @@ select * from t_10, t_100, t_1000 ---- Memo ├── root group: #5 -├── estimated memory: 4.22 KiB +├── estimated memory: 5.34 KiB ├── Group #0 │ ├── Best properties │ │ └── { dist: Any }: expr: #0, cost: 10.000, children: [] From 16a7800ebd2c292dfa1448a03fab6358a8552e04 Mon Sep 17 00:00:00 2001 From: baishen Date: Sun, 13 Jul 2025 01:27:39 +0800 Subject: [PATCH 4/4] multi thread pruning --- src/common/metrics/src/metrics/storage.rs | 8 +- src/query/ast/src/ast/statements/statement.rs | 2 + src/query/catalog/src/plan/pushdown.rs | 4 - .../read/vector_index/vector_index_reader.rs | 27 +- .../fuse/src/operations/read_partitions.rs | 33 ++- .../storages/fuse/src/pruning/fuse_pruner.rs | 26 +- .../fuse/src/pruning/vector_index_pruner.rs | 252 ++++++++++++++---- .../storages/fuse/src/pruning_pipeline/mod.rs | 2 + .../vector_index_prune_transform.rs | 86 ++++++ .../09_0000_vector_index_base.test | 21 ++ 10 files changed, 345 insertions(+), 116 deletions(-) create mode 100644 src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs diff --git a/src/common/metrics/src/metrics/storage.rs b/src/common/metrics/src/metrics/storage.rs index 730e5ca093a7d..8059be0b39645 100644 --- a/src/common/metrics/src/metrics/storage.rs +++ b/src/common/metrics/src/metrics/storage.rs @@ -190,8 +190,8 @@ static BLOCK_VECTOR_INDEX_GENERATE_MILLISECONDS: LazyLock = LazyLock: static BLOCK_VECTOR_INDEX_READ_MILLISECONDS: LazyLock = LazyLock::new(|| { register_histogram_in_milliseconds("fuse_block_vector_index_read_milliseconds") }); -static BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS: LazyLock = LazyLock::new(|| { - register_histogram_in_milliseconds("fuse_block_vector_index_search_milliseconds") +static BLOCK_VECTOR_INDEX_PRUNING_MILLISECONDS: LazyLock = LazyLock::new(|| { + register_histogram_in_milliseconds("fuse_block_vector_index_pruning_milliseconds") }); static BLOCK_VECTOR_INDEX_READ_BYTES: LazyLock = LazyLock::new(|| register_counter("fuse_block_vector_index_read_bytes")); @@ -626,8 +626,8 @@ pub fn metrics_inc_block_vector_index_read_milliseconds(c: u64) { BLOCK_VECTOR_INDEX_READ_MILLISECONDS.observe(c as f64); } -pub fn metrics_inc_block_vector_index_search_milliseconds(c: u64) { - BLOCK_VECTOR_INDEX_SEARCH_MILLISECONDS.observe(c as f64); +pub fn metrics_inc_block_vector_index_pruning_milliseconds(c: u64) { + BLOCK_VECTOR_INDEX_PRUNING_MILLISECONDS.observe(c as f64); } pub fn metrics_inc_block_vector_index_read_bytes(c: u64) { diff --git 
a/src/query/ast/src/ast/statements/statement.rs b/src/query/ast/src/ast/statements/statement.rs index 8cc197aae5bb8..e1c0a9974f68f 100644 --- a/src/query/ast/src/ast/statements/statement.rs +++ b/src/query/ast/src/ast/statements/statement.rs @@ -675,6 +675,8 @@ impl Display for Statement { unreachable!(); } write!(f, ") ")?; + } else { + write!(f, "SETTINGS ")?; } write!(f, "{stmt}")?; } diff --git a/src/query/catalog/src/plan/pushdown.rs b/src/query/catalog/src/plan/pushdown.rs index d090d4b47d59d..9bbf44e529315 100644 --- a/src/query/catalog/src/plan/pushdown.rs +++ b/src/query/catalog/src/plan/pushdown.rs @@ -256,10 +256,6 @@ impl PushDownInfo { } } - pub fn vector_topn(&self) -> bool { - !self.order_by.is_empty() && self.limit.is_some() && self.vector_index.is_some() - } - pub fn prewhere_of_push_downs(push_downs: Option<&PushDownInfo>) -> Option { if let Some(PushDownInfo { prewhere, .. }) = push_downs { prewhere.clone() diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs index bfb084787a126..13467fdef8870 100644 --- a/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_reader.rs @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::time::Instant; - use databend_common_exception::Result; -use databend_common_metrics::storage::metrics_inc_block_vector_index_search_milliseconds; use databend_storages_common_index::DistanceType; use databend_storages_common_index::HNSWIndex; use databend_storages_common_index::ScoredPointOffset; @@ -61,8 +58,6 @@ impl VectorIndexReader { row_count: usize, location: &str, ) -> Result> { - let start = Instant::now(); - let binary_columns = load_vector_index_files( self.operator.clone(), &self.settings, @@ -72,15 +67,7 @@ impl VectorIndexReader { .await?; let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; - - let res = hnsw_index.search(limit, &self.query_values)?; - - // Perf. - { - metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); - } - - Ok(res) + hnsw_index.search(limit, &self.query_values) } pub async fn generate_scores( @@ -88,8 +75,6 @@ impl VectorIndexReader { row_count: usize, location: &str, ) -> Result> { - let start = Instant::now(); - let binary_columns = load_vector_index_files( self.operator.clone(), &self.settings, @@ -99,14 +84,6 @@ impl VectorIndexReader { .await?; let hnsw_index = HNSWIndex::open(self.distance_type, self.dim, row_count, binary_columns)?; - - let res = hnsw_index.generate_scores(row_count as u32, &self.query_values)?; - - // Perf. 
- { - metrics_inc_block_vector_index_search_milliseconds(start.elapsed().as_millis() as u64); - } - - Ok(res) + hnsw_index.generate_scores(row_count as u32, &self.query_values) } } diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index b42078783cb85..df8f10895316f 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -83,6 +83,7 @@ use crate::pruning::BlockPruner; use crate::pruning::FusePruner; use crate::pruning::SegmentLocation; use crate::pruning::SegmentPruner; +use crate::pruning::VectorIndexPruner; use crate::pruning_pipeline::AsyncBlockPruneTransform; use crate::pruning_pipeline::ColumnOrientedBlockPruneSink; use crate::pruning_pipeline::ExtractSegmentTransform; @@ -95,6 +96,7 @@ use crate::pruning_pipeline::SendPartInfoSink; use crate::pruning_pipeline::SendPartState; use crate::pruning_pipeline::SyncBlockPruneTransform; use crate::pruning_pipeline::TopNPruneTransform; +use crate::pruning_pipeline::VectorIndexPruneTransform; use crate::segment_format_from_location; use crate::FuseLazyPartInfo; use crate::FuseSegmentFormat; @@ -157,15 +159,7 @@ impl FuseTable { nodes_num = cluster.nodes.len(); } - let has_vector_topn = if let Some(ref push_downs) = push_downs { - push_downs.vector_topn() - } else { - false - }; - - if (self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning)) - && !has_vector_topn - { + if self.is_column_oriented() || (segment_len > nodes_num && distributed_pruning) { let mut segments = Vec::with_capacity(segment_locs.len()); for (idx, segment_location) in segment_locs.into_iter().enumerate() { segments.push(FuseLazyPartInfo::create(idx, segment_location)) @@ -491,6 +485,27 @@ impl FuseTable { })?; } + if push_down + .as_ref() + .filter(|p| p.vector_index.is_some()) + .is_some() + { + let pruning_ctx = pruner.pruning_ctx.clone(); + let schema = pruner.table_schema.clone(); + let push_down = push_down.as_ref().unwrap(); + let filters = push_down.filters.clone(); + let sort = push_down.order_by.clone(); + let limit = push_down.limit; + let vector_index = push_down.vector_index.clone().unwrap(); + + let vector_index_pruner = + VectorIndexPruner::create(pruning_ctx, schema, vector_index, filters, sort, limit)?; + prune_pipeline.resize(1, false)?; + prune_pipeline.add_transform(move |input, output| { + VectorIndexPruneTransform::create(input, output, vector_index_pruner.clone()) + })?; + } + let top_k = push_down .as_ref() .filter(|_| self.is_native()) // Only native format supports topk push down. 
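Why `prune_pipeline.resize(1, false)` before adding the transform: a global `LIMIT k` over vector scores can only be decided once candidates from every upstream pruning stream are visible in one place, so the pipeline is first funneled down to a single pipe. A sketch of the same funnel-and-merge idea using plain std threads and channels (scores are pre-scaled to `u32` here only because `f32` is not `Ord`; everything below is illustrative, not databend API):

```rust
use std::collections::BinaryHeap;
use std::sync::mpsc;
use std::thread;

fn main() {
    // Several "pipes" score blocks concurrently and send candidates to one
    // consumer, just as resize(1) routes all partial results into a single
    // VectorIndexPruneTransform.
    let (tx, rx) = mpsc::channel::<(u32, usize)>(); // (scaled score, block idx)
    for worker in 0..4usize {
        let tx = tx.clone();
        thread::spawn(move || {
            for b in 0..3 {
                let block = worker * 3 + b;
                let score = ((7 * block + 3) % 11) as u32; // stand-in distance
                tx.send((score, block)).unwrap();
            }
        });
    }
    drop(tx); // close the channel once all senders are gone

    const K: usize = 5;
    let mut heap = BinaryHeap::new(); // max-heap: top is the worst score kept
    for candidate in rx {
        heap.push(candidate);
        if heap.len() > K {
            heap.pop(); // evict the current worst, retaining the K smallest
        }
    }
    println!("global top-{K}: {:?}", heap.into_sorted_vec());
}
```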
diff --git a/src/query/storages/fuse/src/pruning/fuse_pruner.rs b/src/query/storages/fuse/src/pruning/fuse_pruner.rs index 703edeb55fe54..03e6cbfd51d94 100644 --- a/src/query/storages/fuse/src/pruning/fuse_pruner.rs +++ b/src/query/storages/fuse/src/pruning/fuse_pruner.rs @@ -26,10 +26,6 @@ use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_expression::SEGMENT_NAME_COL_NAME; use databend_common_functions::BUILTIN_FUNCTIONS; -use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_after; -use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_before; -use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_after; -use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_before; use databend_common_sql::BloomIndexColumns; use databend_common_sql::DefaultExprBinder; use databend_storages_common_cache::CacheAccessor; @@ -566,8 +562,7 @@ impl FusePruner { let vector_index = push_down.vector_index.clone().unwrap(); let vector_pruner = VectorIndexPruner::create( - self.pruning_ctx.ctx.clone(), - self.pruning_ctx.dal.clone(), + self.pruning_ctx.clone(), schema, vector_index, filters, @@ -575,26 +570,7 @@ impl FusePruner { limit, )?; - // Perf. - { - let block_size = metas.iter().map(|(_, m)| m.block_size).sum(); - metrics_inc_blocks_vector_index_pruning_before(metas.len() as u64); - metrics_inc_bytes_block_vector_index_pruning_before(block_size); - self.pruning_ctx - .pruning_stats - .set_blocks_vector_index_pruning_before(metas.len() as u64); - } let pruned_metas = vector_pruner.prune(metas.clone()).await?; - - // Perf. - { - let block_size = pruned_metas.iter().map(|(_, m)| m.block_size).sum(); - metrics_inc_blocks_vector_index_pruning_after(pruned_metas.len() as u64); - metrics_inc_bytes_block_vector_index_pruning_after(block_size); - self.pruning_ctx - .pruning_stats - .set_blocks_vector_index_pruning_after(pruned_metas.len() as u64); - } return Ok(pruned_metas); } Ok(metas) diff --git a/src/query/storages/fuse/src/pruning/vector_index_pruner.rs b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs index 6257048c84891..41b9855403658 100644 --- a/src/query/storages/fuse/src/pruning/vector_index_pruner.rs +++ b/src/query/storages/fuse/src/pruning/vector_index_pruner.rs @@ -13,32 +13,45 @@ // limitations under the License. 
use std::cmp::Ordering; +use std::collections::HashMap; use std::collections::HashSet; +use std::future::Future; +use std::pin::Pin; use std::sync::Arc; +use std::time::Instant; +use databend_common_base::base::tokio::sync::OwnedSemaphorePermit; use databend_common_catalog::plan::Filters; use databend_common_catalog::plan::VectorIndexInfo; -use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::F32; use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_expression::VECTOR_SCORE_COL_NAME; +use databend_common_metrics::storage::metrics_inc_block_vector_index_pruning_milliseconds; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_blocks_vector_index_pruning_before; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_after; +use databend_common_metrics::storage::metrics_inc_bytes_block_vector_index_pruning_before; use databend_storages_common_index::DistanceType; use databend_storages_common_index::FixedLengthPriorityQueue; use databend_storages_common_io::ReadSettings; use databend_storages_common_pruner::BlockMetaIndex; use databend_storages_common_table_meta::meta::BlockMeta; -use opendal::Operator; +use futures_util::future; use crate::io::read::VectorIndexReader; +use crate::pruning::PruningContext; + +type VectorPruningFutureReturn = Pin> + Send>>; +type VectorPruningFuture = + Box VectorPruningFutureReturn + Send + 'static>; /// Vector index pruner. #[derive(Clone)] pub struct VectorIndexPruner { - ctx: Arc, - operator: Operator, + pruning_ctx: Arc, _schema: TableSchemaRef, vector_index: VectorIndexInfo, filters: Option, @@ -48,8 +61,7 @@ pub struct VectorIndexPruner { impl VectorIndexPruner { pub fn create( - ctx: Arc, - operator: Operator, + pruning_ctx: Arc, schema: TableSchemaRef, vector_index: VectorIndexInfo, filters: Option, @@ -57,8 +69,7 @@ impl VectorIndexPruner { limit: Option, ) -> Result { Ok(Self { - ctx, - operator, + pruning_ctx, _schema: schema, vector_index, filters, @@ -73,7 +84,7 @@ impl VectorIndexPruner { &self, metas: Vec<(BlockMetaIndex, Arc)>, ) -> Result)>> { - let settings = ReadSettings::from_ctx(&self.ctx)?; + let settings = ReadSettings::from_ctx(&self.pruning_ctx.ctx)?; let distance_type = match self.vector_index.func_name.as_str() { "cosine_distance" => DistanceType::Dot, "l1_distance" => DistanceType::L1, @@ -104,7 +115,7 @@ impl VectorIndexPruner { }; let vector_reader = VectorIndexReader::create( - self.operator.clone(), + self.pruning_ctx.dal.clone(), settings, distance_type, columns, @@ -133,44 +144,117 @@ impl VectorIndexPruner { limit: usize, metas: Vec<(BlockMetaIndex, Arc)>, ) -> Result)>> { + let pruning_runtime = &self.pruning_ctx.pruning_runtime; + let pruning_semaphore = &self.pruning_ctx.pruning_semaphore; + + // Perf. 
+ { + let block_size = metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_before(metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_before(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_before(metas.len() as u64); + } + + let mut block_meta_indexes = metas.into_iter().enumerate(); + let pruning_tasks = std::iter::from_fn(move || { + block_meta_indexes + .next() + .map(|(index, (block_meta_index, block_meta))| { + let vector_reader = vector_reader.clone(); + let index_name = self.vector_index.index_name.clone(); + + let v: VectorPruningFuture = Box::new(move |permit: OwnedSemaphorePermit| { + Box::pin(async move { + let _permit = permit; + + let Some(location) = &block_meta.vector_index_location else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + index_name + ))); + }; + + let row_count = block_meta.row_count as usize; + let score_offsets = + vector_reader.prune(limit, row_count, &location.0).await?; + + let mut vector_scores = Vec::with_capacity(score_offsets.len()); + for score_offset in score_offsets { + let vector_score = VectorScore { + index, + row_idx: score_offset.idx, + score: F32::from(score_offset.score), + }; + vector_scores.push(vector_score); + } + + Ok(VectorPruneResult { + block_idx: index, + scores: vector_scores, + block_meta_index, + block_meta, + }) + }) + }); + v + }) + }); + + let start = Instant::now(); + + let join_handlers = pruning_runtime + .try_spawn_batch_with_owned_semaphore(pruning_semaphore.clone(), pruning_tasks) + .await?; + + let joint = future::try_join_all(join_handlers) + .await + .map_err(|e| ErrorCode::StorageOther(format!("vector topn pruning failure, {}", e)))?; + let mut top_queue = FixedLengthPriorityQueue::new(limit); + let mut vector_prune_result_map = HashMap::with_capacity(joint.len()); + for vector_prune_result in joint { + let vector_prune_result = vector_prune_result?; - for (index, (_, block_meta)) in metas.iter().enumerate() { - let Some(location) = block_meta.vector_index_location.clone() else { - return Err(ErrorCode::StorageUnavailable(format!( - "vector index {} file don't exist, need refresh", - self.vector_index.index_name - ))); - }; - - let row_count = block_meta.row_count as usize; - let score_offsets = vector_reader.prune(limit, row_count, &location.0).await?; - - for score_offset in score_offsets { - let vector_score = VectorScore { - index, - row_idx: score_offset.idx, - score: F32::from(score_offset.score), - }; - top_queue.push(vector_score); + for vector_score in &vector_prune_result.scores { + top_queue.push(vector_score.clone()); } + vector_prune_result_map.insert(vector_prune_result.block_idx, vector_prune_result); } + let top_scores = top_queue.into_sorted_vec(); let top_indexes: HashSet = top_scores.iter().map(|s| s.index).collect(); let mut pruned_metas = Vec::with_capacity(top_indexes.len()); - for (index, (mut block_meta_index, block_meta)) in metas.into_iter().enumerate() { + let len = vector_prune_result_map.len(); + for index in 0..len { if !top_indexes.contains(&index) { continue; } + let vector_prune_result = vector_prune_result_map.remove(&index).unwrap(); + let mut vector_scores = Vec::new(); for top_score in &top_scores { if top_score.index == index { vector_scores.push((top_score.row_idx as usize, top_score.score)); } } + let mut block_meta_index = vector_prune_result.block_meta_index; block_meta_index.vector_scores = Some(vector_scores); - 
pruned_metas.push((block_meta_index, block_meta)); + + pruned_metas.push((block_meta_index, vector_prune_result.block_meta)); + } + + // Perf. + { + let block_size = pruned_metas.iter().map(|(_, m)| m.block_size).sum(); + metrics_inc_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + metrics_inc_bytes_block_vector_index_pruning_after(block_size); + self.pruning_ctx + .pruning_stats + .set_blocks_vector_index_pruning_after(pruned_metas.len() as u64); + metrics_inc_block_vector_index_pruning_milliseconds(start.elapsed().as_millis() as u64); } Ok(pruned_metas) @@ -182,33 +266,103 @@ impl VectorIndexPruner { metas: Vec<(BlockMetaIndex, Arc)>, ) -> Result)>> { // can't use vector index topn to prune, only generate vector scores. - let mut new_metas = Vec::with_capacity(metas.len()); - for (mut block_meta_index, block_meta) in metas.into_iter() { - let Some(location) = block_meta.vector_index_location.clone() else { - return Err(ErrorCode::StorageUnavailable(format!( - "vector index {} file don't exist, need refresh", - self.vector_index.index_name - ))); - }; - - let row_count = block_meta.row_count as usize; - // use row_count as limit to generate scores for all rows. - let score_offsets = vector_reader - .generate_scores(row_count, &location.0) - .await?; - - let mut vector_scores = Vec::with_capacity(row_count); - for score_offset in &score_offsets { - vector_scores.push((score_offset.idx as usize, F32::from(score_offset.score))); + let pruning_runtime = &self.pruning_ctx.pruning_runtime; + let pruning_semaphore = &self.pruning_ctx.pruning_semaphore; + + let mut block_meta_indexes = metas.into_iter().enumerate(); + let pruning_tasks = std::iter::from_fn(move || { + block_meta_indexes + .next() + .map(|(index, (block_meta_index, block_meta))| { + let vector_reader = vector_reader.clone(); + let index_name = self.vector_index.index_name.clone(); + + let v: VectorPruningFuture = Box::new(move |permit: OwnedSemaphorePermit| { + Box::pin(async move { + let _permit = permit; + + let Some(location) = &block_meta.vector_index_location else { + return Err(ErrorCode::StorageUnavailable(format!( + "vector index {} file don't exist, need refresh", + index_name + ))); + }; + let row_count = block_meta.row_count as usize; + let score_offsets = vector_reader + .generate_scores(row_count, &location.0) + .await?; + + let mut vector_scores = Vec::with_capacity(score_offsets.len()); + for score_offset in score_offsets { + let vector_score = VectorScore { + index, + row_idx: score_offset.idx, + score: F32::from(score_offset.score), + }; + vector_scores.push(vector_score); + } + + Ok(VectorPruneResult { + block_idx: index, + scores: vector_scores, + block_meta_index, + block_meta, + }) + }) + }); + v + }) + }); + + let start = Instant::now(); + + let join_handlers = pruning_runtime + .try_spawn_batch_with_owned_semaphore(pruning_semaphore.clone(), pruning_tasks) + .await?; + + let joint = future::try_join_all(join_handlers) + .await + .map_err(|e| ErrorCode::StorageOther(format!("vector pruning failure, {}", e)))?; + + let mut vector_prune_result_map = HashMap::with_capacity(joint.len()); + for vector_prune_result in joint { + let vector_prune_result = vector_prune_result?; + vector_prune_result_map.insert(vector_prune_result.block_idx, vector_prune_result); + } + + let len = vector_prune_result_map.len(); + let mut new_metas = Vec::with_capacity(len); + for index in 0..len { + let vector_prune_result = vector_prune_result_map.remove(&index).unwrap(); + let mut vector_scores = + 
+        let len = vector_prune_result_map.len();
+        let mut new_metas = Vec::with_capacity(len);
+        for index in 0..len {
+            let vector_prune_result = vector_prune_result_map.remove(&index).unwrap();
+            let mut vector_scores =
+                Vec::with_capacity(vector_prune_result.block_meta.row_count as usize);
+            for score in &vector_prune_result.scores {
+                vector_scores.push((score.row_idx as usize, score.score));
             }
+            let mut block_meta_index = vector_prune_result.block_meta_index;
             block_meta_index.vector_scores = Some(vector_scores);
-            new_metas.push((block_meta_index, block_meta));
+
+            new_metas.push((block_meta_index, vector_prune_result.block_meta));
+        }
+
+        // Perf.
+        {
+            metrics_inc_block_vector_index_pruning_milliseconds(start.elapsed().as_millis() as u64);
         }

         Ok(new_metas)
     }
 }

+// Result of pruning one block.
+struct VectorPruneResult {
+    // The block index in the segment.
+    block_idx: usize,
+    scores: Vec<VectorScore>,
+    block_meta_index: BlockMetaIndex,
+    block_meta: Arc<BlockMeta>,
+}
+
 #[derive(Clone, Debug, Eq, PartialEq)]
 struct VectorScore {
     index: usize,
diff --git a/src/query/storages/fuse/src/pruning_pipeline/mod.rs b/src/query/storages/fuse/src/pruning_pipeline/mod.rs
index 6256687c213c0..d3761ff8af2f7 100644
--- a/src/query/storages/fuse/src/pruning_pipeline/mod.rs
+++ b/src/query/storages/fuse/src/pruning_pipeline/mod.rs
@@ -25,6 +25,7 @@ mod segment_prune_transform;
 mod send_part_info_sink;
 mod sync_block_prune_transform;
 mod topn_prune_transform;
+mod vector_index_prune_transform;

 pub use async_block_prune_transform::AsyncBlockPruneTransform;
 pub use column_oriented_block_prune::ColumnOrientedBlockPruneSink;
@@ -40,3 +41,4 @@ pub use send_part_info_sink::SendPartInfoSink;
 pub use send_part_info_sink::SendPartState;
 pub use sync_block_prune_transform::SyncBlockPruneTransform;
 pub use topn_prune_transform::TopNPruneTransform;
+pub use vector_index_prune_transform::VectorIndexPruneTransform;
diff --git a/src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs b/src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs
new file mode 100644
index 0000000000000..96935cb56e707
--- /dev/null
+++ b/src/query/storages/fuse/src/pruning_pipeline/vector_index_prune_transform.rs
@@ -0,0 +1,86 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use databend_common_expression::BlockMetaInfoDowncast;
+use databend_common_expression::DataBlock;
+use databend_common_pipeline_core::processors::InputPort;
+use databend_common_pipeline_core::processors::OutputPort;
+use databend_common_pipeline_core::processors::ProcessorPtr;
+use databend_common_pipeline_transforms::AsyncAccumulatingTransform;
+use databend_common_pipeline_transforms::AsyncAccumulatingTransformer;
+use databend_storages_common_pruner::BlockMetaIndex;
+use databend_storages_common_table_meta::meta::BlockMeta;
+
+use crate::pruning::VectorIndexPruner;
+use crate::pruning_pipeline::block_prune_result_meta::BlockPruneResult;
+
+// VectorIndexPruneTransform is a processor that accumulates the block metas and does not
+// push them downstream until all data has been received and pruned.
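+// Buffering is required because the global top-k over all candidate blocks can
+// only be decided once every block's vector index has been searched.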
+pub struct VectorIndexPruneTransform {
+    vector_index_pruner: VectorIndexPruner,
+    metas: Vec<(BlockMetaIndex, Arc<BlockMeta>)>,
+}
+
+#[async_trait::async_trait]
+impl AsyncAccumulatingTransform for VectorIndexPruneTransform {
+    const NAME: &'static str = "VectorIndexPruneTransform";
+
+    async fn transform(&mut self, mut data: DataBlock) -> Result<Option<DataBlock>> {
+        if let Some(ptr) = data.take_meta() {
+            if let Some(meta) = BlockPruneResult::downcast_from(ptr) {
+                self.metas.extend(meta.block_metas);
+                return Ok(None);
+            }
+        }
+        Err(ErrorCode::Internal(
+            "Cannot downcast meta to BlockPruneResult",
+        ))
+    }
+
+    async fn on_finish(&mut self, _output: bool) -> Result<Option<DataBlock>> {
+        self.do_vector_index_prune().await
+    }
+}
+
+impl VectorIndexPruneTransform {
+    pub fn create(
+        input: Arc<InputPort>,
+        output: Arc<OutputPort>,
+        vector_index_pruner: VectorIndexPruner,
+    ) -> Result<ProcessorPtr> {
+        Ok(ProcessorPtr::create(AsyncAccumulatingTransformer::create(
+            input,
+            output,
+            VectorIndexPruneTransform {
+                vector_index_pruner,
+                metas: vec![],
+            },
+        )))
+    }
+
+    async fn do_vector_index_prune(&self) -> Result<Option<DataBlock>> {
+        let pruned = self.vector_index_pruner.prune(self.metas.clone()).await?;
+        if pruned.is_empty() {
+            Ok(None)
+        } else {
+            Ok(Some(DataBlock::empty_with_meta(BlockPruneResult::create(
+                pruned,
+            ))))
+        }
+    }
+}
diff --git a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test
index 19f6103aba605..70e2ca4afb4de 100644
--- a/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test
+++ b/tests/sqllogictests/suites/ee/09_ee_vector_index/09_0000_vector_index_base.test
@@ -218,6 +218,27 @@ SELECT id, l2_distance(embedding, [0.02559146, 0.38549544, 0.77889671, 0.3159103
 6 0.73338425
 11 0.76073563

+
+query IF
+SELECT id, cosine_distance(embedding, [0.50515236, 0.8561939, 0.87169914, 0.55843271, 0.73689797, 0.49985862, 0.64527255, 0.29313098]::vector(8)) AS similarity FROM t ORDER BY similarity DESC;
+----
+9 0.2568838
+16 0.25626028
+13 0.24121934
+2 0.2268933
+14 0.21996021
+5 0.17328858
+4 0.16786504
+6 0.1645267
+7 0.15616316
+15 0.150944
+3 0.14645952
+8 0.14554787
+11 0.14048636
+12 0.060161233
+10 0.033747792
+1 0.009774268
+
 statement ok
 use default