Accuracy, Correctness, new Python & JavaScript APIs #606

Open · wants to merge 19 commits into `main`
16 changes: 14 additions & 2 deletions .vscode/settings.json
@@ -122,7 +122,8 @@
"xtree": "cpp",
"xutility": "cpp",
"execution": "cpp",
"text_encoding": "cpp"
"text_encoding": "cpp",
"__functional_03": "cpp"
},
"cSpell.words": [
"allclose",
@@ -151,6 +152,7 @@
"FAISS",
"fbin",
"furo",
"geospatial",
"googleanalytics",
"groundtruth",
"hashable",
@@ -170,6 +172,7 @@
"longlong",
"memmap",
"MSVC",
"Multimodal",
"Napi",
"ndarray",
"NDCG",
@@ -208,7 +211,10 @@
"usecases",
"Vardanian",
"vectorize",
"Xunit"
"Vincenty",
"Wasmer",
"Xunit",
"Yuga"
],
"autoDocstring.docstringFormat": "sphinx",
"java.configuration.updateBuildConfiguration": "interactive",
@@ -225,5 +231,11 @@
"editor.formatOnSave": true,
"editor.defaultFormatter": "golang.go"
},
"editor.tabSize": 4,
"editor.insertSpaces": true,
"prettier.singleQuote": true,
"prettier.tabWidth": 4,
    "prettier.useTabs": false,

"dotnet.defaultSolution": "csharp/Cloud.Unum.USearch.sln"
}
8 changes: 4 additions & 4 deletions CONTRIBUTING.md
@@ -226,10 +226,10 @@ nvm install 20
Testing:

```sh
npm install -g typescript
npm install
npm run build-js
npm test
npm install -g typescript # Install TypeScript globally
npm install # Compile `javascript/lib.cpp`
npm run build-js # Generate JS from TS
npm test # Run the test suite
```

To deploy to AWS Lambda, you'll need to recompile the binding for the Lambda runtime environment.
6 changes: 5 additions & 1 deletion README.md
@@ -161,7 +161,7 @@ This can result in __20x cost reduction__ on AWS and other public clouds.
index.save("index.usearch")

loaded_copy = index.load("index.usearch")
view = Index.restore("index.usearch", view=True)
view = Index.restore("index.usearch", view=True, ...)

other_view = Index(ndim=..., metric=...)
other_view.view("index.usearch")
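The snippet above distinguishes loading an index (copying the file into RAM) from viewing it (memory-mapping the file from disk). A minimal stdlib sketch of that distinction, using a toy binary file rather than the USearch API itself:

```python
import mmap
import os
import struct
import tempfile

# Write a toy binary file standing in for a serialized index.
path = os.path.join(tempfile.mkdtemp(), "index.bin")
with open(path, "wb") as f:
    f.write(struct.pack("<4f", 1.0, 2.0, 3.0, 4.0))

# "load": read the whole file into RAM up front.
with open(path, "rb") as f:
    loaded = f.read()

# "view": memory-map the file; the OS pages data in lazily,
# so serving a large index needs far less resident memory.
f = open(path, "rb")
view = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
first = struct.unpack_from("<f", view, 0)[0]
view.close()
f.close()
```

The same trade-off applies to the `Index.restore(..., view=True)` call: a view is cheap to open and share across processes, but reads hit the page cache instead of private memory.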
@@ -528,7 +528,11 @@ index = Index(ndim=ndim, metric=CompiledMetric(

- [x] ClickHouse: [C++](https://github.com/ClickHouse/ClickHouse/pull/53447), [docs](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes#usearch).
- [x] DuckDB: [post](https://duckdb.org/2024/05/03/vector-similarity-search-vss.html).
- [x] ScyllaDB: [Rust](https://github.com/scylladb/vector-store), [presentation](https://www.slideshare.net/slideshow/vector-search-with-scylladb-by-szymon-wasik/276571548).
- [x] TiDB & TiFlash: [C++](https://github.com/pingcap/tiflash), [announcement](https://www.pingcap.com/article/introduce-vector-search-indexes-in-tidb/).
- [x] YugaByte: [C++](https://github.com/yugabyte/yugabyte-db/blob/366b9f5e3c4df3a1a17d553db41d6dc50146f488/src/yb/vector_index/usearch_wrapper.cc).
- [x] Google: [UniSim](https://github.com/google/unisim), [RetSim](https://arxiv.org/abs/2311.17264) paper.
- [x] MemGraph: [C++](https://github.com/memgraph/memgraph/blob/784dd8520f65050d033aea8b29446e84e487d091/src/storage/v2/indices/vector_index.cpp), [announcement](https://memgraph.com/blog/simplify-data-retrieval-memgraph-vector-search).
- [x] LanternDB: [C++](https://github.com/lanterndata/lantern), [Rust](https://github.com/lanterndata/lantern_extras), [docs](https://lantern.dev/blog/hnsw-index-creation).
- [x] LangChain: [Python](https://github.com/langchain-ai/langchain/releases/tag/v0.0.257) and [JavaScript](https://github.com/hwchase17/langchainjs/releases/tag/0.0.125).
- [x] Microsoft Semantic Kernel: [Python](https://github.com/microsoft/semantic-kernel/releases/tag/python-0.3.9.dev) and C#.
Expand Down
54 changes: 50 additions & 4 deletions cpp/test.cpp
@@ -877,7 +877,7 @@ void test_absurd(std::size_t dimensions, std::size_t connectivity, std::size_t e
template <typename scalar_at>
void test_exact_search(std::size_t dataset_count, std::size_t queries_count, std::size_t wanted_count) {
std::size_t dimensions = 32;
metric_punned_t metric(dimensions, metric_kind_t::cos_k);
metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind<scalar_at>());

std::random_device rd;
std::mt19937 gen(rd());
@@ -886,9 +886,9 @@ void test_exact_search(std::size_t dataset_count, std::size_t queries_count, std
std::generate(dataset.begin(), dataset.end(), [&] { return static_cast<scalar_at>(dis(gen)); });

exact_search_t search;
auto results = search( //
(byte_t const*)dataset.data(), dataset_count, dimensions * sizeof(float), //
(byte_t const*)dataset.data(), queries_count, dimensions * sizeof(float), //
auto results = search( //
(byte_t const*)dataset.data(), dataset_count, dimensions * sizeof(scalar_at), //
(byte_t const*)dataset.data(), queries_count, dimensions * sizeof(scalar_at), //
wanted_count, metric);

for (std::size_t i = 0; i < results.size(); ++i)
@@ -1098,6 +1098,51 @@ template <typename key_at, typename slot_at> void test_replacing_update() {
expect_eq(final_search[2].member.key, 44);
}

/**
* Tests the filtered search functionality of the index.
*/
void test_filtered_search() {
constexpr std::size_t dataset_count = 2048;
constexpr std::size_t dimensions = 32;
metric_punned_t metric(dimensions, metric_kind_t::cos_k);

std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis(0.0, 1.0);
using vector_of_vectors_t = std::vector<std::vector<float>>;

vector_of_vectors_t vector_of_vectors(dataset_count);
for (auto& vector : vector_of_vectors) {
vector.resize(dimensions);
std::generate(vector.begin(), vector.end(), [&] { return dis(gen); });
}

index_dense_t index = index_dense_t::make(metric);
index.reserve(dataset_count);
for (std::size_t idx = 0; idx < dataset_count; ++idx)
index.add(idx, vector_of_vectors[idx].data());
expect_eq(index.size(), dataset_count);

{
auto predicate = [](index_dense_t::key_t key) { return key != 0; };
auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate);
expect_eq(10, results.size()); // ! Should not contain 0
for (std::size_t i = 0; i != results.size(); ++i)
expect(0 != results[i].member.key);
}
{
auto predicate = [](index_dense_t::key_t) { return false; };
auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate);
        expect_eq(0, results.size()); // ! Nothing should pass the predicate
}
{
auto predicate = [](index_dense_t::key_t key) { return key == 10; };
auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate);
        expect_eq(1, results.size()); // ! Only key 10 should match
expect_eq(10, results[0].member.key);
}
}
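The behavior this test pins down can be sketched as a brute-force filtered search in plain Python: distances are computed only for keys that pass the predicate, mirroring the three cases above (exclude one key, reject everything, accept a single key). The helper names here are illustrative, not part of the USearch API.

```python
import math
import random

def cosine_distance(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(x * x for x in b))
    return 1.0 - dot / norm

def filtered_search(dataset, query, count, predicate):
    # Score only the entries whose keys pass the predicate,
    # then keep the `count` nearest ones.
    scored = [(cosine_distance(vec, query), key)
              for key, vec in dataset.items() if predicate(key)]
    scored.sort()
    return scored[:count]

random.seed(42)
dataset = {key: [random.random() for _ in range(32)] for key in range(2048)}
query = dataset[0]

top_without_zero = filtered_search(dataset, query, 10, lambda key: key != 0)
nothing = filtered_search(dataset, query, 10, lambda key: False)
only_ten = filtered_search(dataset, query, 10, lambda key: key == 10)
```

The HNSW version in the diff applies the same predicate inside the graph traversal, so filtering happens during the search rather than as a post-processing step over an unfiltered result set.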

int main(int, char**) {
test_uint40();
test_cosine<float, std::int64_t, uint40_t>(10, 10);
@@ -1174,5 +1219,6 @@ int main(int, char**) {
test_sets<std::int64_t, slot32_t>(set_size, 20, 30);
test_strings<std::int64_t, slot32_t>();

test_filtered_search();
return 0;
}
40 changes: 20 additions & 20 deletions include/usearch/index.hpp
@@ -2183,6 +2183,7 @@ class index_gt {
*/
struct usearch_align_m context_t {
top_candidates_t top_candidates{};
top_candidates_t top_for_refine{};
next_candidates_t next_candidates{};
visits_hash_set_t visits{};
std::default_random_engine level_generator{};
@@ -2498,6 +2499,13 @@ class index_gt {
if (nodes_)
std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size());

        // Pre-reserve capacity for `top_for_refine`, which holds at most one element
        // more than the maximum connectivity.
std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity);
for (std::size_t i = 0; i != new_contexts.size(); ++i)
if (!new_contexts[i].top_for_refine.reserve(connectivity_max + 1))
return false;

limits_ = limits;
nodes_capacity_ = limits.members;
nodes_ = std::move(new_nodes);
@@ -3179,17 +3187,11 @@

std::size_t memory_usage_per_node(level_t level) const noexcept { return node_bytes_(level); }

double inverse_log_connectivity() const {
return pre_.inverse_log_connectivity;
}
double inverse_log_connectivity() const { return pre_.inverse_log_connectivity; }

std::size_t neighbors_base_bytes() const {
return pre_.neighbors_base_bytes;
}
std::size_t neighbors_base_bytes() const { return pre_.neighbors_base_bytes; }

std::size_t neighbors_bytes() const {
return pre_.neighbors_bytes;
}
std::size_t neighbors_bytes() const { return pre_.neighbors_bytes; }

#if defined(USEARCH_USE_PRAGMA_REGION)
#pragma endregion
@@ -3790,7 +3792,7 @@ class index_gt {
metric_at&& metric, compressed_slot_t new_slot, candidates_view_t new_neighbors, value_at&& value,
level_t level, context_t& context) usearch_noexcept_m {

top_candidates_t& top = context.top_candidates;
top_candidates_t& top_for_refine = context.top_for_refine;
std::size_t const connectivity_max = level ? config_.connectivity : config_.connectivity_base;

// Reverse links from the neighbors:
@@ -3817,19 +3819,16 @@
continue;
}

// To fit a new connection we need to drop an existing one.
top.clear();
usearch_assert_m((top.capacity() >= (close_header.size() + 1)),
"The memory must have been reserved in `add`");
top.insert_reserved({context.measure(value, citerator_at(close_slot), metric), new_slot});
top_for_refine.clear();
top_for_refine.insert_reserved({context.measure(value, citerator_at(close_slot), metric), new_slot});
for (compressed_slot_t successor_slot : close_header)
top.insert_reserved(
top_for_refine.insert_reserved(
{context.measure(citerator_at(close_slot), citerator_at(successor_slot), metric), successor_slot});

// Export the results:
close_header.clear();
candidates_view_t top_view =
refine_(metric, connectivity_max, top, context, context.computed_distances_in_reverse_refines);
candidates_view_t top_view = refine_(metric, connectivity_max, top_for_refine, context,
context.computed_distances_in_reverse_refines);
usearch_assert_m(top_view.size(), "This would lead to isolated nodes");
for (std::size_t idx = 0; idx != top_view.size(); idx++)
close_header.push_back(top_view[idx].slot);
@@ -4178,9 +4177,10 @@
// This can substantially grow our priority queue:
next.insert({-successor_dist, successor_slot});
if (is_dummy<predicate_at>() ||
predicate(member_cref_t{node_at_(successor_slot).ckey(), successor_slot}))
predicate(member_cref_t{node_at_(successor_slot).ckey(), successor_slot})) {
top.insert({successor_dist, successor_slot}, top_limit);
radius = top.top().distance;
radius = top.top().distance;
}
}
}
}
8 changes: 8 additions & 0 deletions javascript/README.md
@@ -78,6 +78,14 @@ const batchResults = index.search(vectors, 2);
const firstMatch = batchResults.get(0);
```

Multi-threading is supported for batch operations:

```js
const threads_count = 0; // Zero means auto-detect; otherwise pass a positive integer
index.add(keys, vectors, threads_count);
const batchResults = index.search(vectors, 2, threads_count);
```

## Index Introspection

Inspect and interact with the index: