From 2f34c1532b713d058fd242a204166675265393d5 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 3 Dec 2024 23:56:01 +0200
Subject: [PATCH 01/33] naive implementation of L2

---
 src/VecSim/spaces/L2/L2.cpp | 24 +++++++++++++++++++++++
 src/VecSim/types/int8.h     | 39 +++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 src/VecSim/types/int8.h
diff --git a/src/VecSim/spaces/L2/L2.cpp b/src/VecSim/spaces/L2/L2.cpp
index 5fba0555e..379e21c52 100644
--- a/src/VecSim/spaces/L2/L2.cpp
+++ b/src/VecSim/spaces/L2/L2.cpp
@@ -7,6 +7,7 @@
 #include "L2.h"
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
+#include "VecSim/types/int8.h"
 #include <cstring>
 
 using bfloat16 = vecsim_types::bfloat16;
@@ -70,3 +71,26 @@ float FP16_L2Sqr(const void *pVect1, const void *pVect2, size_t dimension) {
     }
     return res;
 }
+
+template <bool is_little>
+float INT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    int res = 0;
+    for (size_t i = 0; i < dimension; i++) {
+        int16_t a = vecsim_types::int8_to_int16<is_little>(pVect1[i]);
+        int16_t b = vecsim_types::int8_to_int16<is_little>(pVect2[i]);
+        int16_t diff = a - b;
+        res += diff * diff;
+    }
+    return float(res);
+}
+
+float INT8_L2Sqr_LittleEndian(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    return INT8_L2Sqr<true>(pVect1v, pVect2v, dimension);
+}
+
+float INT8_L2Sqr_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    return INT8_L2Sqr<false>(pVect1v, pVect2v, dimension);
+}
diff --git a/src/VecSim/types/int8.h b/src/VecSim/types/int8.h
new file mode 100644
index 000000000..73d6b188a
--- /dev/null
+++ b/src/VecSim/types/int8.h
@@ -0,0 +1,39 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <cmath>
+
+namespace vecsim_types {
+struct bfloat16 {
+    uint16_t val;
+    bfloat16() = default;
+    explicit constexpr bfloat16(uint16_t val) : val(val) {}
+    operator uint16_t() const { return val; }
+};
+
+static inline bfloat16 float_to_bf16(const float ff) {
+    uint32_t *p_f32 = (uint32_t *)&ff;
+    uint32_t f32 = *p_f32;
+    uint32_t lsb = (f32 >> 16) & 1;
+    uint32_t round = lsb + 0x7FFF;
+    f32 += round;
+    return bfloat16(f32 >> 16);
+}
+
+template <bool is_little = true>
+inline float bfloat16_to_float32(bfloat16 val) {
+    size_t constexpr bytes_offset = is_little ? 1 : 0;
+    float result = 0;
+    bfloat16 *p_result = (bfloat16 *)&result + bytes_offset;
+    *p_result = val;
+    return result;
+}
+
+} // namespace vecsim_types

From c641d2364975fe27413ec7cac5cb6db7e90488d8 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 4 Dec 2024 00:03:37 +0200
Subject: [PATCH 02/33] update

---
 src/VecSim/types/int8.h | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/src/VecSim/types/int8.h b/src/VecSim/types/int8.h
index 73d6b188a..158f1622b 100644
--- a/src/VecSim/types/int8.h
+++ b/src/VecSim/types/int8.h
@@ -11,27 +11,12 @@
 #include <cmath>
 
 namespace vecsim_types {
-struct bfloat16 {
-    uint16_t val;
-    bfloat16() = default;
-    explicit constexpr bfloat16(uint16_t val) : val(val) {}
-    operator uint16_t() const { return val; }
-};
-
-static inline bfloat16 float_to_bf16(const float ff) {
-    uint32_t *p_f32 = (uint32_t *)&ff;
-    uint32_t f32 = *p_f32;
-    uint32_t lsb = (f32 >> 16) & 1;
-    uint32_t round = lsb + 0x7FFF;
-    f32 += round;
-    return bfloat16(f32 >> 16);
-}
 
 template <bool is_little = true>
-inline float bfloat16_to_float32(bfloat16 val) {
+inline int16_t int8_to_int16(int8_t val) {
     size_t constexpr bytes_offset = is_little ? 1 : 0;
-    float result = 0;
-    bfloat16 *p_result = (bfloat16 *)&result + bytes_offset;
+    int result = 0;
+    int16_t *p_result = (int16_t *)&result + bytes_offset;
     *p_result = val;
     return result;
 }

From 1c5eb9066083a990ddac89b7bcb3ce961d155159 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Fri, 6 Dec 2024 16:38:38 +0000
Subject: [PATCH 03/33] implement naive distance for int8

add cosine to spaces

fix typos in calculator
---
 src/VecSim/spaces/CMakeLists.txt              |  2 +
 src/VecSim/spaces/Cosine/Cosine.cpp           | 23 +++++++++
 src/VecSim/spaces/Cosine/Cosine.h             | 11 ++++
 src/VecSim/spaces/Cosine_space.cpp            | 27 ++++++++++
 src/VecSim/spaces/Cosine_space.h              | 13 +++++
 src/VecSim/spaces/IP/IP.cpp                   | 13 +++++
 src/VecSim/spaces/IP/IP.h                     |  2 +
 src/VecSim/spaces/IP_space.cpp                | 13 +++++
 src/VecSim/spaces/IP_space.h                  |  2 +
 src/VecSim/spaces/L2/L2.cpp                   | 14 +-----
 src/VecSim/spaces/L2/L2.h                     |  2 +
 src/VecSim/spaces/L2_space.cpp                | 14 ++++++
 src/VecSim/spaces/L2_space.h                  |  2 +
 src/VecSim/spaces/computer/calculator.h       |  4 +-
 .../spaces/functions/implementation_chooser.h |  4 +-
 src/VecSim/types/int8.h                       | 24 ---------
 tests/unit/test_spaces.cpp                    | 50 +++++++++++++++++++
 17 files changed, 180 insertions(+), 40 deletions(-)
 create mode 100644 src/VecSim/spaces/Cosine/Cosine.cpp
 create mode 100644 src/VecSim/spaces/Cosine/Cosine.h
 create mode 100644 src/VecSim/spaces/Cosine_space.cpp
 create mode 100644 src/VecSim/spaces/Cosine_space.h
 delete mode 100644 src/VecSim/types/int8.h

diff --git a/src/VecSim/spaces/CMakeLists.txt b/src/VecSim/spaces/CMakeLists.txt
index 9cc0baaaf..ad22e8187 100644
--- a/src/VecSim/spaces/CMakeLists.txt
+++ b/src/VecSim/spaces/CMakeLists.txt
@@ -3,6 +3,7 @@ project(VectorSimilaritySpaces_no_optimization)
 add_library(VectorSimilaritySpaces_no_optimization
 	L2/L2.cpp
 	IP/IP.cpp
+	Cosine/Cosine.cpp
 )
 
 include(${root}/cmake/cpu_features.cmake)
@@ -79,6 +80,7 @@ endif()
 add_library(VectorSimilaritySpaces
 	L2_space.cpp
 	IP_space.cpp
+	Cosine_space.cpp
 	spaces.cpp
 	${OPTIMIZATIONS}
 	computer/preprocessor_container.cpp
diff --git a/src/VecSim/spaces/Cosine/Cosine.cpp b/src/VecSim/spaces/Cosine/Cosine.cpp
new file mode 100644
index 000000000..1cbc9a191
--- /dev/null
+++ b/src/VecSim/spaces/Cosine/Cosine.cpp
@@ -0,0 +1,23 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "Cosine.h"
+
+// Cosine distance between two int8 vectors: 1 - <v1, v2> / (norm_v1 * norm_v2).
+// Each buffer holds `dimension` int8 values followed by the vector's precomputed float norm.
+float INT8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    const int8_t *pVect1 = (const int8_t *)pVect1v;
+    const int8_t *pVect2 = (const int8_t *)pVect2v;
+
+    int res = 0;
+    for (size_t i = 0; i < dimension; i++) {
+        res += int(pVect1[i]) * int(pVect2[i]);
+    }
+    // The norm lives right after the int8 data; offset 0 would reinterpret the data itself.
+    float norm_v1 = *(const float *)(pVect1 + dimension);
+    float norm_v2 = *(const float *)(pVect2 + dimension);
+    return 1.0f - float(res) / (norm_v1 * norm_v2);
+}
diff --git a/src/VecSim/spaces/Cosine/Cosine.h b/src/VecSim/spaces/Cosine/Cosine.h
new file mode 100644
index 000000000..c42f6c14f
--- /dev/null
+++ b/src/VecSim/spaces/Cosine/Cosine.h
@@ -0,0 +1,11 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#pragma once
+
+#include <cstdlib>
+
+float INT8_Cosine(const void *pVect1, const void *pVect2, size_t dimension);
diff --git a/src/VecSim/spaces/Cosine_space.cpp b/src/VecSim/spaces/Cosine_space.cpp
new file mode 100644
index 000000000..7cace4c32
--- /dev/null
+++ b/src/VecSim/spaces/Cosine_space.cpp
@@ -0,0 +1,27 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "VecSim/spaces/space_includes.h"
+#include "VecSim/spaces/Cosine_space.h"
+#include "VecSim/spaces/Cosine/Cosine.h"
+
+namespace spaces {
+dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment,
+                                           const void *arch_opt) {
+    unsigned char dummy_alignment;
+    if (alignment == nullptr) {
+        alignment = &dummy_alignment;
+    }
+
+    dist_func_t<float> ret_dist_func = INT8_Cosine;
+    // Optimizations assume at least 32 int8. If we have less, we use the naive implementation.
+    if (dim < 32) {
+        return ret_dist_func;
+    }
+    return ret_dist_func;
+}
+
+} // namespace spaces
diff --git a/src/VecSim/spaces/Cosine_space.h b/src/VecSim/spaces/Cosine_space.h
new file mode 100644
index 000000000..e139a5521
--- /dev/null
+++ b/src/VecSim/spaces/Cosine_space.h
@@ -0,0 +1,13 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#pragma once
+#include "VecSim/spaces/spaces.h"
+
+namespace spaces {
+dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                           const void *arch_opt = nullptr);
+} // namespace spaces
diff --git a/src/VecSim/spaces/IP/IP.cpp b/src/VecSim/spaces/IP/IP.cpp
index 98ad07676..1562e5b1a 100644
--- a/src/VecSim/spaces/IP/IP.cpp
+++ b/src/VecSim/spaces/IP/IP.cpp
@@ -66,3 +66,16 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
     }
     return 1.0f - res;
 }
+
+float INT8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    int res = 0;
+    for (size_t i = 0; i < dimension; i++) {
+        int16_t a = pVect1[i];
+        int16_t b = pVect2[i];
+        res += a * b;
+    }
+    return 1.0f - float(res);
+}
diff --git a/src/VecSim/spaces/IP/IP.h b/src/VecSim/spaces/IP/IP.h
index 50fecef33..64e11b52f 100644
--- a/src/VecSim/spaces/IP/IP.h
+++ b/src/VecSim/spaces/IP/IP.h
@@ -16,3 +16,5 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
 
 float BF16_InnerProduct_LittleEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
 float BF16_InnerProduct_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
+
+float INT8_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension);
diff --git a/src/VecSim/spaces/IP_space.cpp b/src/VecSim/spaces/IP_space.cpp
index e6da26947..699919dc2 100644
--- a/src/VecSim/spaces/IP_space.cpp
+++ b/src/VecSim/spaces/IP_space.cpp
@@ -196,4 +196,17 @@ dist_func_t<float> IP_FP16_GetDistFunc(size_t dim, unsigned char *alignment, con
     return ret_dist_func;
 }
 
+dist_func_t<float> IP_INT8_GetDistFunc(size_t dim, unsigned char *alignment, const void *arch_opt) {
+    unsigned char dummy_alignment;
+    if (alignment == nullptr) {
+        alignment = &dummy_alignment;
+    }
+
+    dist_func_t<float> ret_dist_func = INT8_InnerProduct;
+    // Optimizations assume at least 32 int8. If we have less, we use the naive implementation.
+    if (dim < 32) {
+        return ret_dist_func;
+    }
+    return ret_dist_func;
+}
 } // namespace spaces
diff --git a/src/VecSim/spaces/IP_space.h b/src/VecSim/spaces/IP_space.h
index a3ab0f4f6..87407c1a3 100644
--- a/src/VecSim/spaces/IP_space.h
+++ b/src/VecSim/spaces/IP_space.h
@@ -16,4 +16,6 @@ dist_func_t<float> IP_BF16_GetDistFunc(size_t dim, unsigned char *alignment = nu
                                        const void *arch_opt = nullptr);
 dist_func_t<float> IP_FP16_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                        const void *arch_opt = nullptr);
+dist_func_t<float> IP_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                       const void *arch_opt = nullptr);
 } // namespace spaces
diff --git a/src/VecSim/spaces/L2/L2.cpp b/src/VecSim/spaces/L2/L2.cpp
index 379e21c52..ef310418b 100644
--- a/src/VecSim/spaces/L2/L2.cpp
+++ b/src/VecSim/spaces/L2/L2.cpp
@@ -7,7 +7,6 @@
 #include "L2.h"
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
-#include "VecSim/types/int8.h"
 #include <cstring>
 
 using bfloat16 = vecsim_types::bfloat16;
@@ -72,25 +71,16 @@ float FP16_L2Sqr(const void *pVect1, const void *pVect2, size_t dimension) {
     return res;
 }
 
-template <bool is_little>
 float INT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) {
     int8_t *pVect1 = (int8_t *)pVect1v;
     int8_t *pVect2 = (int8_t *)pVect2v;
 
     int res = 0;
     for (size_t i = 0; i < dimension; i++) {
-        int16_t a = vecsim_types::int8_to_int16<is_little>(pVect1[i]);
-        int16_t b = vecsim_types::int8_to_int16<is_little>(pVect2[i]);
+        int16_t a = pVect1[i];
+        int16_t b = pVect2[i];
         int16_t diff = a - b;
         res += diff * diff;
     }
     return float(res);
 }
-
-float INT8_L2Sqr_LittleEndian(const void *pVect1v, const void *pVect2v, size_t dimension) {
-    return INT8_L2Sqr<true>(pVect1v, pVect2v, dimension);
-}
-
-float INT8_L2Sqr_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension) {
-    return INT8_L2Sqr<false>(pVect1v, pVect2v, dimension);
-}
diff --git a/src/VecSim/spaces/L2/L2.h b/src/VecSim/spaces/L2/L2.h
index c367f2ee1..65649d4eb 100644
--- a/src/VecSim/spaces/L2/L2.h
+++ b/src/VecSim/spaces/L2/L2.h
@@ -16,3 +16,5 @@ float BF16_L2Sqr_LittleEndian(const void *pVect1v, const void *pVect2v, size_t d
 float BF16_L2Sqr_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
 
 float FP16_L2Sqr(const void *pVect1, const void *pVect2, size_t dimension);
+
+float INT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension);
diff --git a/src/VecSim/spaces/L2_space.cpp b/src/VecSim/spaces/L2_space.cpp
index 1c2b2b59f..3f9d83f03 100644
--- a/src/VecSim/spaces/L2_space.cpp
+++ b/src/VecSim/spaces/L2_space.cpp
@@ -189,4 +189,18 @@ dist_func_t<float> L2_FP16_GetDistFunc(size_t dim, unsigned char *alignment, con
     return ret_dist_func;
 }
 
+dist_func_t<float> L2_INT8_GetDistFunc(size_t dim, unsigned char *alignment, const void *arch_opt) {
+    unsigned char dummy_alignment;
+    if (alignment == nullptr) {
+        alignment = &dummy_alignment;
+    }
+
+    dist_func_t<float> ret_dist_func = INT8_L2Sqr;
+    // Optimizations assume at least 32 int8. If we have less, we use the naive implementation.
+    if (dim < 32) {
+        return ret_dist_func;
+    }
+    return ret_dist_func;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/spaces/L2_space.h b/src/VecSim/spaces/L2_space.h
index 4a2ea801a..48e50a8c2 100644
--- a/src/VecSim/spaces/L2_space.h
+++ b/src/VecSim/spaces/L2_space.h
@@ -16,4 +16,6 @@ dist_func_t<float> L2_BF16_GetDistFunc(size_t dim, unsigned char *alignment = nu
                                        const void *arch_opt = nullptr);
 dist_func_t<float> L2_FP16_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                        const void *arch_opt = nullptr);
+dist_func_t<float> L2_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                       const void *arch_opt = nullptr);
 } // namespace spaces
diff --git a/src/VecSim/spaces/computer/calculator.h b/src/VecSim/spaces/computer/calculator.h
index 36e76deed..64e0d8dae 100644
--- a/src/VecSim/spaces/computer/calculator.h
+++ b/src/VecSim/spaces/computer/calculator.h
@@ -26,10 +26,10 @@ class IndexCalculatorInterface : public VecsimBaseObject {
 /**
  * This object purpose is to calculate the distance between two vectors.
  * It extends the IndexCalculatorInterface class' type to hold the distance function.
- * Every specific implmentation of the distance claculater should hold by refrence or by value the
+ * Every specific implementation of the distance calculator should hold by reference or by value the
  * parameters required for the calculation. The distance calculation API of all DistanceCalculator
  * classes is: calc_dist(v1,v2,dim). Internally it calls the distance function according the
- * template signature, allowing fexability in the distance function arguments.
+ * template signature, allowing flexibility in the distance function arguments.
  */
 template <typename DistType, typename DistFuncType>
 class DistanceCalculatorInterface : public IndexCalculatorInterface<DistType> {
diff --git a/src/VecSim/spaces/functions/implementation_chooser.h b/src/VecSim/spaces/functions/implementation_chooser.h
index 2903b8cc4..6bb61815e 100644
--- a/src/VecSim/spaces/functions/implementation_chooser.h
+++ b/src/VecSim/spaces/functions/implementation_chooser.h
@@ -40,8 +40,8 @@
 // out:     The output variable that will be set to the chosen implementation.
 // dim:     The dimension.
 // chunk:   The chunk size. Can be 32, 16 or 8. 32 for 16-bit elements, 16 for 32-bit elements, 8
-// for 64-bit elements. func:    The templated function that we want to choose the implementation
-// for.
+// for 64-bit elements.
+// func:    The templated function that we want to choose the implementation for.
 #define CHOOSE_IMPLEMENTATION(out, dim, chunk, func)                                               \
     do {                                                                                           \
         decltype(out) __ret_dist_func;                                                             \
diff --git a/src/VecSim/types/int8.h b/src/VecSim/types/int8.h
deleted file mode 100644
index 158f1622b..000000000
--- a/src/VecSim/types/int8.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-#include <cmath>
-
-namespace vecsim_types {
-
-template <bool is_little = true>
-inline int16_t int8_to_int16(int8_t val) {
-    size_t constexpr bytes_offset = is_little ? 1 : 0;
-    int result = 0;
-    int16_t *p_result = (int16_t *)&result + bytes_offset;
-    *p_result = val;
-    return result;
-}
-
-} // namespace vecsim_types
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 7cf7de92b..2a58d6072 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -12,10 +12,12 @@
 #include "VecSim/spaces/space_includes.h"
 #include "VecSim/spaces/IP/IP.h"
 #include "VecSim/spaces/L2/L2.h"
+#include "VecSim/spaces/Cosine/Cosine.h"
 #include "VecSim/utils/vec_utils.h"
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/spaces/IP_space.h"
 #include "VecSim/spaces/L2_space.h"
+#include "VecSim/spaces/Cosine_space.h"
 #include "VecSim/types/float16.h"
 #include "VecSim/spaces/functions/AVX512F.h"
 #include "VecSim/spaces/functions/AVX.h"
@@ -102,6 +104,21 @@ TEST_F(SpacesTest, fp16_l2_no_optimization_func_test) {
     ASSERT_EQ(dist, FP32_L2Sqr((const void *)sanity_a, (const void *)sanity_b, dim));
 }
 
+TEST_F(SpacesTest, int8_l2_no_optimization_func_test) {
+    size_t dim = 5;
+
+    int8_t a[dim], b[dim];
+    for (size_t i = 0; i < dim; i++) {
+        a[i] = (i + 1);
+        b[i] = (i + 2);
+    }
+
+    float dist = INT8_L2Sqr((const void *)a, (const void *)b, dim);
+    ASSERT_EQ(dist, 5.0);
+}
+
+/* ======================== IP NO OPT ======================== */
+
 TEST_F(SpacesTest, float_ip_no_optimization_func_test) {
     size_t dim = 5;
 
@@ -211,6 +228,34 @@ TEST_F(SpacesTest, fp16_ip_no_optimization_func_test) {
     ASSERT_EQ(dist, FP32_InnerProduct((const void *)sanity_a, (const void *)sanity_b, dim));
 }
 
+TEST_F(SpacesTest, int8_ip_no_optimization_func_test) {
+    size_t dim = 4;
+    int8_t a[] = {1, 0, 0, 0};
+    int8_t b[] = {1, 0, 0, 0};
+
+    float dist = INT8_InnerProduct((const void *)a, (const void *)b, dim);
+    ASSERT_EQ(dist, 0.0);
+}
+
+/* ======================== Cosine NO OPT ======================== */
+
+TEST_F(SpacesTest, int8_Cosine_no_optimization_func_test) {
+    size_t dim = 4;
+    // Each buffer holds `dim` int8 elements plus sizeof(float) spare bytes for the norm.
+    std::vector<int8_t> vec1(dim + sizeof(float), 0);
+    std::vector<int8_t> vec2(dim + sizeof(float), 0);
+
+    vec1[0] = 1; // {1, 0, 0, 0}
+    vec2[1] = 1; // {0, 1, 0, 0}
+
+    // Store a norm of 1.0 right after the `dim` elements of each vector.
+    *(float *)(vec1.data() + dim) = 1.0;
+    *(float *)(vec2.data() + dim) = 1.0;
+    // NOTE(review): this calls INT8_InnerProduct, not INT8_Cosine - confirm that is intended.
+    float dist = INT8_InnerProduct((const void *)vec1.data(), (const void *)vec2.data(), dim);
+    ASSERT_EQ(dist, 1.0);
+}
+
 TEST_F(SpacesTest, GetDistFuncInvalidMetricFP32) {
     EXPECT_THROW(
         (spaces::GetDistFunc<float, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
@@ -231,6 +276,11 @@ TEST_F(SpacesTest, GetDistFuncInvalidMetricFP16) {
         (spaces::GetDistFunc<float16, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
         std::invalid_argument);
 }
+// TEST_F(SpacesTest, GetDistFuncInvalidMetricINT8) {
+//     EXPECT_THROW(
+//         (spaces::GetDistFunc<int8_t, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10,
+//         nullptr)), std::invalid_argument);
+// }
 
 using namespace spaces;
 

From fa8e9ff6856072b6d72fa4b4c0c84473b0268017 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 05:58:56 +0000
Subject: [PATCH 04/33] imp choose L2 int8 with 256bit loop

add spaces unit tests for int8 L2
add compilation flags
introduce tests/utils for general utils
---
 cmake/x86_64InstructionFlags.cmake            |  5 ++
 src/VecSim/spaces/CMakeLists.txt              |  6 ++
 .../spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h    | 58 +++++++++++++++++++
 src/VecSim/spaces/L2_space.cpp                | 13 +++++
 .../spaces/functions/AVX512F_BW_VL_VNNI.cpp   | 23 ++++++++
 .../spaces/functions/AVX512F_BW_VL_VNNI.h     | 15 +++++
 tests/unit/test_spaces.cpp                    | 49 ++++++++++++++++
 tests/utils/tests_utils.h                     | 24 ++++++++
 8 files changed, 193 insertions(+)
 create mode 100644 src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
 create mode 100644 src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
 create mode 100644 src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
 create mode 100644 tests/utils/tests_utils.h

diff --git a/cmake/x86_64InstructionFlags.cmake b/cmake/x86_64InstructionFlags.cmake
index 1fedda7fe..1ff8f48f2 100644
--- a/cmake/x86_64InstructionFlags.cmake
+++ b/cmake/x86_64InstructionFlags.cmake
@@ -13,6 +13,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 	CHECK_CXX_COMPILER_FLAG(-mavx512vbmi2 CXX_AVX512VBMI2)
 	CHECK_CXX_COMPILER_FLAG(-mavx512fp16 CXX_AVX512FP16)
 	CHECK_CXX_COMPILER_FLAG(-mavx512f CXX_AVX512F)
+	CHECK_CXX_COMPILER_FLAG(-mavx512vnni CXX_AVX512VNNI)
 	CHECK_CXX_COMPILER_FLAG(-mavx2 CXX_AVX2)
 	CHECK_CXX_COMPILER_FLAG(-mavx CXX_AVX)
 	CHECK_CXX_COMPILER_FLAG(-mf16c CXX_F16C)
@@ -48,6 +49,10 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 		add_compile_definitions(OPT_AVX512_BW_VBMI2)
 	endif()
 
+	if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI)
+		add_compile_definitions(OPT_AVX512_F_BW_VL_VNNI)
+	endif()
+
 	if(CXX_F16C AND CXX_FMA AND CXX_AVX)
 		add_compile_definitions(OPT_F16C)
 	endif()
diff --git a/src/VecSim/spaces/CMakeLists.txt b/src/VecSim/spaces/CMakeLists.txt
index ad22e8187..fc23adc18 100644
--- a/src/VecSim/spaces/CMakeLists.txt
+++ b/src/VecSim/spaces/CMakeLists.txt
@@ -45,6 +45,12 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 		list(APPEND OPTIMIZATIONS functions/AVX512F.cpp)
 	endif()
 
+	if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI)
+		message("Building with AVX512F, AVX512BW, AVX512VL and AVX512VNNI")
+		set_source_files_properties(functions/AVX512F_BW_VL_VNNI.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512bw -mavx512vl -mavx512vnni")
+		list(APPEND OPTIMIZATIONS functions/AVX512F_BW_VL_VNNI.cpp)
+	endif()
+
 	if(CXX_AVX2)
 		message("Building with AVX2")
 		set_source_files_properties(functions/AVX2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
diff --git a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
new file mode 100644
index 000000000..9130d6414
--- /dev/null
+++ b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
@@ -0,0 +1,58 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "VecSim/spaces/space_includes.h"
+
+static inline void L2SqrStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &sum) {
+    __m256i temp_a = _mm256_loadu_epi8(pVect1);
+    __m512i va = _mm512_cvtepi8_epi16(temp_a);
+    pVect1 += 32;
+
+    __m256i temp_b = _mm256_loadu_epi8(pVect2);
+    __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+    pVect2 += 32;
+
+    __m512i diff = _mm512_sub_epi16(va, vb);
+    // _mm512_dpwssd_epi32(src, a, b)
+    // Multiply groups of 2 adjacent pairs of signed 16-bit integers in `a` with corresponding
+    // 16-bit integers in `b`, producing 2 intermediate signed 32-bit results. Sum these 2 results
+    // with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
+    sum = _mm512_dpwssd_epi32(sum, diff, diff);
+}
+
+template <unsigned char residual> // 0..32
+float INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+                                          size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    const int8_t *pEnd1 = pVect1 + dimension;
+
+    __m512i sum = _mm512_setzero_epi32();
+
+    // Deal with remainder first. `dim` is more than 32, so we have at least one 32-int_8 block,
+    // so mask loading is guaranteed to be safe
+    if constexpr (residual) {
+        __mmask32 mask = (1LU << residual) - 1;
+        __m256i temp_a = _mm256_maskz_loadu_epi8(mask, pVect1);
+        __m512i va = _mm512_cvtepi8_epi16(temp_a);
+        pVect1 += residual;
+
+        __m256i temp_b = _mm256_maskz_loadu_epi8(mask, pVect2);
+        __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+        pVect2 += residual;
+
+        __m512i diff = _mm512_sub_epi16(va, vb);
+        sum = _mm512_dpwssd_epi32(sum, diff, diff);
+    }
+
+    // We dealt with the residual part. We are left with some multiple of 32-int_8.
+    do {
+        L2SqrStep(pVect1, pVect2, sum);
+    } while (pVect1 < pEnd1);
+
+    return _mm512_reduce_add_epi32(sum);
+}
diff --git a/src/VecSim/spaces/L2_space.cpp b/src/VecSim/spaces/L2_space.cpp
index 3f9d83f03..3ae927224 100644
--- a/src/VecSim/spaces/L2_space.cpp
+++ b/src/VecSim/spaces/L2_space.cpp
@@ -15,6 +15,7 @@
 #include "VecSim/spaces/functions/SSE.h"
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
 #include "VecSim/spaces/functions/AVX512FP16_VL.h"
+#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/SSE3.h"
 
@@ -200,6 +201,18 @@ dist_func_t<float> L2_INT8_GetDistFunc(size_t dim, unsigned char *alignment, con
     if (dim < 32) {
         return ret_dist_func;
     }
+#ifdef CPU_FEATURES_ARCH_X86_64
+    auto features = (arch_opt == nullptr)
+                        ? cpu_features::GetX86Info().features
+                        : *static_cast<const cpu_features::X86Features *>(arch_opt);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (features.avx512f && features.avx512bw && features.avx512vl && features.avx512vnni) {
+        if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
+            *alignment = 32 * sizeof(int8_t); // align to 256 bits.
+        return Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(dim);
+    }
+#endif
+#endif // __x86_64__
     return ret_dist_func;
 }
 
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
new file mode 100644
index 000000000..d906a5775
--- /dev/null
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -0,0 +1,23 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "AVX512F_BW_VL_VNNI.h" // own header: declares the chooser defined below
+
+#include "VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h"
+
+namespace spaces {
+
+#include "implementation_chooser.h"
+// Picks, via CHOOSE_IMPLEMENTATION with a chunk of 32 elements, the INT8 L2 kernel for `dim`.
+dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI);
+    return ret_dist_func;
+}
+
+#include "implementation_chooser_cleanup.h"
+
+} // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
new file mode 100644
index 000000000..c1f32ff10
--- /dev/null
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
@@ -0,0 +1,15 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#pragma once
+
+#include "VecSim/spaces/spaces.h"
+
+namespace spaces {
+
+dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(size_t dim);
+
+} // namespace spaces
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 2a58d6072..c9e8d68b8 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -25,9 +25,11 @@
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
 #include "VecSim/spaces/functions/AVX512BF16_VL.h"
 #include "VecSim/spaces/functions/AVX512FP16_VL.h"
+#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/SSE3.h"
 #include "VecSim/spaces/functions/F16C.h"
+#include "../utils/tests_utils.h"
 
 using bfloat16 = vecsim_types::bfloat16;
 using float16 = vecsim_types::float16;
@@ -256,6 +258,8 @@ TEST_F(SpacesTest, int8_Cosine_no_optimization_func_test) {
     ASSERT_EQ(dist, 1.0);
 }
 
+/* ======================== Test Getters ======================== */
+
 TEST_F(SpacesTest, GetDistFuncInvalidMetricFP32) {
     EXPECT_THROW(
         (spaces::GetDistFunc<float, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
@@ -291,6 +295,7 @@ TEST_F(SpacesTest, smallDimChooser) {
         ASSERT_EQ(L2_FP64_GetDistFunc(dim), FP64_L2Sqr);
         ASSERT_EQ(L2_BF16_GetDistFunc(dim), BF16_L2Sqr_LittleEndian);
         ASSERT_EQ(L2_FP16_GetDistFunc(dim), FP16_L2Sqr);
+        ASSERT_EQ(L2_INT8_GetDistFunc(dim), INT8_L2Sqr);
         ASSERT_EQ(IP_FP32_GetDistFunc(dim), FP32_InnerProduct);
         ASSERT_EQ(IP_FP64_GetDistFunc(dim), FP64_InnerProduct);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
@@ -300,6 +305,7 @@ TEST_F(SpacesTest, smallDimChooser) {
         ASSERT_EQ(L2_FP32_GetDistFunc(dim), FP32_L2Sqr);
         ASSERT_EQ(L2_BF16_GetDistFunc(dim), BF16_L2Sqr_LittleEndian);
         ASSERT_EQ(L2_FP16_GetDistFunc(dim), FP16_L2Sqr);
+        ASSERT_EQ(L2_INT8_GetDistFunc(dim), INT8_L2Sqr);
         ASSERT_EQ(IP_FP32_GetDistFunc(dim), FP32_InnerProduct);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
@@ -307,11 +313,14 @@ TEST_F(SpacesTest, smallDimChooser) {
     for (size_t dim = 16; dim < 32; dim++) {
         ASSERT_EQ(L2_BF16_GetDistFunc(dim), BF16_L2Sqr_LittleEndian);
         ASSERT_EQ(L2_FP16_GetDistFunc(dim), FP16_L2Sqr);
+        ASSERT_EQ(L2_INT8_GetDistFunc(dim), INT8_L2Sqr);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
     }
 }
 
+/* ======================== Test SIMD Functions ======================== */
+
 // In this following tests we assume that compiler supports all X86 optimizations, so if we have
 // some hardware flag enabled, we check that the corresponding optimization function was chosen.
 #ifdef CPU_FEATURES_ARCH_X86_64
@@ -899,4 +908,44 @@ INSTANTIATE_TEST_SUITE_P(, FP16SpacesOptimizationTestAdvanced,
 
 #endif
 
+class INT8SpacesOptimizationTest : public testing::TestWithParam<size_t> {};
+
+TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
+    auto optimization = cpu_features::GetX86Info().features;
+    size_t dim = GetParam();
+    auto v1 = test_utils::create_int8_vec(dim);
+    auto v2 = test_utils::create_int8_vec(dim);
+
+    auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
+        size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
+        return (dim % elements_in_reg == 0) ? elements_in_reg * sizeof(int8_t) : 0;
+    };
+
+    dist_func_t<float> arch_opt_func;
+    float baseline = INT8_L2Sqr(v1.data(), v2.data(), dim);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
+        optimization.avx512vnni) {
+        unsigned char alignment = 0;
+        arch_opt_func = L2_INT8_GetDistFunc(dim, &alignment, &optimization);
+        ASSERT_EQ(arch_opt_func, Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(dim))
+            << "Unexpected distance function chosen for dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
+        // Unset optimizations flag, so we'll choose the next optimization.
+        optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
+            optimization.avx512vnni = 0;
+    }
+#endif
+    unsigned char alignment = 0;
+    arch_opt_func = L2_INT8_GetDistFunc(dim, &alignment, &optimization);
+    ASSERT_EQ(arch_opt_func, INT8_L2Sqr) << "Unexpected distance function chosen for dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim))
+        << "No optimization with dim " << dim;
+    ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
+}
+
+INSTANTIATE_TEST_SUITE_P(INT8OptFuncs, INT8SpacesOptimizationTest,
+                         testing::Range(32UL, 32 * 2UL + 1));
+
 #endif // CPU_FEATURES_ARCH_X86_64
diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h
new file mode 100644
index 000000000..568fc1a49
--- /dev/null
+++ b/tests/utils/tests_utils.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <random>
+#include <vector>
+
+namespace test_utils {
+
+std::vector<int8_t> create_int8_vec(size_t dim) {
+
+    std::mt19937 gen(1234); // Mersenne Twister engine initialized with the fixed seed
+
+    // uniform_int_distribution doesn't support int8,
+    // Define a distribution range for int8_t
+    std::uniform_int_distribution<int16_t> dis(-128, 127);
+
+    std::vector<int8_t> vec(dim);
+    for (auto &num : vec) {
+        num = static_cast<int8_t>(dis(gen));
+    }
+
+    return vec;
+}
+
+} // namespace test_utils

From a7a556f43430ebba03121c7cf753464472150ed3 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 11:23:30 +0200
Subject: [PATCH 05/33] implement spaces benchmarks for int8

rename INITIALIZE_BENCHMARKS_SET to INITIALIZE_BENCHMARKS_SET_L2_IP at its call sites
introduce INITIALIZE_BENCHMARKS_SET_COSINE
fix typos in the Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI name (was ..._AVX512F_VW_CL_VNNI)
---
 src/VecSim/spaces/L2_space.cpp                |  2 +-
 .../spaces/functions/AVX512F_BW_VL_VNNI.cpp   |  2 +-
 .../spaces/functions/AVX512F_BW_VL_VNNI.h     |  2 +-
 tests/benchmark/spaces_benchmarks/bm_spaces.h |  6 ++
 .../spaces_benchmarks/bm_spaces_bf16.cpp      |  8 +--
 .../spaces_benchmarks/bm_spaces_fp16.cpp      |  8 +--
 .../spaces_benchmarks/bm_spaces_fp32.cpp      |  6 +-
 .../spaces_benchmarks/bm_spaces_fp64.cpp      |  6 +-
 .../spaces_benchmarks/bm_spaces_int8.cpp      | 56 +++++++++++++++++++
 tests/unit/test_spaces.cpp                    |  2 +-
 10 files changed, 80 insertions(+), 18 deletions(-)
 create mode 100644 tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp

diff --git a/src/VecSim/spaces/L2_space.cpp b/src/VecSim/spaces/L2_space.cpp
index 3ae927224..c0bec428f 100644
--- a/src/VecSim/spaces/L2_space.cpp
+++ b/src/VecSim/spaces/L2_space.cpp
@@ -209,7 +209,7 @@ dist_func_t<float> L2_INT8_GetDistFunc(size_t dim, unsigned char *alignment, con
     if (features.avx512f && features.avx512bw && features.avx512vl && features.avx512vnni) {
         if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
             *alignment = 32 * sizeof(int8_t); // align to 256 bits.
-        return Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(dim);
+        return Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(dim);
     }
 #endif
 #endif // __x86_64__
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
index d906a5775..d82d4141d 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -12,7 +12,7 @@ namespace spaces {
 
 #include "implementation_chooser.h"
 
-dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(size_t dim) {
+dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
     CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
index c1f32ff10..818b9529f 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
@@ -10,6 +10,6 @@
 
 namespace spaces {
 
-dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(size_t dim);
+dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim);
 
 } // namespace spaces
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces.h b/tests/benchmark/spaces_benchmarks/bm_spaces.h
index 3b55a9032..8b42ac030 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces.h
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces.h
@@ -123,6 +123,12 @@ static constexpr size_t start = min_no_res_th_dim;
     INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported);          \
     INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported);
 
+#define INITIALIZE_BENCHMARKS_SET_COSINE(bm_class, type_prefix, arch, dim_opt, arch_supported)     \
+    INITIALIZE_HIGH_DIM(bm_class, type_prefix, arch, COSINE, arch_supported);                      \
+    INITIALIZE_LOW_DIM(bm_class, type_prefix, arch, COSINE, arch_supported);                       \
+    INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);      \
+    INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);
+
 #define INITIALIZE_BENCHMARKS_SET(bm_class, type_prefix, arch, dim_opt, arch_supported)            \
     INITIALIZE_BENCHMARKS_SET_L2(bm_class, type_prefix, arch, dim_opt, arch_supported)             \
     INITIALIZE_BENCHMARKS_SET_IP(bm_class, type_prefix, arch, dim_opt, arch_supported)
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp
index 8022c712a..27fe82a3d 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp
@@ -26,20 +26,20 @@ INITIALIZE_BENCHMARKS_SET_IP(BM_VecSimSpaces_BF16, BF16, AVX512BF16_VL, 32,
 // AVX512 functions
 #ifdef OPT_AVX512_BW_VBMI2
 bool avx512_bw_vbmi2_supported = opt.avx512bw && opt.avx512vbmi2;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_BF16, BF16, AVX512BW_VBMI2, 32,
-                          avx512_bw_vbmi2_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_BF16, BF16, AVX512BW_VBMI2, 32,
+                                avx512_bw_vbmi2_supported);
 #endif // AVX512F
 
 // AVX functions
 #ifdef OPT_AVX2
 bool avx2_supported = opt.avx2;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_BF16, BF16, AVX2, 32, avx2_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_BF16, BF16, AVX2, 32, avx2_supported);
 #endif // AVX
 
 // SSE functions
 #ifdef OPT_SSE3
 bool sse3_supported = opt.sse3;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_BF16, BF16, SSE3, 32, sse3_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_BF16, BF16, SSE3, 32, sse3_supported);
 #endif // SSE
 
 #endif // x86_64
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp
index c9bc42b0b..9457bc77d 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp
@@ -22,8 +22,8 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 class BM_VecSimSpaces_FP16_adv : public BM_VecSimSpaces<_Float16> {};
 
 bool avx512fp16_vl_supported = opt.avx512_fp16 && opt.avx512vl;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP16_adv, FP16, AVX512FP16_VL, 32,
-                          avx512fp16_vl_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP16_adv, FP16, AVX512FP16_VL, 32,
+                                avx512fp16_vl_supported);
 
 INITIALIZE_NAIVE_BM(BM_VecSimSpaces_FP16_adv, FP16, InnerProduct, 32);
 INITIALIZE_NAIVE_BM(BM_VecSimSpaces_FP16_adv, FP16, L2Sqr, 32);
@@ -32,12 +32,12 @@ INITIALIZE_NAIVE_BM(BM_VecSimSpaces_FP16_adv, FP16, L2Sqr, 32);
 // OPT_AVX512F functions
 #ifdef OPT_AVX512F
 bool avx512f_supported = opt.avx512f;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP16, FP16, AVX512F, 32, avx512f_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP16, FP16, AVX512F, 32, avx512f_supported);
 #endif // OPT_AVX512F
 // AVX functions
 #ifdef OPT_F16C
 bool avx512_bw_f16c_supported = opt.f16c && opt.fma3 && opt.avx;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP16, FP16, F16C, 32, avx512_bw_f16c_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP16, FP16, F16C, 32, avx512_bw_f16c_supported);
 #endif // OPT_F16C
 
 #endif // x86_64
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp
index 289e42405..106b2abc8 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp
@@ -13,19 +13,19 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 // AVX512 functions
 #ifdef OPT_AVX512F
 bool avx512f_supported = opt.avx512f;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP32, FP32, AVX512F, 16, avx512f_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP32, FP32, AVX512F, 16, avx512f_supported);
 #endif // AVX512F
 
 // AVX functions
 #ifdef OPT_AVX
 bool avx_supported = opt.avx;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP32, FP32, AVX, 16, avx_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP32, FP32, AVX, 16, avx_supported);
 #endif // AVX
 
 // SSE functions
 #ifdef OPT_SSE
 bool sse_supported = opt.sse;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP32, FP32, SSE, 16, sse_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP32, FP32, SSE, 16, sse_supported);
 #endif // SSE
 
 #endif // x86_64
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp
index 19157f03f..01052cebc 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp
@@ -13,19 +13,19 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 // AVX512 functions
 #ifdef OPT_AVX512F
 bool avx512f_supported = opt.avx512f;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP64, FP64, AVX512F, 8, avx512f_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP64, FP64, AVX512F, 8, avx512f_supported);
 #endif // AVX512F
 
 // AVX functions
 #ifdef OPT_AVX
 bool avx_supported = opt.avx;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP64, FP64, AVX, 8, avx_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP64, FP64, AVX, 8, avx_supported);
 #endif // AVX
 
 // SSE functions
 #ifdef OPT_SSE
 bool sse_supported = opt.sse;
-INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP64, FP64, SSE, 8, sse_supported);
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP64, FP64, SSE, 8, sse_supported);
 #endif // SSE
 
 #endif // x86_64
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
new file mode 100644
index 000000000..def14a8bd
--- /dev/null
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -0,0 +1,56 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+#include <benchmark/benchmark.h>
+#include <random>
+#include "../../utils/tests_utils.h"
+#include "bm_spaces.h"
+
+class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture {
+protected:
+    std::mt19937 rng;
+    size_t dim;
+    int8_t *v1, *v2;
+
+public:
+    BM_VecSimSpaces_Integers_INT8() { rng.seed(47); }
+    ~BM_VecSimSpaces_Integers_INT8() = default;
+
+    void SetUp(const ::benchmark::State &state) {
+        dim = state.range(0);
+        v1 = new int8_t[dim];
+        v2 = new int8_t[dim];
+
+        // random for int8_t and uint8_t is not provided by the standard library
+        memcpy(v1, test_utils::create_int8_vec(dim).data(), dim);
+        memcpy(v2, test_utils::create_int8_vec(dim).data(), dim);
+    }
+    void TearDown(const ::benchmark::State &state) {
+        delete v1;
+        delete v2;
+    }
+};
+
+
+#ifdef CPU_FEATURES_ARCH_X86_64
+cpu_features::X86Features opt = cpu_features::GetX86Info().features;
+
+// AVX512_BF16 functions
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+bool avx512_f_bw_vl_vnni_supported = opt.avx512f && opt.avx512bw && opt.avx512vl && opt.avx512vnni;
+INITIALIZE_BENCHMARKS_SET_L2(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32,
+                             avx512_f_bw_vl_vnni_supported);
+// INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,
+//                              avx512_f_bw_vl_vnni_supported);
+// INITIALIZE_BENCHMARKS_SET_COSINE(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,
+//                              avx512_f_bw_vl_vnni_supported)
+#endif // AVX512_BF16
+
+
+#endif // x86_64
+
+INITIALIZE_NAIVE_BM(BM_VecSimSpaces_Integers_INT8, INT8, InnerProduct, 32);
+INITIALIZE_NAIVE_BM(BM_VecSimSpaces_Integers_INT8, INT8, L2Sqr, 32);
+BENCHMARK_MAIN();
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index c9e8d68b8..2968b1c9f 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -928,7 +928,7 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
         optimization.avx512vnni) {
         unsigned char alignment = 0;
         arch_opt_func = L2_INT8_GetDistFunc(dim, &alignment, &optimization);
-        ASSERT_EQ(arch_opt_func, Choose_INT8_L2_implementation_AVX512F_VW_CL_VNNI(dim))
+        ASSERT_EQ(arch_opt_func, Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(dim))
             << "Unexpected distance function chosen for dim " << dim;
         ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim)) << "AVX512 with dim " << dim;
         ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;

From 43064e865041ef58ffb3c272d5c0998c8ea9559c Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 09:26:01 +0000
Subject: [PATCH 06/33] fix INITIALIZE_BENCHMARKS_SET_L2_IP definition and add
 include of AVX512F_BW_VL_VNNI.h to bm_spaces.h

---
 tests/benchmark/spaces_benchmarks/bm_spaces.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces.h b/tests/benchmark/spaces_benchmarks/bm_spaces.h
index 8b42ac030..86cb45553 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces.h
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces.h
@@ -19,6 +19,7 @@
 #include "VecSim/spaces/functions/AVX.h"
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
 #include "VecSim/spaces/functions/AVX512BF16_VL.h"
+#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/F16C.h"
 #include "VecSim/spaces/functions/SSE3.h"
@@ -129,6 +130,6 @@ static constexpr size_t start = min_no_res_th_dim;
     INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);      \
     INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);
 
-#define INITIALIZE_BENCHMARKS_SET(bm_class, type_prefix, arch, dim_opt, arch_supported)            \
+#define INITIALIZE_BENCHMARKS_SET_L2_IP(bm_class, type_prefix, arch, dim_opt, arch_supported)            \
     INITIALIZE_BENCHMARKS_SET_L2(bm_class, type_prefix, arch, dim_opt, arch_supported)             \
     INITIALIZE_BENCHMARKS_SET_IP(bm_class, type_prefix, arch, dim_opt, arch_supported)

From fb9f1ccf7064702b4e0b08b6633336223bbe4524 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 09:47:20 +0000
Subject: [PATCH 07/33] rename unit/test_utils to unit/unit_test_utils

---
 tests/benchmark/CMakeLists.txt                     |  2 +-
 tests/benchmark/benchmarks.sh                      | 10 ++++++++++
 .../benchmark/spaces_benchmarks/bm_spaces_int8.cpp |  3 ++-
 tests/unit/CMakeLists.txt                          | 14 +++++++-------
 tests/unit/test_allocator.cpp                      |  4 ++--
 tests/unit/test_bf16.cpp                           |  2 +-
 tests/unit/test_bruteforce.cpp                     |  4 ++--
 tests/unit/test_bruteforce_multi.cpp               |  4 ++--
 tests/unit/test_common.cpp                         |  4 ++--
 tests/unit/test_fp16.cpp                           |  2 +-
 tests/unit/test_hnsw.cpp                           |  4 ++--
 tests/unit/test_hnsw_multi.cpp                     |  4 ++--
 tests/unit/test_hnsw_parallel.cpp                  |  4 ++--
 tests/unit/test_hnsw_tiered.cpp                    |  2 +-
 tests/unit/test_spaces.cpp                         |  2 +-
 tests/unit/{test_utils.cpp => unit_test_utils.cpp} |  2 +-
 tests/unit/{test_utils.h => unit_test_utils.h}     |  0
 17 files changed, 39 insertions(+), 28 deletions(-)
 rename tests/unit/{test_utils.cpp => unit_test_utils.cpp} (99%)
 rename tests/unit/{test_utils.h => unit_test_utils.h} (100%)

diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt
index 4d25a5499..2fa066e82 100644
--- a/tests/benchmark/CMakeLists.txt
+++ b/tests/benchmark/CMakeLists.txt
@@ -31,7 +31,7 @@ endforeach()
 
 include(${root}/cmake/x86_64InstructionFlags.cmake)
 
-set(DATA_TYPE fp32 fp64 bf16 fp16)
+set(DATA_TYPE fp32 fp64 bf16 fp16 int8)
 foreach(data_type IN LISTS DATA_TYPE)
 	add_executable(bm_spaces_${data_type} spaces_benchmarks/bm_spaces_${data_type}.cpp)
 	target_link_libraries(bm_spaces_${data_type} VectorSimilarity benchmark::benchmark)
diff --git a/tests/benchmark/benchmarks.sh b/tests/benchmark/benchmarks.sh
index 11872e869..2e6664424 100755
--- a/tests/benchmark/benchmarks.sh
+++ b/tests/benchmark/benchmarks.sh
@@ -66,4 +66,14 @@ elif [ "$BM_TYPE" = "bm-spaces" ] ; then
     echo spaces_fp16
     echo spaces_fp64
     echo spaces_bf16
+elif [ "$BM_TYPE" = "bm-spaces-fp32" ] ; then
+    echo spaces_fp32
+elif [ "$BM_TYPE" = "bm-spaces-fp64" ] ; then
+    echo spaces_fp64
+elif [ "$BM_TYPE" = "bm-spaces-bf16" ] ; then
+    echo spaces_bf16
+elif [ "$BM_TYPE" = "bm-spaces-fp16" ] ; then
+    echo spaces_fp16
+elif [ "$BM_TYPE" = "bm-spaces-int8" ] ; then
+    echo spaces_int8
 fi
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
index def14a8bd..8cb323043 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -5,7 +5,8 @@
  */
 #include <benchmark/benchmark.h>
 #include <random>
-#include "../../utils/tests_utils.h"
+#include <cstring>
+#include "utils/tests_utils.h"
 #include "bm_spaces.h"
 
 class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture {
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index b16bddac6..caa3fc522 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -30,15 +30,15 @@ endif()
 
 include(${root}/cmake/x86_64InstructionFlags.cmake)
 
-add_executable(test_hnsw ../utils/mock_thread_pool.cpp test_hnsw.cpp test_hnsw_multi.cpp test_hnsw_tiered.cpp test_utils.cpp)
-add_executable(test_hnsw_parallel test_hnsw_parallel.cpp test_utils.cpp)
-add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp test_utils.cpp)
-add_executable(test_allocator test_allocator.cpp test_utils.cpp)
+add_executable(test_hnsw ../utils/mock_thread_pool.cpp test_hnsw.cpp test_hnsw_multi.cpp test_hnsw_tiered.cpp unit_test_utils.cpp)
+add_executable(test_hnsw_parallel test_hnsw_parallel.cpp unit_test_utils.cpp)
+add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp unit_test_utils.cpp)
+add_executable(test_allocator test_allocator.cpp unit_test_utils.cpp)
 add_executable(test_spaces test_spaces.cpp)
 add_executable(test_types test_types.cpp)
-add_executable(test_common ../utils/mock_thread_pool.cpp test_utils.cpp test_common.cpp)
-add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp test_utils.cpp)
-add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp test_utils.cpp)
+add_executable(test_common ../utils/mock_thread_pool.cpp unit_test_utils.cpp test_common.cpp)
+add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp)
+add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp unit_test_utils.cpp)
 
 target_link_libraries(test_hnsw PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_hnsw_parallel PUBLIC gtest_main VectorSimilarity)
diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp
index 4eb389260..03f0b4e52 100644
--- a/tests/unit/test_allocator.cpp
+++ b/tests/unit/test_allocator.cpp
@@ -10,7 +10,7 @@
 #include "VecSim/memory/vecsim_base.h"
 #include "VecSim/algorithms/brute_force/brute_force_single.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/utils/serializer.h"
 #include "VecSim/index_factories/hnsw_factory.h"
 
@@ -83,7 +83,7 @@ TEST_F(AllocatorTest, test_nested_object) {
 template <typename index_type_t>
 class IndexAllocatorTest : public ::testing::Test {};
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(IndexAllocatorTest, DataTypeSet);
 
diff --git a/tests/unit/test_bf16.cpp b/tests/unit/test_bf16.cpp
index 921c80c35..95e12c98b 100644
--- a/tests/unit/test_bf16.cpp
+++ b/tests/unit/test_bf16.cpp
@@ -2,7 +2,7 @@
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
 #include "VecSim/index_factories/hnsw_factory.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/utils/serializer.h"
 #include "mock_thread_pool.h"
 #include "VecSim/query_result_definitions.h"
diff --git a/tests/unit/test_bruteforce.cpp b/tests/unit/test_bruteforce.cpp
index c56415e3d..b3d5b1192 100644
--- a/tests/unit/test_bruteforce.cpp
+++ b/tests/unit/test_bruteforce.cpp
@@ -6,7 +6,7 @@
 
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/algorithms/brute_force/brute_force.h"
 #include "VecSim/algorithms/brute_force/brute_force_single.h"
 #include "cpu_features_macros.h"
@@ -32,7 +32,7 @@ class BruteForceTest : public ::testing::Test {
     }
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(BruteForceTest, DataTypeSet);
 
diff --git a/tests/unit/test_bruteforce_multi.cpp b/tests/unit/test_bruteforce_multi.cpp
index ef9cfc636..55aadedd4 100644
--- a/tests/unit/test_bruteforce_multi.cpp
+++ b/tests/unit/test_bruteforce_multi.cpp
@@ -6,7 +6,7 @@
 
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/algorithms/brute_force/brute_force_multi.h"
 #include <cmath>
 
@@ -27,7 +27,7 @@ class BruteForceMultiTest : public ::testing::Test {
     }
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(BruteForceMultiTest, DataTypeSet);
 
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 58df46fba..bdfd6d9f2 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -10,7 +10,7 @@
 #include "VecSim/query_result_definitions.h"
 #include "VecSim/utils/updatable_heap.h"
 #include "VecSim/utils/vec_utils.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/containers/vecsim_results_container.h"
 #include "VecSim/algorithms/hnsw/hnsw.h"
 #include "VecSim/index_factories/hnsw_factory.h"
@@ -32,7 +32,7 @@ using float16 = vecsim_types::float16;
 template <typename index_type_t>
 class CommonIndexTest : public ::testing::Test {};
 
-// DataTypeSet are defined in test_utils.h
+// DataTypeSet are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(CommonIndexTest, DataTypeSet);
 
diff --git a/tests/unit/test_fp16.cpp b/tests/unit/test_fp16.cpp
index 377ef8f32..244bb9d0c 100644
--- a/tests/unit/test_fp16.cpp
+++ b/tests/unit/test_fp16.cpp
@@ -2,7 +2,7 @@
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
 #include "VecSim/index_factories/hnsw_factory.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/utils/serializer.h"
 #include "mock_thread_pool.h"
 #include "VecSim/query_result_definitions.h"
diff --git a/tests/unit/test_hnsw.cpp b/tests/unit/test_hnsw.cpp
index f57a6d3e3..cc400d48a 100644
--- a/tests/unit/test_hnsw.cpp
+++ b/tests/unit/test_hnsw.cpp
@@ -9,7 +9,7 @@
 #include "VecSim/vec_sim_debug.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
 #include "VecSim/index_factories/hnsw_factory.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/utils/serializer.h"
 #include "VecSim/query_result_definitions.h"
 #include <unistd.h>
@@ -36,7 +36,7 @@ class HNSWTest : public ::testing::Test {
     }
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(HNSWTest, DataTypeSet);
 
diff --git a/tests/unit/test_hnsw_multi.cpp b/tests/unit/test_hnsw_multi.cpp
index ba87f1759..026f96e62 100644
--- a/tests/unit/test_hnsw_multi.cpp
+++ b/tests/unit/test_hnsw_multi.cpp
@@ -6,7 +6,7 @@
 
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/algorithms/hnsw/hnsw_multi.h"
 #include <cmath>
 #include <map>
@@ -31,7 +31,7 @@ class HNSWMultiTest : public ::testing::Test {
     }
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(HNSWMultiTest, DataTypeSet);
 
diff --git a/tests/unit/test_hnsw_parallel.cpp b/tests/unit/test_hnsw_parallel.cpp
index a2d4827ca..0354a6af1 100644
--- a/tests/unit/test_hnsw_parallel.cpp
+++ b/tests/unit/test_hnsw_parallel.cpp
@@ -7,7 +7,7 @@
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/query_result_definitions.h"
 #include "VecSim/vec_sim_debug.h"
 #include <unistd.h>
@@ -124,7 +124,7 @@ class HNSWTestParallel : public ::testing::Test {
     void parallelInsertSearch(bool is_multi);
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(HNSWTestParallel, DataTypeSet);
 
diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp
index 4b1df107a..db751b792 100644
--- a/tests/unit/test_hnsw_tiered.cpp
+++ b/tests/unit/test_hnsw_tiered.cpp
@@ -6,7 +6,7 @@
 #include <string>
 #include <array>
 
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "mock_thread_pool.h"
 
 #include <thread>
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 2968b1c9f..4a546b6ca 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -29,7 +29,7 @@
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/SSE3.h"
 #include "VecSim/spaces/functions/F16C.h"
-#include "../utils/tests_utils.h"
+#include "tests_utils.h"
 
 using bfloat16 = vecsim_types::bfloat16;
 using float16 = vecsim_types::float16;
diff --git a/tests/unit/test_utils.cpp b/tests/unit/unit_test_utils.cpp
similarity index 99%
rename from tests/unit/test_utils.cpp
rename to tests/unit/unit_test_utils.cpp
index 7b99eba22..89973d19d 100644
--- a/tests/unit/test_utils.cpp
+++ b/tests/unit/unit_test_utils.cpp
@@ -4,7 +4,7 @@
  *the Server Side Public License v1 (SSPLv1).
  */
 
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "gtest/gtest.h"
 #include "VecSim/utils/vec_utils.h"
 #include "VecSim/memory/vecsim_malloc.h"
diff --git a/tests/unit/test_utils.h b/tests/unit/unit_test_utils.h
similarity index 100%
rename from tests/unit/test_utils.h
rename to tests/unit/unit_test_utils.h

From 602f8e94362d966c1f9f54de11618c94f7222363 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 11:22:32 +0000
Subject: [PATCH 08/33] add seed parameter to create_int8_vec

---
 tests/benchmark/spaces_benchmarks/bm_spaces.h        | 2 +-
 tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp | 5 ++---
 tests/unit/test_spaces.cpp                           | 8 ++++----
 tests/utils/tests_utils.h                            | 4 ++--
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces.h b/tests/benchmark/spaces_benchmarks/bm_spaces.h
index 86cb45553..909f4d6dd 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces.h
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces.h
@@ -130,6 +130,6 @@ static constexpr size_t start = min_no_res_th_dim;
     INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);      \
     INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);
 
-#define INITIALIZE_BENCHMARKS_SET_L2_IP(bm_class, type_prefix, arch, dim_opt, arch_supported)            \
+#define INITIALIZE_BENCHMARKS_SET_L2_IP(bm_class, type_prefix, arch, dim_opt, arch_supported)      \
     INITIALIZE_BENCHMARKS_SET_L2(bm_class, type_prefix, arch, dim_opt, arch_supported)             \
     INITIALIZE_BENCHMARKS_SET_IP(bm_class, type_prefix, arch, dim_opt, arch_supported)
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
index 8cb323043..234550202 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -25,8 +25,8 @@ class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture {
         v2 = new int8_t[dim];
 
         // random for int8_t and uint8_t is not provided by the standard library
-        memcpy(v1, test_utils::create_int8_vec(dim).data(), dim);
-        memcpy(v2, test_utils::create_int8_vec(dim).data(), dim);
+        memcpy(v1, test_utils::create_int8_vec(dim, 123).data(), dim);
+        memcpy(v2, test_utils::create_int8_vec(dim, 1234).data(), dim);
     }
     void TearDown(const ::benchmark::State &state) {
         delete v1;
@@ -34,7 +34,6 @@ class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture {
     }
 };
 
-
 #ifdef CPU_FEATURES_ARCH_X86_64
 cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 4a546b6ca..00f2a2d2f 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -913,8 +913,8 @@ class INT8SpacesOptimizationTest : public testing::TestWithParam<size_t> {};
 TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
     auto optimization = cpu_features::GetX86Info().features;
     size_t dim = GetParam();
-    auto v1 = test_utils::create_int8_vec(dim);
-    auto v2 = test_utils::create_int8_vec(dim);
+    auto v1 = test_utils::create_int8_vec(dim, 123);
+    auto v2 = test_utils::create_int8_vec(dim, 1234);
 
     auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
         size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
@@ -931,7 +931,7 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
         ASSERT_EQ(arch_opt_func, Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(dim))
             << "Unexpected distance function chosen for dim " << dim;
         ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim)) << "AVX512 with dim " << dim;
-        ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(alignment, expected_alignment(512, dim)) << "AVX512 with dim " << dim;
         // Unset optimizations flag, so we'll choose the next optimization.
         optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
             optimization.avx512vnni = 0;
@@ -946,6 +946,6 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
 }
 
 INSTANTIATE_TEST_SUITE_P(INT8OptFuncs, INT8SpacesOptimizationTest,
-                         testing::Range(32UL, 32 * 2UL + 1));
+                         testing::Range(64UL, 64 * 2UL + 1));
 
 #endif // CPU_FEATURES_ARCH_X86_64
diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h
index 568fc1a49..0231a9838 100644
--- a/tests/utils/tests_utils.h
+++ b/tests/utils/tests_utils.h
@@ -5,9 +5,9 @@
 
 namespace test_utils {
 
-std::vector<int8_t> create_int8_vec(size_t dim) {
+static std::vector<int8_t> create_int8_vec(size_t dim, int seed = 1234) {
 
-    std::mt19937 gen(1234); // Mersenne Twister engine initialized with the fixed seed
+    std::mt19937 gen(seed); // Mersenne Twister engine initialized with the fixed seed
 
     // uniform_int_distribution doesn't support int8,
     // Define a distribution range for int8_t

From cde5e2d020910ce15e03fff6990bf2a70048acc6 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 12:22:22 +0000
Subject: [PATCH 09/33] format

---
 tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
index 234550202..96c02a44c 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -37,7 +37,7 @@ class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture {
 #ifdef CPU_FEATURES_ARCH_X86_64
 cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 
-// AVX512_BF16 functions
+// AVX512_F_BW_VL_VNNI functions
 #ifdef OPT_AVX512_F_BW_VL_VNNI
 bool avx512_f_bw_vl_vnni_supported = opt.avx512f && opt.avx512bw && opt.avx512vl && opt.avx512vnni;
 INITIALIZE_BENCHMARKS_SET_L2(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32,
@@ -46,8 +46,7 @@ INITIALIZE_BENCHMARKS_SET_L2(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_
 //                              avx512_f_bw_vl_vnni_supported);
 // INITIALIZE_BENCHMARKS_SET_COSINE(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,
 //                              avx512_f_bw_vl_vnni_supported)
-#endif // AVX512_BF16
-
+#endif // AVX512_F_BW_VL_VNNI
 
 #endif // x86_64
 

From cdb4d7f621513c70c5bdd00fafda2eb566ec014f Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 12:46:29 +0000
Subject: [PATCH 10/33] implement IP + unit test

---
 .../spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h    | 56 +++++++++++++++++++
 src/VecSim/spaces/IP_space.cpp                | 13 +++++
 .../spaces/functions/AVX512F_BW_VL_VNNI.cpp   |  6 ++
 .../spaces/functions/AVX512F_BW_VL_VNNI.h     |  1 +
 tests/unit/test_spaces.cpp                    | 43 +++++++++++++-
 5 files changed, 117 insertions(+), 2 deletions(-)
 create mode 100644 src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h

diff --git a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
new file mode 100644
index 000000000..a7b99fcb8
--- /dev/null
+++ b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
@@ -0,0 +1,56 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "VecSim/spaces/space_includes.h"
+
+static inline void InnerProductStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &sum) {
+    __m256i temp_a = _mm256_loadu_epi8(pVect1);
+    __m512i va = _mm512_cvtepi8_epi16(temp_a);
+    pVect1 += 32;
+
+    __m256i temp_b = _mm256_loadu_epi8(pVect2);
+    __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+    pVect2 += 32;
+
+    // _mm512_dpwssd_epi32(src, a, b)
+    // Multiply groups of 2 adjacent pairs of signed 16-bit integers in `a` with corresponding
+    // 16-bit integers in `b`, producing 2 intermediate signed 32-bit results. Sum these 2 results
+    // with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
+    sum = _mm512_dpwssd_epi32(sum, va, vb);
+}
+
+template <unsigned char residual> // 0..32
+float INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+                                                 size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    const int8_t *pEnd1 = pVect1 + dimension;
+
+    __m512i sum = _mm512_setzero_epi32();
+
+    // Deal with remainder first. `dim` is more than 32, so we have at least one 32-int_8 block,
+    // so mask loading is guaranteed to be safe
+    if constexpr (residual) {
+        __mmask32 mask = (1LU << residual) - 1;
+        __m256i temp_a = _mm256_maskz_loadu_epi8(mask, pVect1);
+        __m512i va = _mm512_cvtepi8_epi16(temp_a);
+        pVect1 += residual;
+
+        __m256i temp_b = _mm256_maskz_loadu_epi8(mask, pVect2);
+        __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+        pVect2 += residual;
+
+        sum = _mm512_dpwssd_epi32(sum, va, vb);
+    }
+
+    // We dealt with the residual part. We are left with some multiple of 32-int_8.
+    do {
+        InnerProductStep(pVect1, pVect2, sum);
+    } while (pVect1 < pEnd1);
+
+    return 1.0f - float(_mm512_reduce_add_epi32(sum));
+}
diff --git a/src/VecSim/spaces/IP_space.cpp b/src/VecSim/spaces/IP_space.cpp
index 699919dc2..0cebb3bfb 100644
--- a/src/VecSim/spaces/IP_space.cpp
+++ b/src/VecSim/spaces/IP_space.cpp
@@ -16,6 +16,7 @@
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
 #include "VecSim/spaces/functions/AVX512FP16_VL.h"
 #include "VecSim/spaces/functions/AVX512BF16_VL.h"
+#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/SSE3.h"
 
@@ -207,6 +208,18 @@ dist_func_t<float> IP_INT8_GetDistFunc(size_t dim, unsigned char *alignment, con
     if (dim < 32) {
         return ret_dist_func;
     }
+#ifdef CPU_FEATURES_ARCH_X86_64
+    auto features = (arch_opt == nullptr)
+                        ? cpu_features::GetX86Info().features
+                        : *static_cast<const cpu_features::X86Features *>(arch_opt);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (features.avx512f && features.avx512bw && features.avx512vl && features.avx512vnni) {
+        if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
+            *alignment = 32 * sizeof(int8_t); // align to 256 bits.
+        return Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(dim);
+    }
+#endif
+#endif // __x86_64__
     return ret_dist_func;
 }
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
index d82d4141d..3d3da5546 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -7,6 +7,7 @@
 #include "AVX512BW_VBMI2.h"
 
 #include "VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h"
+#include "VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h"
 
 namespace spaces {
 
@@ -17,6 +18,11 @@ dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim)
     CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
+dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI);
+    return ret_dist_func;
+}
 
 #include "implementation_chooser_cleanup.h"
 
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
index 818b9529f..c1ef5d6b8 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
@@ -11,5 +11,6 @@
 namespace spaces {
 
 dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim);
+dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim);
 
 } // namespace spaces
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 00f2a2d2f..61e6f167c 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -300,6 +300,7 @@ TEST_F(SpacesTest, smallDimChooser) {
         ASSERT_EQ(IP_FP64_GetDistFunc(dim), FP64_InnerProduct);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
+        ASSERT_EQ(IP_INT8_GetDistFunc(dim), INT8_InnerProduct);
     }
     for (size_t dim = 8; dim < 16; dim++) {
         ASSERT_EQ(L2_FP32_GetDistFunc(dim), FP32_L2Sqr);
@@ -309,6 +310,7 @@ TEST_F(SpacesTest, smallDimChooser) {
         ASSERT_EQ(IP_FP32_GetDistFunc(dim), FP32_InnerProduct);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
+        ASSERT_EQ(IP_INT8_GetDistFunc(dim), INT8_InnerProduct);
     }
     for (size_t dim = 16; dim < 32; dim++) {
         ASSERT_EQ(L2_BF16_GetDistFunc(dim), BF16_L2Sqr_LittleEndian);
@@ -316,6 +318,7 @@ TEST_F(SpacesTest, smallDimChooser) {
         ASSERT_EQ(L2_INT8_GetDistFunc(dim), INT8_L2Sqr);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
+        ASSERT_EQ(IP_INT8_GetDistFunc(dim), INT8_InnerProduct);
     }
 }
 
@@ -931,7 +934,7 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
         ASSERT_EQ(arch_opt_func, Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(dim))
             << "Unexpected distance function chosen for dim " << dim;
         ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim)) << "AVX512 with dim " << dim;
-        ASSERT_EQ(alignment, expected_alignment(512, dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
         // Unset optimizations flag, so we'll choose the next optimization.
         optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
             optimization.avx512vnni = 0;
@@ -945,7 +948,43 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
     ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
 }
 
+TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
+    auto optimization = cpu_features::GetX86Info().features;
+    size_t dim = GetParam();
+    auto v1 = test_utils::create_int8_vec(dim, 123);
+    auto v2 = test_utils::create_int8_vec(dim, 1234);
+
+    auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
+        size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
+        return (dim % elements_in_reg == 0) ? elements_in_reg * sizeof(int8_t) : 0;
+    };
+
+    dist_func_t<float> arch_opt_func;
+    float baseline = INT8_InnerProduct(v1.data(), v2.data(), dim);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
+        optimization.avx512vnni) {
+        unsigned char alignment = 0;
+        arch_opt_func = IP_INT8_GetDistFunc(dim, &alignment, &optimization);
+        ASSERT_EQ(arch_opt_func, Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(dim))
+            << "Unexpected distance function chosen for dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
+        // Unset optimizations flag, so we'll choose the next optimization.
+        optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
+            optimization.avx512vnni = 0;
+    }
+#endif
+    unsigned char alignment = 0;
+    arch_opt_func = IP_INT8_GetDistFunc(dim, &alignment, &optimization);
+    ASSERT_EQ(arch_opt_func, INT8_InnerProduct)
+        << "Unexpected distance function chosen for dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim))
+        << "No optimization with dim " << dim;
+    ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
+}
+
 INSTANTIATE_TEST_SUITE_P(INT8OptFuncs, INT8SpacesOptimizationTest,
-                         testing::Range(64UL, 64 * 2UL + 1));
+                         testing::Range(32UL, 32 * 2UL + 1));
 
 #endif // CPU_FEATURES_ARCH_X86_64

From 5f018903fd7b6ca1da042ffd24571839d363d1f0 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 12:50:45 +0000
Subject: [PATCH 11/33] ip bm

---
 tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
index 96c02a44c..5ab3ebf7f 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -40,7 +40,7 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 // AVX512_F_BW_VL_VNNI functions
 #ifdef OPT_AVX512_F_BW_VL_VNNI
 bool avx512_f_bw_vl_vnni_supported = opt.avx512f && opt.avx512bw && opt.avx512vl && opt.avx512vnni;
-INITIALIZE_BENCHMARKS_SET_L2(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32,
+INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32,
                              avx512_f_bw_vl_vnni_supported);
 // INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,
 //                              avx512_f_bw_vl_vnni_supported);

From 2dce6f0d3da611587f87001a9c7be9d7ea61e384 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 12:54:20 +0000
Subject: [PATCH 12/33] format

---
 tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
index 5ab3ebf7f..cb12f10cb 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -41,7 +41,7 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 #ifdef OPT_AVX512_F_BW_VL_VNNI
 bool avx512_f_bw_vl_vnni_supported = opt.avx512f && opt.avx512bw && opt.avx512vl && opt.avx512vnni;
 INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32,
-                             avx512_f_bw_vl_vnni_supported);
+                                avx512_f_bw_vl_vnni_supported);
 // INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,
 //                              avx512_f_bw_vl_vnni_supported);
 // INITIALIZE_BENCHMARKS_SET_COSINE(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,

From 3d3b3758e3e2f701917cd680cc20a267641ecad0 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 8 Dec 2024 15:34:34 +0000
Subject: [PATCH 13/33] implement cosine in ip API

change create_int8_vec to populate_int8_vec

add compute_norm
---
 src/VecSim/spaces/CMakeLists.txt              |  2 -
 src/VecSim/spaces/Cosine/Cosine.cpp           | 23 -----
 src/VecSim/spaces/Cosine/Cosine.h             | 11 ---
 src/VecSim/spaces/Cosine_space.cpp            | 27 ------
 src/VecSim/spaces/Cosine_space.h              | 13 ---
 src/VecSim/spaces/IP/IP.cpp                   | 14 ++-
 src/VecSim/spaces/IP/IP.h                     |  1 +
 .../spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h    | 20 +++-
 src/VecSim/spaces/IP_space.cpp                | 27 ++++++
 src/VecSim/spaces/IP_space.h                  |  2 +
 .../spaces/functions/AVX512F_BW_VL_VNNI.cpp   |  7 ++
 .../spaces/functions/AVX512F_BW_VL_VNNI.h     |  1 +
 src/VecSim/spaces/spaces.cpp                  | 13 +++
 tests/benchmark/spaces_benchmarks/bm_spaces.h | 10 +-
 .../spaces_benchmarks/bm_spaces_int8.cpp      | 23 ++---
 tests/unit/test_spaces.cpp                    | 95 +++++++++++++------
 tests/utils/tests_utils.h                     | 17 +++-
 17 files changed, 176 insertions(+), 130 deletions(-)
 delete mode 100644 src/VecSim/spaces/Cosine/Cosine.cpp
 delete mode 100644 src/VecSim/spaces/Cosine/Cosine.h
 delete mode 100644 src/VecSim/spaces/Cosine_space.cpp
 delete mode 100644 src/VecSim/spaces/Cosine_space.h

diff --git a/src/VecSim/spaces/CMakeLists.txt b/src/VecSim/spaces/CMakeLists.txt
index fc23adc18..1fc9473b2 100644
--- a/src/VecSim/spaces/CMakeLists.txt
+++ b/src/VecSim/spaces/CMakeLists.txt
@@ -3,7 +3,6 @@ project(VectorSimilaritySpaces_no_optimization)
 add_library(VectorSimilaritySpaces_no_optimization
 	L2/L2.cpp
 	IP/IP.cpp
-	Cosine/Cosine.cpp
 )
 
 include(${root}/cmake/cpu_features.cmake)
@@ -86,7 +85,6 @@ endif()
 add_library(VectorSimilaritySpaces
 	L2_space.cpp
 	IP_space.cpp
-	Cosine_space.cpp
 	spaces.cpp
 	${OPTIMIZATIONS}
 	computer/preprocessor_container.cpp
diff --git a/src/VecSim/spaces/Cosine/Cosine.cpp b/src/VecSim/spaces/Cosine/Cosine.cpp
deleted file mode 100644
index 1cbc9a191..000000000
--- a/src/VecSim/spaces/Cosine/Cosine.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#include "Cosine.h"
-
-float INT8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
-    int8_t *pVect1 = (int8_t *)pVect1v;
-    int8_t *pVect2 = (int8_t *)pVect2v;
-
-    int res = 0;
-    for (size_t i = 0; i < dimension; i++) {
-        int16_t a = pVect1[i];
-        int16_t b = pVect2[i];
-        res += a * b;
-    }
-
-    float norm_v1 = *(float *)pVect1v;
-    float norm_v2 = *(float *)pVect2v;
-    return 1.0f - float(res) / (norm_v1 * norm_v2);
-}
diff --git a/src/VecSim/spaces/Cosine/Cosine.h b/src/VecSim/spaces/Cosine/Cosine.h
deleted file mode 100644
index c42f6c14f..000000000
--- a/src/VecSim/spaces/Cosine/Cosine.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#pragma once
-
-#include <cstdlib>
-
-float INT8_Cosine(const void *pVect1, const void *pVect2, size_t dimension);
diff --git a/src/VecSim/spaces/Cosine_space.cpp b/src/VecSim/spaces/Cosine_space.cpp
deleted file mode 100644
index 7cace4c32..000000000
--- a/src/VecSim/spaces/Cosine_space.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#include "VecSim/spaces/space_includes.h"
-#include "VecSim/spaces/Cosine_space.h"
-#include "VecSim/spaces/Cosine/Cosine.h"
-
-namespace spaces {
-dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment,
-                                           const void *arch_opt) {
-    unsigned char dummy_alignment;
-    if (alignment == nullptr) {
-        alignment = &dummy_alignment;
-    }
-
-    dist_func_t<float> ret_dist_func = INT8_Cosine;
-    // Optimizations assume at least 32 int8. If we have less, we use the naive implementation.
-    if (dim < 32) {
-        return ret_dist_func;
-    }
-    return ret_dist_func;
-}
-
-} // namespace spaces
diff --git a/src/VecSim/spaces/Cosine_space.h b/src/VecSim/spaces/Cosine_space.h
deleted file mode 100644
index e139a5521..000000000
--- a/src/VecSim/spaces/Cosine_space.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#pragma once
-#include "VecSim/spaces/spaces.h"
-
-namespace spaces {
-dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
-                                           const void *arch_opt = nullptr);
-} // namespace spaces
diff --git a/src/VecSim/spaces/IP/IP.cpp b/src/VecSim/spaces/IP/IP.cpp
index 1562e5b1a..c3856abda 100644
--- a/src/VecSim/spaces/IP/IP.cpp
+++ b/src/VecSim/spaces/IP/IP.cpp
@@ -67,7 +67,7 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
     return 1.0f - res;
 }
 
-float INT8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
+static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
     int8_t *pVect1 = (int8_t *)pVect1v;
     int8_t *pVect2 = (int8_t *)pVect2v;
 
@@ -77,5 +77,15 @@ float INT8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimensi
         int16_t b = pVect2[i];
         res += a * b;
     }
-    return 1.0f - float(res);
+    return res;
+}
+
+float INT8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    return 1 - INT8_InnerProductImp(pVect1v, pVect2v, dimension);
+}
+
+float INT8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    float norm_v1 = *(float *)((int8_t *)pVect1v + dimension);
+    float norm_v2 = *(float *)((int8_t *)pVect2v + dimension);
+    return 1.0f - float(INT8_InnerProductImp(pVect1v, pVect2v, dimension)) / (norm_v1 * norm_v2);
 }
diff --git a/src/VecSim/spaces/IP/IP.h b/src/VecSim/spaces/IP/IP.h
index 64e11b52f..d712499ed 100644
--- a/src/VecSim/spaces/IP/IP.h
+++ b/src/VecSim/spaces/IP/IP.h
@@ -18,3 +18,4 @@ float BF16_InnerProduct_LittleEndian(const void *pVect1v, const void *pVect2v, s
 float BF16_InnerProduct_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
 
 float INT8_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension);
+float INT8_Cosine(const void *pVect1, const void *pVect2, size_t dimension);
diff --git a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
index a7b99fcb8..fcd33c00c 100644
--- a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
+++ b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
@@ -23,8 +23,7 @@ static inline void InnerProductStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &s
 }
 
 template <unsigned char residual> // 0..32
-float INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
-                                                 size_t dimension) {
+static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
     int8_t *pVect1 = (int8_t *)pVect1v;
     int8_t *pVect2 = (int8_t *)pVect2v;
 
@@ -52,5 +51,20 @@ float INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void
         InnerProductStep(pVect1, pVect2, sum);
     } while (pVect1 < pEnd1);
 
-    return 1.0f - float(_mm512_reduce_add_epi32(sum));
+    return _mm512_reduce_add_epi32(sum);
+}
+
+template <unsigned char residual> // 0..32
+float INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+                                                 size_t dimension) {
+
+    return 1 - INT8_InnerProductImp<residual>(pVect1v, pVect2v, dimension);
+}
+template <unsigned char residual> // 0..32
+float INT8_CosineSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+                                           size_t dimension) {
+    float norm_v1 = *(float *)((int8_t *)pVect1v + dimension);
+    float norm_v2 = *(float *)((int8_t *)pVect2v + dimension);
+    return 1.0f -
+           float(INT8_InnerProductImp<residual>(pVect1v, pVect2v, dimension)) / (norm_v1 * norm_v2);
 }
diff --git a/src/VecSim/spaces/IP_space.cpp b/src/VecSim/spaces/IP_space.cpp
index 0cebb3bfb..b168b2d7d 100644
--- a/src/VecSim/spaces/IP_space.cpp
+++ b/src/VecSim/spaces/IP_space.cpp
@@ -222,4 +222,31 @@ dist_func_t<float> IP_INT8_GetDistFunc(size_t dim, unsigned char *alignment, con
 #endif // __x86_64__
     return ret_dist_func;
 }
+
+dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment,
+                                           const void *arch_opt) {
+    unsigned char dummy_alignment;
+    if (alignment == nullptr) {
+        alignment = &dummy_alignment;
+    }
+
+    dist_func_t<float> ret_dist_func = INT8_Cosine;
+    // Optimizations assume at least 32 int8. If we have less, we use the naive implementation.
+    if (dim < 32) {
+        return ret_dist_func;
+    }
+#ifdef CPU_FEATURES_ARCH_X86_64
+    auto features = (arch_opt == nullptr)
+                        ? cpu_features::GetX86Info().features
+                        : *static_cast<const cpu_features::X86Features *>(arch_opt);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (features.avx512f && features.avx512bw && features.avx512vl && features.avx512vnni) {
+        if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
+            *alignment = 32 * sizeof(int8_t); // align to 256 bits.
+        return Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(dim);
+    }
+#endif
+#endif // __x86_64__
+    return ret_dist_func;
+}
 } // namespace spaces
diff --git a/src/VecSim/spaces/IP_space.h b/src/VecSim/spaces/IP_space.h
index 87407c1a3..0d8c3a836 100644
--- a/src/VecSim/spaces/IP_space.h
+++ b/src/VecSim/spaces/IP_space.h
@@ -18,4 +18,6 @@ dist_func_t<float> IP_FP16_GetDistFunc(size_t dim, unsigned char *alignment = nu
                                        const void *arch_opt = nullptr);
 dist_func_t<float> IP_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                        const void *arch_opt = nullptr);
+dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                           const void *arch_opt = nullptr);
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
index 3d3da5546..cd66a6096 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -18,12 +18,19 @@ dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim)
     CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
+
 dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
     CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
 
+dist_func_t<float> Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_CosineSIMD32_AVX512F_BW_VL_VNNI);
+    return ret_dist_func;
+}
+
 #include "implementation_chooser_cleanup.h"
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
index c1ef5d6b8..532a33c76 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
@@ -12,5 +12,6 @@ namespace spaces {
 
 dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim);
 dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim);
+dist_func_t<float> Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_t dim);
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/spaces.cpp b/src/VecSim/spaces/spaces.cpp
index 84f71b463..b512c9750 100644
--- a/src/VecSim/spaces/spaces.cpp
+++ b/src/VecSim/spaces/spaces.cpp
@@ -69,6 +69,19 @@ dist_func_t<double> GetDistFunc<double, double>(VecSimMetric metric, size_t dim,
     }
     throw std::invalid_argument("Invalid metric");
 }
+template <>
+dist_func_t<float> GetDistFunc<int8_t, float>(VecSimMetric metric, size_t dim,
+                                              unsigned char *alignment) {
+    switch (metric) {
+    case VecSimMetric_Cosine:
+        return Cosine_INT8_GetDistFunc(dim, alignment);
+    case VecSimMetric_IP:
+        return IP_INT8_GetDistFunc(dim, alignment);
+    case VecSimMetric_L2:
+        return L2_INT8_GetDistFunc(dim, alignment);
+    }
+    throw std::invalid_argument("Invalid metric");
+}
 
 template <>
 normalizeVector_f<float> GetNormalizeFunc<float>(void) {
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces.h b/tests/benchmark/spaces_benchmarks/bm_spaces.h
index 909f4d6dd..b7431c43c 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces.h
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces.h
@@ -124,11 +124,11 @@ static constexpr size_t start = min_no_res_th_dim;
     INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported);          \
     INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported);
 
-#define INITIALIZE_BENCHMARKS_SET_COSINE(bm_class, type_prefix, arch, dim_opt, arch_supported)     \
-    INITIALIZE_HIGH_DIM(bm_class, type_prefix, arch, COSINE, arch_supported);                      \
-    INITIALIZE_LOW_DIM(bm_class, type_prefix, arch, COSINE, arch_supported);                       \
-    INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);      \
-    INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, COSINE, dim_opt, arch_supported);
+#define INITIALIZE_BENCHMARKS_SET_Cosine(bm_class, type_prefix, arch, dim_opt, arch_supported)     \
+    INITIALIZE_HIGH_DIM(bm_class, type_prefix, arch, Cosine, arch_supported);                      \
+    INITIALIZE_LOW_DIM(bm_class, type_prefix, arch, Cosine, arch_supported);                       \
+    INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, Cosine, dim_opt, arch_supported);      \
+    INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, Cosine, dim_opt, arch_supported);
 
 #define INITIALIZE_BENCHMARKS_SET_L2_IP(bm_class, type_prefix, arch, dim_opt, arch_supported)      \
     INITIALIZE_BENCHMARKS_SET_L2(bm_class, type_prefix, arch, dim_opt, arch_supported)             \
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
index cb12f10cb..0adde8972 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -21,12 +21,15 @@ class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture {
 
     void SetUp(const ::benchmark::State &state) {
         dim = state.range(0);
-        v1 = new int8_t[dim];
-        v2 = new int8_t[dim];
-
-        // random for int8_t and uint8_t is not provided by the standard library
-        memcpy(v1, test_utils::create_int8_vec(dim, 123).data(), dim);
-        memcpy(v2, test_utils::create_int8_vec(dim, 1234).data(), dim);
+        // Allocate vector with extra space for cosine calculations
+        v1 = new int8_t[dim + sizeof(float)];
+        v2 = new int8_t[dim + sizeof(float)];
+        test_utils::populate_int8_vec(v1, dim, 123);
+        test_utils::populate_int8_vec(v2, dim, 1234);
+
+        // Store the norm in the extra space for cosine calculations
+        *(float *)(v1 + dim) = test_utils::compute_norm(v1, dim);
+        *(float *)(v2 + dim) = test_utils::compute_norm(v2, dim);
     }
     void TearDown(const ::benchmark::State &state) {
         delete v1;
@@ -42,14 +45,12 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 bool avx512_f_bw_vl_vnni_supported = opt.avx512f && opt.avx512bw && opt.avx512vl && opt.avx512vnni;
 INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32,
                                 avx512_f_bw_vl_vnni_supported);
-// INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,
-//                              avx512_f_bw_vl_vnni_supported);
-// INITIALIZE_BENCHMARKS_SET_COSINE(BM_VecSimSpaces_Integers_INT8, INT8, AVX512_F_BW_VL_VNNI, 32,
-//                              avx512_f_bw_vl_vnni_supported)
+INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32,
+                                 avx512_f_bw_vl_vnni_supported)
 #endif // AVX512_F_BW_VL_VNNI
 
 #endif // x86_64
 
-INITIALIZE_NAIVE_BM(BM_VecSimSpaces_Integers_INT8, INT8, InnerProduct, 32);
+    INITIALIZE_NAIVE_BM(BM_VecSimSpaces_Integers_INT8, INT8, InnerProduct, 32);
 INITIALIZE_NAIVE_BM(BM_VecSimSpaces_Integers_INT8, INT8, L2Sqr, 32);
 BENCHMARK_MAIN();
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 61e6f167c..f76069c46 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -12,12 +12,10 @@
 #include "VecSim/spaces/space_includes.h"
 #include "VecSim/spaces/IP/IP.h"
 #include "VecSim/spaces/L2/L2.h"
-#include "VecSim/spaces/Cosine/Cosine.h"
 #include "VecSim/utils/vec_utils.h"
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/spaces/IP_space.h"
 #include "VecSim/spaces/L2_space.h"
-#include "VecSim/spaces/Cosine_space.h"
 #include "VecSim/types/float16.h"
 #include "VecSim/spaces/functions/AVX512F.h"
 #include "VecSim/spaces/functions/AVX.h"
@@ -243,19 +241,19 @@ TEST_F(SpacesTest, int8_ip_no_optimization_func_test) {
 
 TEST_F(SpacesTest, int8_Cosine_no_optimization_func_test) {
     size_t dim = 4;
-    // create normalized vector with extra space for the norm
-    std::vector<int8_t> vec1(dim + sizeof(float), 0);
-    std::vector<int8_t> vec2(dim + sizeof(float), 0);
+    // create a vector with extra space for the norm
+    int8_t *v1 = new int8_t[dim + sizeof(float)];
+    int8_t *v2 = new int8_t[dim + sizeof(float)];
 
-    vec1[0] = 1; // {1, 0, 0, 0}
-    vec2[1] = 1; // {1, 0, 0, 0}
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 123);
 
     // write the norm at the end of the vector
-    *(float *)(vec1.data() + dim) = 1.0;
-    *(float *)(vec2.data() + dim) = 1.0;
+    *(float *)(v1 + dim) = test_utils::compute_norm(v1, dim);
+    *(float *)(v2 + dim) = test_utils::compute_norm(v2, dim);
 
-    float dist = INT8_InnerProduct((const void *)vec1.data(), (const void *)vec2.data(), dim);
-    ASSERT_EQ(dist, 1.0);
+    float dist = INT8_Cosine((const void *)v1, (const void *)v2, dim);
+    ASSERT_NEAR(dist, 0.0, 0.000001);
 }
 
 /* ======================== Test Getters ======================== */
@@ -280,11 +278,11 @@ TEST_F(SpacesTest, GetDistFuncInvalidMetricFP16) {
         (spaces::GetDistFunc<float16, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
         std::invalid_argument);
 }
-// TEST_F(SpacesTest, GetDistFuncInvalidMetricINT8) {
-//     EXPECT_THROW(
-//         (spaces::GetDistFunc<int8_t, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10,
-//         nullptr)), std::invalid_argument);
-// }
+TEST_F(SpacesTest, GetDistFuncInvalidMetricINT8) {
+    EXPECT_THROW(
+        (spaces::GetDistFunc<int8_t, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
+        std::invalid_argument);
+}
 
 using namespace spaces;
 
@@ -916,8 +914,10 @@ class INT8SpacesOptimizationTest : public testing::TestWithParam<size_t> {};
 TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
     auto optimization = cpu_features::GetX86Info().features;
     size_t dim = GetParam();
-    auto v1 = test_utils::create_int8_vec(dim, 123);
-    auto v2 = test_utils::create_int8_vec(dim, 1234);
+    int8_t *v1 = new int8_t[dim];
+    int8_t *v2 = new int8_t[dim];
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 1234);
 
     auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
         size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
@@ -925,7 +925,7 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
     };
 
     dist_func_t<float> arch_opt_func;
-    float baseline = INT8_L2Sqr(v1.data(), v2.data(), dim);
+    float baseline = INT8_L2Sqr(v1, v2, dim);
 #ifdef OPT_AVX512_F_BW_VL_VNNI
     if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
         optimization.avx512vnni) {
@@ -933,7 +933,7 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
         arch_opt_func = L2_INT8_GetDistFunc(dim, &alignment, &optimization);
         ASSERT_EQ(arch_opt_func, Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(dim))
             << "Unexpected distance function chosen for dim " << dim;
-        ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
         ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
         // Unset optimizations flag, so we'll choose the next optimization.
         optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
@@ -943,16 +943,17 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
     unsigned char alignment = 0;
     arch_opt_func = L2_INT8_GetDistFunc(dim, &alignment, &optimization);
     ASSERT_EQ(arch_opt_func, INT8_L2Sqr) << "Unexpected distance function chosen for dim " << dim;
-    ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim))
-        << "No optimization with dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "No optimization with dim " << dim;
     ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
 }
 
 TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
     auto optimization = cpu_features::GetX86Info().features;
     size_t dim = GetParam();
-    auto v1 = test_utils::create_int8_vec(dim, 123);
-    auto v2 = test_utils::create_int8_vec(dim, 1234);
+    int8_t *v1 = new int8_t[dim];
+    int8_t *v2 = new int8_t[dim];
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 1234);
 
     auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
         size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
@@ -960,7 +961,7 @@ TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
     };
 
     dist_func_t<float> arch_opt_func;
-    float baseline = INT8_InnerProduct(v1.data(), v2.data(), dim);
+    float baseline = INT8_InnerProduct(v1, v2, dim);
 #ifdef OPT_AVX512_F_BW_VL_VNNI
     if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
         optimization.avx512vnni) {
@@ -968,7 +969,7 @@ TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
         arch_opt_func = IP_INT8_GetDistFunc(dim, &alignment, &optimization);
         ASSERT_EQ(arch_opt_func, Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(dim))
             << "Unexpected distance function chosen for dim " << dim;
-        ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
         ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
         // Unset optimizations flag, so we'll choose the next optimization.
         optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
@@ -979,8 +980,46 @@ TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
     arch_opt_func = IP_INT8_GetDistFunc(dim, &alignment, &optimization);
     ASSERT_EQ(arch_opt_func, INT8_InnerProduct)
         << "Unexpected distance function chosen for dim " << dim;
-    ASSERT_EQ(baseline, arch_opt_func(v1.data(), v2.data(), dim))
-        << "No optimization with dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "No optimization with dim " << dim;
+    ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
+}
+
+TEST_P(INT8SpacesOptimizationTest, INT8CosineTest) {
+    auto optimization = cpu_features::GetX86Info().features;
+    size_t dim = GetParam();
+    int8_t *v1 = new int8_t[dim + sizeof(float)];
+    int8_t *v2 = new int8_t[dim + sizeof(float)];
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 1234);
+
+    // write the norm at the end of the vector
+    *(float *)(v1 + dim) = test_utils::compute_norm(v1, dim);
+    *(float *)(v2 + dim) = test_utils::compute_norm(v2, dim);
+    auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
+        size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
+        return (dim % elements_in_reg == 0) ? elements_in_reg * sizeof(int8_t) : 0;
+    };
+
+    dist_func_t<float> arch_opt_func;
+    float baseline = INT8_Cosine(v1, v2, dim);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
+        optimization.avx512vnni) {
+        unsigned char alignment = 0;
+        arch_opt_func = Cosine_INT8_GetDistFunc(dim, &alignment, &optimization);
+        ASSERT_EQ(arch_opt_func, Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(dim))
+            << "Unexpected distance function chosen for dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
+        // Unset optimizations flag, so we'll choose the next optimization.
+        optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
+            optimization.avx512vnni = 0;
+    }
+#endif
+    unsigned char alignment = 0;
+    arch_opt_func = Cosine_INT8_GetDistFunc(dim, &alignment, &optimization);
+    ASSERT_EQ(arch_opt_func, INT8_Cosine) << "Unexpected distance function chosen for dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "No optimization with dim " << dim;
     ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
 }
 
diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h
index 0231a9838..31dc3d9ef 100644
--- a/tests/utils/tests_utils.h
+++ b/tests/utils/tests_utils.h
@@ -5,7 +5,8 @@
 
 namespace test_utils {
 
-static std::vector<int8_t> create_int8_vec(size_t dim, int seed = 1234) {
+// Assuming v is a memory allocation of size dim * sizeof(float)
+static void populate_int8_vec(int8_t *v, size_t dim, int seed = 1234) {
 
     std::mt19937 gen(seed); // Mersenne Twister engine initialized with the fixed seed
 
@@ -13,12 +14,18 @@ static std::vector<int8_t> create_int8_vec(size_t dim, int seed = 1234) {
     // Define a distribution range for int8_t
     std::uniform_int_distribution<int16_t> dis(-128, 127);
 
-    std::vector<int8_t> vec(dim);
-    for (auto &num : vec) {
-        num = static_cast<int8_t>(dis(gen));
+    for (size_t i = 0; i < dim; i++) {
+        v[i] = static_cast<int8_t>(dis(gen));
     }
+}
 
-    return vec;
+// TODO: replace with normalize function from VecSim
+float compute_norm(const int8_t *vec, size_t dim) {
+    int norm = 0;
+    for (size_t i = 0; i < dim; i++) {
+        norm += vec[i] * vec[i];
+    }
+    return sqrt(norm);
 }
 
 } // namespace test_utils

From 6f211b32159cd391e9ec4ec5b806f6109dcd899b Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 9 Dec 2024 11:43:18 +0000
Subject: [PATCH 14/33] use mask sub instead of mask load

---
 src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
index 9130d6414..edeb37b4b 100644
--- a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
+++ b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
@@ -37,15 +37,15 @@ float INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect
     // so mask loading is guaranteed to be safe
     if constexpr (residual) {
         __mmask32 mask = (1LU << residual) - 1;
-        __m256i temp_a = _mm256_maskz_loadu_epi8(mask, pVect1);
+        __m256i temp_a = _mm256_loadu_epi8(pVect1);
         __m512i va = _mm512_cvtepi8_epi16(temp_a);
         pVect1 += residual;
 
-        __m256i temp_b = _mm256_maskz_loadu_epi8(mask, pVect2);
+        __m256i temp_b = _mm256_loadu_epi8(pVect2);
         __m512i vb = _mm512_cvtepi8_epi16(temp_b);
         pVect2 += residual;
 
-        __m512i diff = _mm512_sub_epi16(va, vb);
+        __m512i diff = _mm512_maskz_sub_epi16(mask, va, vb);
         sum = _mm512_dpwssd_epi32(sum, diff, diff);
     }
 

From 6ac65a3476d3de2c261fb41ae581a785baf85623 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 10 Dec 2024 06:14:36 +0000
Subject: [PATCH 15/33] loop size = 512 minimal dim = 32

---
 .../spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h    | 21 ++++++++++++-------
 .../spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h    | 21 ++++++++++++-------
 .../spaces/functions/AVX512F_BW_VL_VNNI.cpp   |  6 +++---
 .../spaces/functions/implementation_chooser.h | 15 ++++++++-----
 4 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
index fcd33c00c..28187bf31 100644
--- a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
+++ b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
@@ -22,7 +22,7 @@ static inline void InnerProductStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &s
     sum = _mm512_dpwssd_epi32(sum, va, vb);
 }
 
-template <unsigned char residual> // 0..32
+template <unsigned char residual> // 0..64
 static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
     int8_t *pVect1 = (int8_t *)pVect1v;
     int8_t *pVect2 = (int8_t *)pVect2v;
@@ -33,23 +33,28 @@ static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v,
 
     // Deal with remainder first. `dim` is more than 32, so we have at least one 32-int_8 block,
     // so mask loading is guaranteed to be safe
-    if constexpr (residual) {
-        __mmask32 mask = (1LU << residual) - 1;
+    if constexpr (residual % 32) {
+        __mmask32 mask = (1LU << (residual % 32)) - 1;
         __m256i temp_a = _mm256_maskz_loadu_epi8(mask, pVect1);
         __m512i va = _mm512_cvtepi8_epi16(temp_a);
-        pVect1 += residual;
+        pVect1 += residual % 32;
 
         __m256i temp_b = _mm256_maskz_loadu_epi8(mask, pVect2);
         __m512i vb = _mm512_cvtepi8_epi16(temp_b);
-        pVect2 += residual;
+        pVect2 += residual % 32;
 
         sum = _mm512_dpwssd_epi32(sum, va, vb);
     }
 
-    // We dealt with the residual part. We are left with some multiple of 32-int_8.
-    do {
+    if constexpr (residual >= 32) {
+        InnerProductStep(pVect1, pVect2, sum);
+    }
+
+    // We dealt with the residual part. We are left with some multiple of 64-int_8.
+    while (pVect1 < pEnd1) {
+        InnerProductStep(pVect1, pVect2, sum);
         InnerProductStep(pVect1, pVect2, sum);
-    } while (pVect1 < pEnd1);
+    }
 
     return _mm512_reduce_add_epi32(sum);
 }
diff --git a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
index edeb37b4b..d47964ca2 100644
--- a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
+++ b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
@@ -23,7 +23,7 @@ static inline void L2SqrStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &sum) {
     sum = _mm512_dpwssd_epi32(sum, diff, diff);
 }
 
-template <unsigned char residual> // 0..32
+template <unsigned char residual> // 0..64
 float INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
                                           size_t dimension) {
     int8_t *pVect1 = (int8_t *)pVect1v;
@@ -35,24 +35,29 @@ float INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect
 
     // Deal with remainder first. `dim` is more than 32, so we have at least one 32-int_8 block,
     // so mask loading is guaranteed to be safe
-    if constexpr (residual) {
-        __mmask32 mask = (1LU << residual) - 1;
+    if constexpr (residual % 32) {
+        __mmask32 mask = (1LU << (residual % 32)) - 1;
         __m256i temp_a = _mm256_loadu_epi8(pVect1);
         __m512i va = _mm512_cvtepi8_epi16(temp_a);
-        pVect1 += residual;
+        pVect1 += residual % 32;
 
         __m256i temp_b = _mm256_loadu_epi8(pVect2);
         __m512i vb = _mm512_cvtepi8_epi16(temp_b);
-        pVect2 += residual;
+        pVect2 += residual % 32;
 
         __m512i diff = _mm512_maskz_sub_epi16(mask, va, vb);
         sum = _mm512_dpwssd_epi32(sum, diff, diff);
     }
 
-    // We dealt with the residual part. We are left with some multiple of 32-int_8.
-    do {
+    if constexpr (residual >= 32) {
         L2SqrStep(pVect1, pVect2, sum);
-    } while (pVect1 < pEnd1);
+    }
+
+    // We dealt with the residual part. We are left with some multiple of 64-int_8.
+    while (pVect1 < pEnd1) {
+        L2SqrStep(pVect1, pVect2, sum);
+        L2SqrStep(pVect1, pVect2, sum);
+    }
 
     return _mm512_reduce_add_epi32(sum);
 }
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
index cd66a6096..599984954 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -15,19 +15,19 @@ namespace spaces {
 
 dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI);
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
 
 dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI);
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
 
 dist_func_t<float> Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, INT8_CosineSIMD32_AVX512F_BW_VL_VNNI);
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_CosineSIMD32_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
 
diff --git a/src/VecSim/spaces/functions/implementation_chooser.h b/src/VecSim/spaces/functions/implementation_chooser.h
index 6bb61815e..b32ad56c6 100644
--- a/src/VecSim/spaces/functions/implementation_chooser.h
+++ b/src/VecSim/spaces/functions/implementation_chooser.h
@@ -25,23 +25,28 @@
 // of 4N, 4N+1, 4N+2, 4N+3.
 #define C4(X, func, N) X(4 * N, func) X(4 * N + 1, func) X(4 * N + 2, func) X(4 * N + 3, func)
 
-// Macros for 8, 16 and 32 cases. Used to collapse the switch statement. Expands into 0-31, 0-15 or
-// 0-7 cases.
+// Macros for 8, 16, 32 and 64 cases. Used to collapse the switch statement. Expands into 0-63,
+// 0-31, 0-15 or 0-7 cases.
 #define CASES32(X, func)                                                                           \
     C4(X, func, 0)                                                                                 \
     C4(X, func, 1)                                                                                 \
     C4(X, func, 2) C4(X, func, 3) C4(X, func, 4) C4(X, func, 5) C4(X, func, 6) C4(X, func, 7)
 #define CASES16(X, func) C4(X, func, 0) C4(X, func, 1) C4(X, func, 2) C4(X, func, 3)
 #define CASES8(X, func)  C4(X, func, 0) C4(X, func, 1)
+#define CASES64(X, func)                                                                           \
+    CASES32(X, func)                                                                               \
+    C4(X, func, 8)                                                                                 \
+    C4(X, func, 9)                                                                                 \
+    C4(X, func, 10) C4(X, func, 11) C4(X, func, 12) C4(X, func, 13) C4(X, func, 14) C4(X, func, 15)
 
 // Main macro. Expands into a switch statement that chooses the implementation based on the
 // dimension's remainder.
 // @params:
 // out:     The output variable that will be set to the chosen implementation.
 // dim:     The dimension.
-// chunk:   The chunk size. Can be 32, 16 or 8. 32 for 16-bit elements, 16 for 32-bit elements, 8
-// for 64-bit elements.
-// func:    The templated function that we want to choose the implementation for.
+// chunk:   The chunk size. Can be 64, 32, 16 or 8. 64 for 8-bit elements, 32 for 16-bit elements,
+// 16 for 32-bit elements, 8 for 64-bit elements. func:    The templated function that we want to
+// choose the implementation for.
 #define CHOOSE_IMPLEMENTATION(out, dim, chunk, func)                                               \
     do {                                                                                           \
         decltype(out) __ret_dist_func;                                                             \

From 0d07c5d672b1355258a5deac68faafe0a200610f Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 10 Dec 2024 10:38:09 +0000
Subject: [PATCH 16/33] add int8 to bm

---
 tests/benchmark/benchmarks.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/benchmark/benchmarks.sh b/tests/benchmark/benchmarks.sh
index 2e6664424..867077ede 100755
--- a/tests/benchmark/benchmarks.sh
+++ b/tests/benchmark/benchmarks.sh
@@ -13,6 +13,7 @@ if [ -z "$BM_TYPE"  ] || [ "$BM_TYPE" = "benchmarks-all" ]; then
     echo spaces_fp64
     echo spaces_bf16
     echo spaces_fp16
+    echo spaces_int8
 elif [ "$BM_TYPE" = "benchmarks-default" ]; then
     echo basics_single_fp32
     echo basics_multi_fp32
@@ -20,6 +21,7 @@ elif [ "$BM_TYPE" = "benchmarks-default" ]; then
     echo spaces_fp64
     echo spaces_bf16
     echo spaces_fp16
+    echo spaces_int8
 # Basic benchmarks
 elif [ "$BM_TYPE" = "bm-basics-fp32-single" ] ; then
     echo basics_single_fp32
@@ -66,6 +68,7 @@ elif [ "$BM_TYPE" = "bm-spaces" ] ; then
     echo spaces_fp16
     echo spaces_fp64
     echo spaces_bf16
+    echo spaces_int8
 elif [ "$BM_TYPE" = "bm-spaces-fp32" ] ; then
     echo spaces_fp32
 elif [ "$BM_TYPE" = "bm-spaces-fp64" ] ; then

From 3586a76f8d7137404a0fa3370bb89a2241aa9e24 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 10 Dec 2024 10:40:58 +0000
Subject: [PATCH 17/33] rename to simd64

---
 src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h  | 4 ++--
 src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h  | 2 +-
 src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
index 28187bf31..ffda357ff 100644
--- a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
+++ b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
@@ -60,13 +60,13 @@ static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v,
 }
 
 template <unsigned char residual> // 0..32
-float INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+float INT8_InnerProductSIMD64_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
                                                  size_t dimension) {
 
     return 1 - INT8_InnerProductImp<residual>(pVect1v, pVect2v, dimension);
 }
 template <unsigned char residual> // 0..32
-float INT8_CosineSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+float INT8_CosineSIMD64_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
                                            size_t dimension) {
     float norm_v1 = *(float *)((int8_t *)pVect1v + dimension);
     float norm_v2 = *(float *)((int8_t *)pVect2v + dimension);
diff --git a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
index d47964ca2..3f4ba33a6 100644
--- a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
+++ b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
@@ -24,7 +24,7 @@ static inline void L2SqrStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &sum) {
 }
 
 template <unsigned char residual> // 0..64
-float INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+float INT8_L2SqrSIMD64_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
                                           size_t dimension) {
     int8_t *pVect1 = (int8_t *)pVect1v;
     int8_t *pVect2 = (int8_t *)pVect2v;
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
index 599984954..661c2c945 100644
--- a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -15,19 +15,19 @@ namespace spaces {
 
 dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_L2SqrSIMD32_AVX512F_BW_VL_VNNI);
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_L2SqrSIMD64_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
 
 dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_InnerProductSIMD32_AVX512F_BW_VL_VNNI);
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_InnerProductSIMD64_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
 
 dist_func_t<float> Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
     dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_CosineSIMD32_AVX512F_BW_VL_VNNI);
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_CosineSIMD64_AVX512F_BW_VL_VNNI);
     return ret_dist_func;
 }
 

From adbc4d7e1be2e6b6efeb6d82b7e342e52a174c5e Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 10 Dec 2024 16:32:56 +0000
Subject: [PATCH 18/33] convert to int before multiplication

---
 tests/utils/tests_utils.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h
index 31dc3d9ef..01461a78d 100644
--- a/tests/utils/tests_utils.h
+++ b/tests/utils/tests_utils.h
@@ -23,7 +23,8 @@ static void populate_int8_vec(int8_t *v, size_t dim, int seed = 1234) {
 float compute_norm(const int8_t *vec, size_t dim) {
     int norm = 0;
     for (size_t i = 0; i < dim; i++) {
-        norm += vec[i] * vec[i];
+        int val = static_cast<int>(vec[i]);
+        norm += val * val;
     }
     return sqrt(norm);
 }

From 03be8544ac54130eb522258ffcb520e472b4dd86 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 10 Dec 2024 16:29:36 +0000
Subject: [PATCH 19/33] introduce IntegralType_ComputeNorm

---
 src/VecSim/spaces/normalize/compute_norm.h | 32 ++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 src/VecSim/spaces/normalize/compute_norm.h

diff --git a/src/VecSim/spaces/normalize/compute_norm.h b/src/VecSim/spaces/normalize/compute_norm.h
new file mode 100644
index 000000000..89b3b7d5c
--- /dev/null
+++ b/src/VecSim/spaces/normalize/compute_norm.h
@@ -0,0 +1,32 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#pragma once
+
+#include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
+#include <cmath>
+#include <vector>
+
+using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
+
+namespace spaces {
+
+template <typename DataType>
+static inline float IntegralType_ComputeNorm(const DataType *vec, const size_t dim) {
+    int sum = 0;
+
+    for (size_t i = 0; i < dim; i++) {
+        int val = static_cast<int>(vec[i]);
+        sum += val * val;
+    }
+    float norm = sqrt(sum);
+}
+
+
+
+} // namespace spaces

From a26e8c998bac268b591f43c98aeefd4553b4ea36 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 12 Dec 2024 12:00:46 +0000
Subject: [PATCH 20/33] move preprocessor logic to choose if cosine
 preprocessor is needed to CreateIndexComponents:

pass bool is_normalized
get distance function according to original metric
get pp according to is_normalized && metric == VecSimMetric_Cosine, and remove this logic from the indexes factories.

add dataSize member to AbstractIndexInitParams
add VecSimType_INT8 type

introduce VecSimParams_GetDataSize: returns datasize

introduce and implement GetNormalizeFunc<int8_t> that returns int8_normalizeVector
int8_normalizeVector computes the norm and stores it at the end of the argument vector.
---
 .../index_factories/brute_force_factory.cpp   | 19 ++++------
 .../components/components_factory.h           | 14 ++++++--
 src/VecSim/index_factories/hnsw_factory.cpp   | 36 +++++++------------
 src/VecSim/index_factories/tiered_factory.cpp |  3 ++
 src/VecSim/spaces/normalize/compute_norm.h    | 15 +++-----
 src/VecSim/spaces/normalize/normalize_naive.h | 10 ++++++
 src/VecSim/spaces/spaces.cpp                  |  6 ++++
 src/VecSim/utils/vec_utils.cpp                | 13 +++++++
 src/VecSim/utils/vec_utils.h                  |  4 +++
 src/VecSim/vec_sim_common.h                   |  1 +
 src/VecSim/vec_sim_index.h                    |  5 ++-
 src/python_bindings/bindings.cpp              |  1 +
 tests/unit/CMakeLists.txt                     |  2 +-
 tests/unit/test_normalize.cpp                 | 21 +++++++++++
 14 files changed, 99 insertions(+), 51 deletions(-)
 create mode 100644 tests/unit/test_normalize.cpp

diff --git a/src/VecSim/index_factories/brute_force_factory.cpp b/src/VecSim/index_factories/brute_force_factory.cpp
index 8d777cf64..04a33fd79 100644
--- a/src/VecSim/index_factories/brute_force_factory.cpp
+++ b/src/VecSim/index_factories/brute_force_factory.cpp
@@ -33,10 +33,12 @@ inline VecSimIndex *NewIndex_ChooseMultiOrSingle(const BFParams *params,
 static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
 
     const BFParams *bfParams = &params->algoParams.bfParams;
+    size_t dataSize = VecSimParams_GetDataSize(bfParams->type, bfParams->dim, bfParams->metric);
     AbstractIndexInitParams abstractInitParams = {.allocator =
                                                       VecSimAllocator::newVecsimAllocator(),
                                                   .dim = bfParams->dim,
                                                   .vecType = bfParams->type,
+                                                  .dataSize = dataSize,
                                                   .metric = bfParams->metric,
                                                   .blockSize = bfParams->blockSize,
                                                   .multi = bfParams->multi,
@@ -52,30 +54,23 @@ VecSimIndex *NewIndex(const VecSimParams *params, bool is_normalized) {
 
 VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &abstractInitParams,
                       bool is_normalized) {
-    // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
-    // and query blobs.
-    VecSimMetric metric;
-    if (is_normalized && bfparams->metric == VecSimMetric_Cosine) {
-        metric = VecSimMetric_IP;
-    } else {
-        metric = bfparams->metric;
-    }
+
     if (bfparams->type == VecSimType_FLOAT32) {
         IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float>(bfparams, abstractInitParams, indexComponents);
     } else if (bfparams->type == VecSimType_FLOAT64) {
         IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<double>(bfparams, abstractInitParams, indexComponents);
     } else if (bfparams->type == VecSimType_BFLOAT16) {
         IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(bfparams, abstractInitParams,
                                                              indexComponents);
     } else if (bfparams->type == VecSimType_FLOAT16) {
         IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(bfparams, abstractInitParams,
                                                             indexComponents);
     }
diff --git a/src/VecSim/index_factories/components/components_factory.h b/src/VecSim/index_factories/components/components_factory.h
index 5846192e4..3afe0f820 100644
--- a/src/VecSim/index_factories/components/components_factory.h
+++ b/src/VecSim/index_factories/components/components_factory.h
@@ -14,14 +14,24 @@
 
 template <typename DataType, typename DistType>
 IndexComponents<DataType, DistType>
-CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric metric, size_t dim) {
+CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric metric, size_t dim,
+                      bool is_normalized) {
     unsigned char alignment = 0;
     spaces::dist_func_t<DistType> distFunc =
         spaces::GetDistFunc<DataType, DistType>(metric, dim, &alignment);
     // Currently we have only one distance calculator implementation
     auto indexCalculator = new (allocator) DistanceCalculatorCommon<DistType>(allocator, distFunc);
 
-    PreprocessorsContainerParams ppParams = {.metric = metric, .dim = dim, .alignment = alignment};
+    // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
+    // and query blobs.
+    VecSimMetric pp_metric;
+    if (is_normalized && metric == VecSimMetric_Cosine) {
+        pp_metric = VecSimMetric_IP;
+    } else {
+        pp_metric = metric;
+    }
+    PreprocessorsContainerParams ppParams = {
+        .metric = pp_metric, .dim = dim, .alignment = alignment};
     auto preprocessors = CreatePreprocessorsContainer<DataType>(allocator, ppParams);
 
     return {indexCalculator, preprocessors};
diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp
index 3c70c7964..3e3dad32b 100644
--- a/src/VecSim/index_factories/hnsw_factory.cpp
+++ b/src/VecSim/index_factories/hnsw_factory.cpp
@@ -33,10 +33,14 @@ NewIndex_ChooseMultiOrSingle(const HNSWParams *params,
 
 static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
     const HNSWParams *hnswParams = &params->algoParams.hnswParams;
+
+    size_t dataSize =
+        VecSimParams_GetDataSize(hnswParams->type, hnswParams->dim, hnswParams->metric);
     AbstractIndexInitParams abstractInitParams = {.allocator =
                                                       VecSimAllocator::newVecsimAllocator(),
                                                   .dim = hnswParams->dim,
                                                   .vecType = hnswParams->type,
+                                                  .dataSize = dataSize,
                                                   .metric = hnswParams->metric,
                                                   .blockSize = hnswParams->blockSize,
                                                   .multi = hnswParams->multi,
@@ -48,34 +52,25 @@ VecSimIndex *NewIndex(const VecSimParams *params, bool is_normalized) {
     const HNSWParams *hnswParams = &params->algoParams.hnswParams;
     AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params);
 
-    // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
-    // and query blobs.
-    VecSimMetric metric;
-    if (is_normalized && hnswParams->metric == VecSimMetric_Cosine) {
-        metric = VecSimMetric_IP;
-    } else {
-        metric = hnswParams->metric;
-    }
-
     if (hnswParams->type == VecSimType_FLOAT32) {
         IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float>(hnswParams, abstractInitParams, indexComponents);
 
     } else if (hnswParams->type == VecSimType_FLOAT64) {
         IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<double>(hnswParams, abstractInitParams,
                                                     indexComponents);
 
     } else if (hnswParams->type == VecSimType_BFLOAT16) {
         IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(hnswParams, abstractInitParams,
                                                              indexComponents);
     } else if (hnswParams->type == VecSimType_FLOAT16) {
         IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(hnswParams, abstractInitParams,
                                                             indexComponents);
     }
@@ -203,32 +198,25 @@ VecSimIndex *NewIndex(const std::string &location, bool is_normalized) {
     VecSimParams vecsimParams = {.algo = VecSimAlgo_HNSWLIB,
                                  .algoParams = {.hnswParams = HNSWParams{params}}};
 
-    VecSimMetric metric;
-    if (is_normalized && params.metric == VecSimMetric_Cosine) {
-        metric = VecSimMetric_IP;
-    } else {
-        metric = params.metric;
-    }
-
     AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(&vecsimParams);
     if (params.type == VecSimType_FLOAT32) {
         IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float>(input, &params, abstractInitParams,
                                                    indexComponents, version);
     } else if (params.type == VecSimType_FLOAT64) {
         IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<double>(input, &params, abstractInitParams,
                                                     indexComponents, version);
     } else if (params.type == VecSimType_BFLOAT16) {
         IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(input, &params, abstractInitParams,
                                                              indexComponents, version);
     } else if (params.type == VecSimType_FLOAT16) {
         IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(input, &params, abstractInitParams,
                                                             indexComponents, version);
     } else {
diff --git a/src/VecSim/index_factories/tiered_factory.cpp b/src/VecSim/index_factories/tiered_factory.cpp
index d56f3e4a0..1db14f230 100644
--- a/src/VecSim/index_factories/tiered_factory.cpp
+++ b/src/VecSim/index_factories/tiered_factory.cpp
@@ -34,9 +34,12 @@ inline VecSimIndex *NewIndex(const TieredIndexParams *params) {
                           .blockSize = params->primaryIndexParams->algoParams.hnswParams.blockSize};
 
     std::shared_ptr<VecSimAllocator> flat_allocator = VecSimAllocator::newVecsimAllocator();
+    size_t dataSize = VecSimParams_GetDataSize(bf_params.type, bf_params.dim, bf_params.metric);
+
     AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator,
                                                   .dim = bf_params.dim,
                                                   .vecType = bf_params.type,
+                                                  .dataSize = dataSize,
                                                   .metric = bf_params.metric,
                                                   .blockSize = bf_params.blockSize,
                                                   .multi = bf_params.multi,
diff --git a/src/VecSim/spaces/normalize/compute_norm.h b/src/VecSim/spaces/normalize/compute_norm.h
index 89b3b7d5c..d58139648 100644
--- a/src/VecSim/spaces/normalize/compute_norm.h
+++ b/src/VecSim/spaces/normalize/compute_norm.h
@@ -6,13 +6,7 @@
 
 #pragma once
 
-#include "VecSim/types/bfloat16.h"
-#include "VecSim/types/float16.h"
 #include <cmath>
-#include <vector>
-
-using bfloat16 = vecsim_types::bfloat16;
-using float16 = vecsim_types::float16;
 
 namespace spaces {
 
@@ -21,12 +15,11 @@ static inline float IntegralType_ComputeNorm(const DataType *vec, const size_t d
     int sum = 0;
 
     for (size_t i = 0; i < dim; i++) {
-        int val = static_cast<int>(vec[i]);
-        sum += val * val;
+        // No need to cast to int because c++ integer promotion ensures vec[i] is promoted to int
+        // before multiplication.
+        sum += vec[i] * vec[i];
     }
-    float norm = sqrt(sum);
+    return sqrt(sum);
 }
 
-
-
 } // namespace spaces
diff --git a/src/VecSim/spaces/normalize/normalize_naive.h b/src/VecSim/spaces/normalize/normalize_naive.h
index 119c19dcf..2264b7da7 100644
--- a/src/VecSim/spaces/normalize/normalize_naive.h
+++ b/src/VecSim/spaces/normalize/normalize_naive.h
@@ -8,6 +8,7 @@
 
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
+#include "compute_norm.h"
 #include <cmath>
 #include <vector>
 
@@ -73,4 +74,13 @@ static inline void float16_normalizeVector(void *vec, const size_t dim) {
     }
 }
 
+static inline void int8_normalizeVector(void *vec, const size_t dim) {
+    int8_t *input_vector = (int8_t *)vec;
+
+    float norm = IntegralType_ComputeNorm<int8_t>(input_vector, dim);
+
+    // Store norm at the end of the vector.
+    *(float *)(input_vector + dim) = norm;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/spaces/spaces.cpp b/src/VecSim/spaces/spaces.cpp
index b512c9750..a169cf494 100644
--- a/src/VecSim/spaces/spaces.cpp
+++ b/src/VecSim/spaces/spaces.cpp
@@ -107,4 +107,10 @@ normalizeVector_f<vecsim_types::float16> GetNormalizeFunc<vecsim_types::float16>
     return float16_normalizeVector;
 }
 
+/** The returned function computes the norm and stores it at the end of the given vector */
+template <>
+normalizeVector_f<int8_t> GetNormalizeFunc<int8_t>(void) {
+    return int8_normalizeVector;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/utils/vec_utils.cpp b/src/VecSim/utils/vec_utils.cpp
index 99160c247..cbe61338b 100644
--- a/src/VecSim/utils/vec_utils.cpp
+++ b/src/VecSim/utils/vec_utils.cpp
@@ -27,6 +27,7 @@ const char *VecSimCommonStrings::FLOAT32_STRING = "FLOAT32";
 const char *VecSimCommonStrings::FLOAT64_STRING = "FLOAT64";
 const char *VecSimCommonStrings::BFLOAT16_STRING = "BFLOAT16";
 const char *VecSimCommonStrings::FLOAT16_STRING = "FLOAT16";
+const char *VecSimCommonStrings::INT8_STRING = "INT8";
 const char *VecSimCommonStrings::INT32_STRING = "INT32";
 const char *VecSimCommonStrings::INT64_STRING = "INT64";
 
@@ -147,6 +148,8 @@ const char *VecSimType_ToString(VecSimType vecsimType) {
         return VecSimCommonStrings::BFLOAT16_STRING;
     case VecSimType_FLOAT16:
         return VecSimCommonStrings::FLOAT16_STRING;
+    case VecSimType_INT8:
+        return VecSimCommonStrings::INT8_STRING;
     case VecSimType_INT32:
         return VecSimCommonStrings::INT32_STRING;
     case VecSimType_INT64:
@@ -195,6 +198,8 @@ size_t VecSimType_sizeof(VecSimType type) {
         return sizeof(bfloat16);
     case VecSimType_FLOAT16:
         return sizeof(float16);
+    case VecSimType_INT8:
+        return sizeof(int8_t);
     case VecSimType_INT32:
         return sizeof(int32_t);
     case VecSimType_INT64:
@@ -202,3 +207,11 @@ size_t VecSimType_sizeof(VecSimType type) {
     }
     return 0;
 }
+
+size_t VecSimParams_GetDataSize(VecSimType type, size_t dim, VecSimMetric metric) {
+    size_t dataSize = VecSimType_sizeof(type) * dim;
+    if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) {
+        dataSize += sizeof(float); // For the norm
+    }
+    return dataSize;
+}
diff --git a/src/VecSim/utils/vec_utils.h b/src/VecSim/utils/vec_utils.h
index abb0c5688..18a5d1db3 100644
--- a/src/VecSim/utils/vec_utils.h
+++ b/src/VecSim/utils/vec_utils.h
@@ -27,6 +27,7 @@ struct VecSimCommonStrings {
     static const char *FLOAT64_STRING;
     static const char *BFLOAT16_STRING;
     static const char *FLOAT16_STRING;
+    static const char *INT8_STRING;
     static const char *INT32_STRING;
     static const char *INT64_STRING;
 
@@ -90,3 +91,6 @@ const char *VecSimMetric_ToString(VecSimMetric vecsimMetric);
 const char *VecSimSearchMode_ToString(VecSearchMode vecsimSearchMode);
 
 size_t VecSimType_sizeof(VecSimType vecsimType);
+
+/** Returns the size in bytes of a stored or query vector */
+size_t VecSimParams_GetDataSize(VecSimType type, size_t dim, VecSimMetric metric);
diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h
index 943338dee..e8062484a 100644
--- a/src/VecSim/vec_sim_common.h
+++ b/src/VecSim/vec_sim_common.h
@@ -36,6 +36,7 @@ typedef enum {
     VecSimType_FLOAT64,
     VecSimType_BFLOAT16,
     VecSimType_FLOAT16,
+    VecSimType_INT8,
     VecSimType_INT32,
     VecSimType_INT64
 } VecSimType;
diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h
index 669b6a7bd..51e4fce71 100644
--- a/src/VecSim/vec_sim_index.h
+++ b/src/VecSim/vec_sim_index.h
@@ -25,6 +25,7 @@
  * @param allocator The allocator to use for the index.
  * @param dim The dimension of the vectors in the index.
  * @param vecType The type of the vectors in the index.
+ * @param dataSize The size of stored vectors in bytes.
  * @param metric The metric to use in the index.
  * @param blockSize The block size to use in the index.
  * @param multi Determines if the index should multi-index or not.
@@ -34,6 +35,7 @@ struct AbstractIndexInitParams {
     std::shared_ptr<VecSimAllocator> allocator;
     size_t dim;
     VecSimType vecType;
+    size_t dataSize;
     VecSimMetric metric;
     size_t blockSize;
     bool multi;
@@ -102,12 +104,13 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
     VecSimIndexAbstract(const AbstractIndexInitParams &params,
                         const IndexComponents<DataType, DistType> &components)
         : VecSimIndexInterface(params.allocator), dim(params.dim), vecType(params.vecType),
-          dataSize(dim * VecSimType_sizeof(vecType)), metric(params.metric),
+          dataSize(params.dataSize), metric(params.metric),
           blockSize(params.blockSize ? params.blockSize : DEFAULT_BLOCK_SIZE),
           indexCalculator(components.indexCalculator), preprocessors(components.preprocessors),
           alignment(preprocessors->getAlignment()), lastMode(EMPTY_MODE), isMulti(params.multi),
           logCallbackCtx(params.logCtx), normalize_func(spaces::GetNormalizeFunc<DataType>()) {
         assert(VecSimType_sizeof(vecType));
+        assert(dataSize);
     }
 
     /**
diff --git a/src/python_bindings/bindings.cpp b/src/python_bindings/bindings.cpp
index 13215d6a4..b72670fb8 100644
--- a/src/python_bindings/bindings.cpp
+++ b/src/python_bindings/bindings.cpp
@@ -534,6 +534,7 @@ PYBIND11_MODULE(VecSim, m) {
         .value("VecSimType_FLOAT64", VecSimType_FLOAT64)
         .value("VecSimType_BFLOAT16", VecSimType_BFLOAT16)
         .value("VecSimType_FLOAT16", VecSimType_FLOAT16)
+        .value("VecSimType_INT8", VecSimType_INT8)
         .value("VecSimType_INT32", VecSimType_INT32)
         .value("VecSimType_INT64", VecSimType_INT64)
         .export_values();
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index caa3fc522..fb720f264 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -34,7 +34,7 @@ add_executable(test_hnsw ../utils/mock_thread_pool.cpp test_hnsw.cpp test_hnsw_m
 add_executable(test_hnsw_parallel test_hnsw_parallel.cpp unit_test_utils.cpp)
 add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp unit_test_utils.cpp)
 add_executable(test_allocator test_allocator.cpp unit_test_utils.cpp)
-add_executable(test_spaces test_spaces.cpp)
+add_executable(test_spaces test_spaces.cpp test_normalize.cpp)
 add_executable(test_types test_types.cpp)
 add_executable(test_common ../utils/mock_thread_pool.cpp unit_test_utils.cpp test_common.cpp)
 add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp)
diff --git a/tests/unit/test_normalize.cpp b/tests/unit/test_normalize.cpp
new file mode 100644
index 000000000..568b58b7a
--- /dev/null
+++ b/tests/unit/test_normalize.cpp
@@ -0,0 +1,21 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include <random> //TODO: remove once callinng populate_int8_vec
+
+#include "gtest/gtest.h"
+#include "VecSim/spaces/normalize/compute_norm.h"
+class NormalizeTest : public ::testing::Test {};
+
+TEST_F(NormalizeTest, TestINT8ComputeNorm) {
+    size_t dim = 4;
+    int8_t v[] = {-68, -100, 24, 127};
+    float expected_norm = 177.0; // manually calculated
+
+    float norm = spaces::IntegralType_ComputeNorm<int8_t>(v, dim);
+
+    ASSERT_EQ(norm, expected_norm);
+}

From c32e4fb971c9498079787ab16396212c10dcbdaf Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 12 Dec 2024 17:41:48 +0000
Subject: [PATCH 21/33] add int8 tests

---
 tests/unit/CMakeLists.txt |  3 ++
 tests/unit/test_int8.cpp  | 74 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 tests/unit/test_int8.cpp

diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index fb720f264..82c831e73 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -39,6 +39,7 @@ add_executable(test_types test_types.cpp)
 add_executable(test_common ../utils/mock_thread_pool.cpp unit_test_utils.cpp test_common.cpp)
 add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp)
 add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp unit_test_utils.cpp)
+add_executable(test_int8 test_int8.cpp unit_test_utils.cpp)
 
 target_link_libraries(test_hnsw PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_hnsw_parallel PUBLIC gtest_main VectorSimilarity)
@@ -49,6 +50,7 @@ target_link_libraries(test_common PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_types PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_bf16 PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_fp16 PUBLIC gtest_main VectorSimilarity)
+target_link_libraries(test_int8 PUBLIC gtest_main VectorSimilarity)
 
 include(GoogleTest)
 
@@ -61,3 +63,4 @@ gtest_discover_tests(test_common)
 gtest_discover_tests(test_types)
 gtest_discover_tests(test_bf16 TEST_PREFIX BF16UNIT_)
 gtest_discover_tests(test_fp16 TEST_PREFIX FP16UNIT_)
+gtest_discover_tests(test_int8 TEST_PREFIX INT8UNIT_)
diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
new file mode 100644
index 000000000..14ad219d2
--- /dev/null
+++ b/tests/unit/test_int8.cpp
@@ -0,0 +1,74 @@
+#include "gtest/gtest.h"
+#include "VecSim/vec_sim.h"
+#include "VecSim/algorithms/hnsw/hnsw_single.h"
+// #include "VecSim/index_factories/hnsw_factory.h"
+#include "tests_utils.h"
+#include "test_utils.h"
+// #include "VecSim/utils/serializer.h"
+// #include "mock_thread_pool.h"
+// #include "VecSim/query_result_definitions.h"
+// #include "VecSim/types/float16.h"
+// #include "VecSim/vec_sim_debug.h"
+// #include "VecSim/spaces/L2/L2.h"
+
+class INT8Test : public ::testing::Test {
+protected:
+    virtual void SetUp(HNSWParams &params) {
+        FAIL() << "INT8Test::SetUp(HNSWParams) this method should be overriden";
+    }
+
+    virtual void TearDown() { VecSimIndex_Free(index); }
+
+    virtual const void *GetDataByInternalId(idType id) = 0;
+
+    template <typename algo_t>
+    algo_t *CastIndex() {
+        return dynamic_cast<algo_t *>(index);
+    }
+
+    void GenerateVector(int8_t *out_vec) { test_utils::populate_int8_vec(out_vec, dim); }
+
+    int GenerateAndAddVector(size_t id) {
+        int8_t v[dim];
+        GenerateVector(v);
+        return VecSimIndex_AddVector(index, v, id);
+    }
+    template <typename params_t>
+    void create_index_test(params_t index_params);
+
+    VecSimIndex *index;
+    size_t dim;
+};
+
+class INT8HNSWTest : public INT8Test {
+protected:
+    virtual void SetUp(HNSWParams &params) override {
+        params.type = VecSimType_INT8;
+        VecSimParams vecsim_params = CreateParams(params);
+        index = VecSimIndex_New(&vecsim_params);
+        dim = params.dim;
+    }
+
+    virtual const void *GetDataByInternalId(idType id) override {
+        return CastIndex<HNSWIndex_Single<int8_t, float>>()->getDataByInternalId(id);
+    }
+};
+
+/* ---------------------------- Create index tests ---------------------------- */
+
+template <typename params_t>
+void INT8Test::create_index_test(params_t index_params) {
+    SetUp(index_params);
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
+
+    int8_t vector[dim];
+    this->GenerateVector(vector);
+    VecSimIndex_AddVector(index, vector, 0);
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
+    ASSERT_EQ(index->getDistanceFrom_Unsafe(0, vector), 0);
+
+    ASSERT_NO_FATAL_FAILURE(
+        CompareVectors(static_cast<const int8_t *>(this->GetDataByInternalId(0)), vector, dim));
+}

From de6769d1038b2bce17171b84fa3a3c6cde3be81b Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 15 Dec 2024 05:54:56 +0000
Subject: [PATCH 22/33] fix include unit_test_utils

---
 tests/unit/test_int8.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
index 14ad219d2..f9f3766af 100644
--- a/tests/unit/test_int8.cpp
+++ b/tests/unit/test_int8.cpp
@@ -3,7 +3,7 @@
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
 // #include "VecSim/index_factories/hnsw_factory.h"
 #include "tests_utils.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 // #include "VecSim/utils/serializer.h"
 // #include "mock_thread_pool.h"
 // #include "VecSim/query_result_definitions.h"

From f4598d346e271a4d5a1d8eca382993bb7b0f44b0 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 15 Dec 2024 12:43:51 +0000
Subject: [PATCH 23/33] add int 8 to index factories

remove normalize_func from VecSimIndexAbstract members

tests:
int8 unit test
create int8 indexes

unit_test_utils:
CalcIndexDataSize: casts VecSimIndex * to VecSimIndexAbstract<dist_t, data_t> * and calls VecSimIndexAbstract<dist_t, data_t>::getDataSize()

cast_to_tiered_index<data_t, dist_t>: takes VecSimIndex * and casts it to TieredHNSWIndex<data_t, dist_t> *
---
 .../hnsw/hnsw_tiered_tests_friends.h          |  1 +
 .../index_factories/brute_force_factory.cpp   |  5 +
 src/VecSim/index_factories/hnsw_factory.cpp   |  5 +
 src/VecSim/index_factories/tiered_factory.cpp |  2 +
 src/VecSim/vec_sim_index.h                    |  5 +-
 tests/unit/CMakeLists.txt                     |  2 +-
 tests/unit/test_common.cpp                    | 98 +++++++++++++++++++
 tests/unit/test_int8.cpp                      | 97 +++++++++++++++++-
 tests/unit/unit_test_utils.cpp                | 43 ++++++++
 tests/unit/unit_test_utils.h                  | 10 ++
 10 files changed, 261 insertions(+), 7 deletions(-)

diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
index b4cec5fef..f9e70ba68 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
@@ -57,6 +57,7 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_switchDeleteModes_Test)
 
 friend class BF16TieredTest;
 friend class FP16TieredTest;
+friend class DataSizeTest_TestTieredHNSW_Test;
 
 INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)
 INDEX_TEST_FRIEND_CLASS(BM_VecSimCommon)
diff --git a/src/VecSim/index_factories/brute_force_factory.cpp b/src/VecSim/index_factories/brute_force_factory.cpp
index 7fea34140..191feede4 100644
--- a/src/VecSim/index_factories/brute_force_factory.cpp
+++ b/src/VecSim/index_factories/brute_force_factory.cpp
@@ -73,6 +73,11 @@ VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &a
             abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(bfparams, abstractInitParams,
                                                             indexComponents);
+    } else if (bfparams->type == VecSimType_INT8) {
+        IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
+        return NewIndex_ChooseMultiOrSingle<int8_t, float>(bfparams, abstractInitParams,
+                                                           indexComponents);
     }
 
     // If we got here something is wrong.
diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp
index 580dc3cae..8d05348cd 100644
--- a/src/VecSim/index_factories/hnsw_factory.cpp
+++ b/src/VecSim/index_factories/hnsw_factory.cpp
@@ -73,6 +73,11 @@ VecSimIndex *NewIndex(const VecSimParams *params, bool is_normalized) {
             abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(hnswParams, abstractInitParams,
                                                             indexComponents);
+    } else if (hnswParams->type == VecSimType_INT8) {
+        IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
+        return NewIndex_ChooseMultiOrSingle<int8_t, float>(hnswParams, abstractInitParams,
+                                                           indexComponents);
     }
 
     // If we got here something is wrong.
diff --git a/src/VecSim/index_factories/tiered_factory.cpp b/src/VecSim/index_factories/tiered_factory.cpp
index e85d90698..bbd683c50 100644
--- a/src/VecSim/index_factories/tiered_factory.cpp
+++ b/src/VecSim/index_factories/tiered_factory.cpp
@@ -99,6 +99,8 @@ VecSimIndex *NewIndex(const TieredIndexParams *params) {
         return TieredHNSWFactory::NewIndex<bfloat16, float>(params);
     } else if (type == VecSimType_FLOAT16) {
         return TieredHNSWFactory::NewIndex<float16, float>(params);
+    } else if (type == VecSimType_INT8) {
+        return TieredHNSWFactory::NewIndex<int8_t, float>(params);
     }
     return nullptr; // Invalid type.
 }
diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h
index c9964cdd4..a5762c835 100644
--- a/src/VecSim/vec_sim_index.h
+++ b/src/VecSim/vec_sim_index.h
@@ -93,9 +93,6 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
         return info;
     }
 
-    spaces::normalizeVector_f<DataType>
-        normalize_func; // A pointer to a normalization function of specific type.
-
 public:
     /**
      * @brief Construct a new Vec Sim Index object
@@ -108,7 +105,7 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
           blockSize(params.blockSize ? params.blockSize : DEFAULT_BLOCK_SIZE),
           indexCalculator(components.indexCalculator), preprocessors(components.preprocessors),
           alignment(preprocessors->getAlignment()), lastMode(EMPTY_MODE), isMulti(params.multi),
-          logCallbackCtx(params.logCtx), normalize_func(spaces::GetNormalizeFunc<DataType>()) {
+          logCallbackCtx(params.logCtx) {
         assert(VecSimType_sizeof(vecType));
         assert(dataSize);
     }
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 82c831e73..f2a448493 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -39,7 +39,7 @@ add_executable(test_types test_types.cpp)
 add_executable(test_common ../utils/mock_thread_pool.cpp unit_test_utils.cpp test_common.cpp)
 add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp)
 add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp unit_test_utils.cpp)
-add_executable(test_int8 test_int8.cpp unit_test_utils.cpp)
+add_executable(test_int8 ../utils/mock_thread_pool.cpp test_int8.cpp unit_test_utils.cpp)
 
 target_link_libraries(test_hnsw PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_hnsw_parallel PUBLIC gtest_main VectorSimilarity)
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index bdfd6d9f2..3e39234cf 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -625,6 +625,104 @@ TEST(CommonAPITest, NormalizeFloat16) {
     ASSERT_NEAR(1.0, norm, 0.001);
 }
 
+// test datasize
+class DataSizeTest : public testing::TestWithParam<std::tuple<VecSimType, VecSimMetric>> {
+protected:
+    template <typename algo_params>
+    void test_datasize() {
+        size_t dim = 4;
+        VecSimType type = std::get<0>(GetParam());
+        VecSimMetric metric = std::get<1>(GetParam());
+        algo_params params = {.dim = dim, .metric = metric};
+        VecSimIndex *index = test_utils::CreateNewIndex(params, type);
+        size_t actual = test_utils::CalcIndexDataSize(index, type);
+        size_t expected = dim * VecSimType_sizeof(type);
+        if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) {
+            expected += sizeof(float);
+        }
+        ASSERT_EQ(actual, expected);
+    }
+};
+
+TEST_P(DataSizeTest, TestBF) { this->test_datasize<BFParams>(); }
+TEST_P(DataSizeTest, TestHNSW) { this->test_datasize<HNSWParams>(); }
+TEST_P(DataSizeTest, TestTieredHNSW) {
+    size_t dim = 4;
+    VecSimType type = std::get<0>(GetParam());
+    VecSimMetric metric = std::get<1>(GetParam());
+
+    HNSWParams hnsw_params = {.type = type, .dim = 4, .metric = metric};
+    VecSimParams params{.algo = VecSimAlgo_HNSWLIB,
+                        .algoParams = {.hnswParams = HNSWParams{hnsw_params}}};
+    auto mock_thread_pool = tieredIndexMock();
+    TieredIndexParams tiered_params = {.jobQueue = &mock_thread_pool.jobQ,
+                                       .jobQueueCtx = mock_thread_pool.ctx,
+                                       .submitCb = tieredIndexMock::submit_callback,
+                                       .flatBufferLimit = SIZE_MAX,
+                                       .primaryIndexParams = &params,
+                                       .specificParams = {TieredHNSWParams{.swapJobThreshold = 0}}};
+    VecSimIndex *index = TieredFactory::NewIndex(&tiered_params);
+    mock_thread_pool.ctx->index_strong_ref.reset(index);
+    // TODO:move death test to a separate test
+    // ASSERT_DEBUG_DEATH(test_utils::CalcIndexDataSize(index, type), "dynamic_cast failed");
+
+    auto verify_data_size = [&](const auto &tiered_index) {
+        auto hnsw_index = tiered_index->getHNSWIndex();
+        auto bf_index = tiered_index->getFlatBufferIndex();
+        size_t expected = dim * VecSimType_sizeof(type);
+        if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) {
+            expected += sizeof(float);
+        }
+        size_t actual_hnsw = hnsw_index->getDataSize();
+        ASSERT_EQ(actual_hnsw, expected);
+        size_t actual_bf = bf_index->getDataSize();
+        ASSERT_EQ(actual_bf, expected);
+    };
+
+    switch (type) {
+    case VecSimType_FLOAT32: {
+        auto tiered_index = test_utils::cast_to_tiered_index<float, float>(index);
+        verify_data_size(tiered_index);
+        break;
+    }
+    case VecSimType_FLOAT64: {
+        auto tiered_index = test_utils::cast_to_tiered_index<double, double>(index);
+        verify_data_size(tiered_index);
+        break;
+    }
+    case VecSimType_BFLOAT16: {
+        auto tiered_index = test_utils::cast_to_tiered_index<vecsim_types::bfloat16, float>(index);
+        verify_data_size(tiered_index);
+        break;
+    }
+    case VecSimType_FLOAT16: {
+        auto tiered_index = test_utils::cast_to_tiered_index<vecsim_types::float16, float>(index);
+        verify_data_size(tiered_index);
+        break;
+    }
+    case VecSimType_INT8: {
+        auto tiered_index = test_utils::cast_to_tiered_index<int8_t, float>(index);
+        verify_data_size(tiered_index);
+        break;
+    }
+    default:
+        FAIL() << "Unsupported data type";
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    CommonTest, DataSizeTest,
+    testing::Combine(testing::Values(VecSimType_FLOAT32, VecSimType_FLOAT64, VecSimType_INT8),
+                     testing::Values(VecSimMetric_L2, VecSimMetric_IP, VecSimMetric_Cosine)),
+    [](const testing::TestParamInfo<DataSizeTest::ParamType> &info) {
+        const char *type = VecSimType_ToString(std::get<0>(info.param));
+        const char *metric = VecSimMetric_ToString(std::get<1>(info.param));
+        std::string test_name(type);
+        return test_name + "_" + metric;
+    });
+
+// for each index type- >test with all metric types
+// for each metric type -> test with all data types
 class IndexCalculatorTest : public ::testing::Test {};
 
 namespace dummyCalcultor {
diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
index f9f3766af..a2d3904bb 100644
--- a/tests/unit/test_int8.cpp
+++ b/tests/unit/test_int8.cpp
@@ -5,9 +5,8 @@
 #include "tests_utils.h"
 #include "unit_test_utils.h"
 // #include "VecSim/utils/serializer.h"
-// #include "mock_thread_pool.h"
+#include "mock_thread_pool.h"
 // #include "VecSim/query_result_definitions.h"
-// #include "VecSim/types/float16.h"
 // #include "VecSim/vec_sim_debug.h"
 // #include "VecSim/spaces/L2/L2.h"
 
@@ -17,6 +16,14 @@ class INT8Test : public ::testing::Test {
         FAIL() << "INT8Test::SetUp(HNSWParams) this method should be overriden";
     }
 
+    virtual void SetUp(BFParams &params) {
+        FAIL() << "INT8Test::SetUp(BFParams) this method should be overriden";
+    }
+
+    virtual void SetUp(TieredIndexParams &tiered_params) {
+        FAIL() << "INT8Test::SetUp(TieredIndexParams) this method should be overriden";
+    }
+
     virtual void TearDown() { VecSimIndex_Free(index); }
 
     virtual const void *GetDataByInternalId(idType id) = 0;
@@ -26,6 +33,11 @@ class INT8Test : public ::testing::Test {
         return dynamic_cast<algo_t *>(index);
     }
 
+    template <typename algo_t>
+    algo_t *CastIndex(VecSimIndex *vecsim_index) {
+        return dynamic_cast<algo_t *>(vecsim_index);
+    }
+
     void GenerateVector(int8_t *out_vec) { test_utils::populate_int8_vec(out_vec, dim); }
 
     int GenerateAndAddVector(size_t id) {
@@ -54,6 +66,66 @@ class INT8HNSWTest : public INT8Test {
     }
 };
 
+class INT8BruteForceTest : public INT8Test {
+protected:
+    virtual void SetUp(BFParams &params) override {
+        params.type = VecSimType_INT8;
+        VecSimParams vecsim_params = CreateParams(params);
+        index = VecSimIndex_New(&vecsim_params);
+        dim = params.dim;
+    }
+
+    virtual const void *GetDataByInternalId(idType id) override {
+        return CastIndex<BruteForceIndex_Single<int8_t, float>>()->getDataByInternalId(id);
+    }
+};
+
+class INT8TieredTest : public INT8Test {
+protected:
+    TieredIndexParams generate_tiered_params(HNSWParams &hnsw_params, size_t swap_job_threshold = 0,
+                                             size_t flat_buffer_limit = SIZE_MAX) {
+        hnsw_params.type = VecSimType_INT8;
+        vecsim_hnsw_params = CreateParams(hnsw_params);
+        TieredIndexParams tiered_params = {
+            .jobQueue = &mock_thread_pool.jobQ,
+            .jobQueueCtx = mock_thread_pool.ctx,
+            .submitCb = tieredIndexMock::submit_callback,
+            .flatBufferLimit = flat_buffer_limit,
+            .primaryIndexParams = &vecsim_hnsw_params,
+            .specificParams = {TieredHNSWParams{.swapJobThreshold = swap_job_threshold}}};
+        return tiered_params;
+    }
+
+    virtual void SetUp(TieredIndexParams &tiered_params) override {
+        VecSimParams params = CreateParams(tiered_params);
+        index = VecSimIndex_New(&params);
+        dim = tiered_params.primaryIndexParams->algoParams.hnswParams.dim;
+
+        // Set the created tiered index in the index external context.
+        mock_thread_pool.ctx->index_strong_ref.reset(index);
+    }
+
+    virtual void SetUp(HNSWParams &hnsw_params) override {
+        TieredIndexParams tiered_params = generate_tiered_params(hnsw_params);
+        SetUp(tiered_params);
+    }
+
+    virtual void TearDown() override {}
+
+    virtual const void *GetDataByInternalId(idType id) override {
+        return CastIndex<BruteForceIndex<int8_t, float>>(CastToBruteForce())
+            ->getDataByInternalId(id);
+    }
+
+    VecSimIndexAbstract<int8_t, float> *CastToBruteForce() {
+        auto tiered_index = dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index);
+        return tiered_index->getFlatBufferIndex();
+    }
+
+    VecSimParams vecsim_hnsw_params;
+    tieredIndexMock mock_thread_pool;
+};
+
 /* ---------------------------- Create index tests ---------------------------- */
 
 template <typename params_t>
@@ -72,3 +144,24 @@ void INT8Test::create_index_test(params_t index_params) {
     ASSERT_NO_FATAL_FAILURE(
         CompareVectors(static_cast<const int8_t *>(this->GetDataByInternalId(0)), vector, dim));
 }
+
+TEST_F(INT8HNSWTest, createIndex) {
+    HNSWParams params = {.dim = 40, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(create_index_test(params));
+    ASSERT_EQ(index->basicInfo().type, VecSimType_INT8);
+    ASSERT_EQ(index->basicInfo().algo, VecSimAlgo_HNSWLIB);
+}
+
+TEST_F(INT8BruteForceTest, createIndex) {
+    BFParams params = {.dim = 40};
+    EXPECT_NO_FATAL_FAILURE(create_index_test(params));
+    ASSERT_EQ(index->basicInfo().type, VecSimType_INT8);
+    ASSERT_EQ(index->basicInfo().algo, VecSimAlgo_BF);
+}
+
+TEST_F(INT8TieredTest, createIndex) {
+    HNSWParams params = {.dim = 40, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(create_index_test(params));
+    ASSERT_EQ(index->basicInfo().type, VecSimType_INT8);
+    ASSERT_EQ(index->basicInfo().isTiered, true);
+}
diff --git a/tests/unit/unit_test_utils.cpp b/tests/unit/unit_test_utils.cpp
index 89973d19d..8d888cff0 100644
--- a/tests/unit/unit_test_utils.cpp
+++ b/tests/unit/unit_test_utils.cpp
@@ -376,3 +376,46 @@ size_t getLabelsLookupNodeSize() {
     size_t memory_after = allocator->getAllocationSize();
     return memory_after - memory_before;
 }
+namespace test_utils {
+size_t CalcIndexDataSize(VecSimIndex *index, VecSimType data_type) {
+    switch (data_type) {
+    case VecSimType_FLOAT32: {
+        VecSimIndexAbstract<float, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<float, float> *>(index);
+        assert(abs_index &&
+               "dynamic_cast failed: can't convert index to VecSimIndexAbstract<float, float>");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_FLOAT64: {
+        VecSimIndexAbstract<double, double> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<double, double> *>(index);
+        assert(abs_index &&
+               "dynamic_cast failed: can't convert index to VecSimIndexAbstract<double, double>");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_BFLOAT16: {
+        VecSimIndexAbstract<vecsim_types::bfloat16, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<vecsim_types::bfloat16, float> *>(index);
+        assert(abs_index && "dynamic_cast failed: can't convert index to "
+                            "VecSimIndexAbstract<vecsim_types::bfloat16, float>");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_FLOAT16: {
+        VecSimIndexAbstract<vecsim_types::float16, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<vecsim_types::float16, float> *>(index);
+        assert(abs_index && "dynamic_cast failed: can't convert index to "
+                            "VecSimIndexAbstract<vecsim_types::float16, float>");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_INT8: {
+        VecSimIndexAbstract<int8_t, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<int8_t, float> *>(index);
+        assert(abs_index &&
+               "dynamic_cast failed: can't convert index to VecSimIndexAbstract<int8_t, float>");
+        return abs_index->getDataSize();
+    }
+    default:
+        return 0;
+    }
+}
+} // namespace test_utils
diff --git a/tests/unit/unit_test_utils.h b/tests/unit/unit_test_utils.h
index 54478cfd7..db677c85a 100644
--- a/tests/unit/unit_test_utils.h
+++ b/tests/unit/unit_test_utils.h
@@ -162,6 +162,16 @@ inline double GetInfVal(VecSimType type) {
         throw std::invalid_argument("This type is not supported");
     }
 }
+// TODO: Move all test_utils to this namespace
+namespace test_utils {
+size_t CalcIndexDataSize(VecSimIndex *index, VecSimType data_type);
+
+template <typename data_t, typename dist_t>
+TieredHNSWIndex<data_t, dist_t> *cast_to_tiered_index(VecSimIndex *index) {
+    return dynamic_cast<TieredHNSWIndex<data_t, dist_t> *>(index);
+}
+
+} // namespace test_utils
 
 // Test a specific exception type is thrown and prints the right message.
 #define ASSERT_EXCEPTION_MESSAGE(VALUE, EXCEPTION_TYPE, MESSAGE)                                   \

From d5b7d276311c03e431b5d49cdb28f85227b4fd22 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 16 Dec 2024 07:41:34 +0000
Subject: [PATCH 24/33] add EstimateInitialSize for int8 to index factories

2 new functions in test_utils:
CreateTieredParams
CreateNewTieredHNSWIndex

add test_initial_size_estimation to CommonTypeMetricTests
use CommonTypeMetricTieredTests for tiered tests
---
 .../hnsw/hnsw_tiered_tests_friends.h          |   2 +-
 .../index_factories/brute_force_factory.cpp   |   5 +
 src/VecSim/index_factories/hnsw_factory.cpp   |   5 +
 src/VecSim/index_factories/tiered_factory.cpp |   4 +
 tests/unit/CMakeLists.txt                     |   8 +-
 tests/unit/test_common.cpp                    | 126 +++++++++++++-----
 tests/unit/unit_test_utils.cpp                |  23 ++++
 tests/unit/unit_test_utils.h                  |   6 +
 8 files changed, 140 insertions(+), 39 deletions(-)

diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
index f9e70ba68..1ba646fb3 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
@@ -57,7 +57,7 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_switchDeleteModes_Test)
 
 friend class BF16TieredTest;
 friend class FP16TieredTest;
-friend class DataSizeTest_TestTieredHNSW_Test;
+friend class CommonTypeMetricTieredTests_TestDataSizeTieredHNSW_Test;
 
 INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)
 INDEX_TEST_FRIEND_CLASS(BM_VecSimCommon)
diff --git a/src/VecSim/index_factories/brute_force_factory.cpp b/src/VecSim/index_factories/brute_force_factory.cpp
index 191feede4..a7afb9c88 100644
--- a/src/VecSim/index_factories/brute_force_factory.cpp
+++ b/src/VecSim/index_factories/brute_force_factory.cpp
@@ -117,6 +117,11 @@ size_t EstimateInitialSize(const BFParams *params, bool is_normalized) {
     } else if (params->type == VecSimType_FLOAT16) {
         est += EstimateComponentsMemory<float16, float>(params->metric, is_normalized);
         est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
+    } else if (params->type == VecSimType_INT8) {
+        est += EstimateComponentsMemory<int8_t, float>(params->metric, is_normalized);
+        est += EstimateInitialSize_ChooseMultiOrSingle<int8_t, float>(params->multi);
+    } else {
+        throw std::invalid_argument("Invalid params->type");
     }
 
     est += sizeof(DataBlocksContainer) + allocations_overhead;
diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp
index 8d05348cd..de8672133 100644
--- a/src/VecSim/index_factories/hnsw_factory.cpp
+++ b/src/VecSim/index_factories/hnsw_factory.cpp
@@ -114,6 +114,11 @@ size_t EstimateInitialSize(const HNSWParams *params, bool is_normalized) {
     } else if (params->type == VecSimType_FLOAT16) {
         est += EstimateComponentsMemory<float16, float>(params->metric, is_normalized);
         est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
+    } else if (params->type == VecSimType_INT8) {
+        est += EstimateComponentsMemory<int8_t, float>(params->metric, is_normalized);
+        est += EstimateInitialSize_ChooseMultiOrSingle<int8_t, float>(params->multi);
+    } else {
+        throw std::invalid_argument("Invalid params->type");
     }
     return est;
 }
diff --git a/src/VecSim/index_factories/tiered_factory.cpp b/src/VecSim/index_factories/tiered_factory.cpp
index bbd683c50..930630692 100644
--- a/src/VecSim/index_factories/tiered_factory.cpp
+++ b/src/VecSim/index_factories/tiered_factory.cpp
@@ -83,6 +83,10 @@ inline size_t EstimateInitialSize(const TieredIndexParams *params) {
         est += sizeof(TieredHNSWIndex<bfloat16, float>);
     } else if (hnsw_params.type == VecSimType_FLOAT16) {
         est += sizeof(TieredHNSWIndex<float16, float>);
+    } else if (hnsw_params.type == VecSimType_INT8) {
+        est += sizeof(TieredHNSWIndex<int8_t, float>);
+    } else {
+        throw std::invalid_argument("Invalid hnsw_params.type");
     }
 
     return est;
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index f2a448493..2320b08ff 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -31,12 +31,12 @@ endif()
 include(${root}/cmake/x86_64InstructionFlags.cmake)
 
 add_executable(test_hnsw ../utils/mock_thread_pool.cpp test_hnsw.cpp test_hnsw_multi.cpp test_hnsw_tiered.cpp unit_test_utils.cpp)
-add_executable(test_hnsw_parallel test_hnsw_parallel.cpp unit_test_utils.cpp)
-add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp unit_test_utils.cpp)
-add_executable(test_allocator test_allocator.cpp unit_test_utils.cpp)
+add_executable(test_hnsw_parallel test_hnsw_parallel.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
+add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
+add_executable(test_allocator test_allocator.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
 add_executable(test_spaces test_spaces.cpp test_normalize.cpp)
 add_executable(test_types test_types.cpp)
-add_executable(test_common ../utils/mock_thread_pool.cpp unit_test_utils.cpp test_common.cpp)
+add_executable(test_common ../utils/mock_thread_pool.cpp test_common.cpp unit_test_utils.cpp )
 add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp)
 add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp unit_test_utils.cpp)
 add_executable(test_int8 ../utils/mock_thread_pool.cpp test_int8.cpp unit_test_utils.cpp)
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 3e39234cf..607c0bb7d 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -625,44 +625,72 @@ TEST(CommonAPITest, NormalizeFloat16) {
     ASSERT_NEAR(1.0, norm, 0.001);
 }
 
-// test datasize
-class DataSizeTest : public testing::TestWithParam<std::tuple<VecSimType, VecSimMetric>> {
+class CommonTypeMetricTests : public testing::TestWithParam<std::tuple<VecSimType, VecSimMetric>> {
 protected:
     template <typename algo_params>
-    void test_datasize() {
-        size_t dim = 4;
-        VecSimType type = std::get<0>(GetParam());
-        VecSimMetric metric = std::get<1>(GetParam());
-        algo_params params = {.dim = dim, .metric = metric};
-        VecSimIndex *index = test_utils::CreateNewIndex(params, type);
-        size_t actual = test_utils::CalcIndexDataSize(index, type);
-        size_t expected = dim * VecSimType_sizeof(type);
-        if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) {
-            expected += sizeof(float);
-        }
-        ASSERT_EQ(actual, expected);
+    void test_datasize();
+
+    template <typename algo_params>
+    void test_initial_size_estimation();
+
+    virtual void TearDown() { VecSimIndex_Free(index); }
+
+    VecSimIndex *index;
+};
+
+template <typename algo_params>
+void CommonTypeMetricTests::test_datasize() {
+    size_t dim = 4;
+    VecSimType type = std::get<0>(GetParam());
+    VecSimMetric metric = std::get<1>(GetParam());
+    algo_params params = {.dim = dim, .metric = metric};
+    this->index = test_utils::CreateNewIndex(params, type);
+    size_t actual = test_utils::CalcIndexDataSize(index, type);
+    size_t expected = dim * VecSimType_sizeof(type);
+    if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) {
+        expected += sizeof(float);
     }
+    ASSERT_EQ(actual, expected);
+}
+
+TEST_P(CommonTypeMetricTests, TestDataSizeBF) { this->test_datasize<BFParams>(); }
+TEST_P(CommonTypeMetricTests, TestDataSizeHNSW) { this->test_datasize<HNSWParams>(); }
+
+template <typename algo_params>
+void CommonTypeMetricTests::test_initial_size_estimation() {
+    size_t dim = 4;
+    VecSimType type = std::get<0>(GetParam());
+    VecSimMetric metric = std::get<1>(GetParam());
+    algo_params params = {.dim = dim, .metric = metric};
+    this->index = test_utils::CreateNewIndex(params, type);
+
+    size_t estimation = EstimateInitialSize(params);
+    size_t actual = index->getAllocationSize();
+
+    ASSERT_EQ(estimation, actual);
+}
+
+TEST_P(CommonTypeMetricTests, TestInitialSizeEstimationBF) {
+    this->test_initial_size_estimation<BFParams>();
+}
+TEST_P(CommonTypeMetricTests, TestInitialSizeEstimationHNSW) {
+    this->test_initial_size_estimation<HNSWParams>();
+}
+
+class CommonTypeMetricTieredTests : public CommonTypeMetricTests {
+protected:
+    virtual void TearDown() override {}
+
+    tieredIndexMock mock_thread_pool;
 };
 
-TEST_P(DataSizeTest, TestBF) { this->test_datasize<BFParams>(); }
-TEST_P(DataSizeTest, TestHNSW) { this->test_datasize<HNSWParams>(); }
-TEST_P(DataSizeTest, TestTieredHNSW) {
+TEST_P(CommonTypeMetricTieredTests, TestDataSizeTieredHNSW) {
     size_t dim = 4;
     VecSimType type = std::get<0>(GetParam());
     VecSimMetric metric = std::get<1>(GetParam());
 
     HNSWParams hnsw_params = {.type = type, .dim = 4, .metric = metric};
-    VecSimParams params{.algo = VecSimAlgo_HNSWLIB,
-                        .algoParams = {.hnswParams = HNSWParams{hnsw_params}}};
-    auto mock_thread_pool = tieredIndexMock();
-    TieredIndexParams tiered_params = {.jobQueue = &mock_thread_pool.jobQ,
-                                       .jobQueueCtx = mock_thread_pool.ctx,
-                                       .submitCb = tieredIndexMock::submit_callback,
-                                       .flatBufferLimit = SIZE_MAX,
-                                       .primaryIndexParams = &params,
-                                       .specificParams = {TieredHNSWParams{.swapJobThreshold = 0}}};
-    VecSimIndex *index = TieredFactory::NewIndex(&tiered_params);
-    mock_thread_pool.ctx->index_strong_ref.reset(index);
+    VecSimIndex *index = test_utils::CreateNewTieredHNSWIndex(hnsw_params, this->mock_thread_pool);
     // TODO:move death test to a separate test
     // ASSERT_DEBUG_DEATH(test_utils::CalcIndexDataSize(index, type), "dynamic_cast failed");
 
@@ -710,21 +738,51 @@ TEST_P(DataSizeTest, TestTieredHNSW) {
     }
 }
 
+TEST_P(CommonTypeMetricTieredTests, TestInitialSizeEstimationTieredHNSW) {
+    size_t dim = 4;
+    VecSimType type = std::get<0>(GetParam());
+    VecSimMetric metric = std::get<1>(GetParam());
+    HNSWParams hnsw_params = {.type = type, .dim = dim, .metric = metric};
+    VecSimParams vecsim_hnsw_params = CreateParams(hnsw_params);
+    TieredIndexParams tiered_params =
+        test_utils::CreateTieredParams(vecsim_hnsw_params, this->mock_thread_pool);
+    VecSimParams params = CreateParams(tiered_params);
+    auto *index = VecSimIndex_New(&params);
+    mock_thread_pool.ctx->index_strong_ref.reset(index);
+
+    size_t estimation = VecSimIndex_EstimateInitialSize(&params);
+    size_t actual = index->getAllocationSize();
+
+    ASSERT_EQ(estimation, actual);
+}
+
+constexpr VecSimType vecsim_datatypes[] = {VecSimType_FLOAT32, VecSimType_FLOAT64,
+                                           VecSimType_BFLOAT16, VecSimType_FLOAT16,
+                                           VecSimType_INT8};
+
+INSTANTIATE_TEST_SUITE_P(CommonTest, CommonTypeMetricTests,
+                         testing::Combine(testing::ValuesIn(vecsim_datatypes),
+                                          testing::Values(VecSimMetric_L2, VecSimMetric_IP,
+                                                          VecSimMetric_Cosine)),
+                         [](const testing::TestParamInfo<CommonTypeMetricTests::ParamType> &info) {
+                             const char *type = VecSimType_ToString(std::get<0>(info.param));
+                             const char *metric = VecSimMetric_ToString(std::get<1>(info.param));
+                             std::string test_name(type);
+                             return test_name + "_" + metric;
+                         });
+
 INSTANTIATE_TEST_SUITE_P(
-    CommonTest, DataSizeTest,
-    testing::Combine(testing::Values(VecSimType_FLOAT32, VecSimType_FLOAT64, VecSimType_INT8),
+    CommonTieredTest, CommonTypeMetricTieredTests,
+    testing::Combine(testing::ValuesIn(vecsim_datatypes),
                      testing::Values(VecSimMetric_L2, VecSimMetric_IP, VecSimMetric_Cosine)),
-    [](const testing::TestParamInfo<DataSizeTest::ParamType> &info) {
+    [](const testing::TestParamInfo<CommonTypeMetricTieredTests::ParamType> &info) {
         const char *type = VecSimType_ToString(std::get<0>(info.param));
         const char *metric = VecSimMetric_ToString(std::get<1>(info.param));
         std::string test_name(type);
         return test_name + "_" + metric;
     });
 
-// for each index type- >test with all metric types
-// for each metric type -> test with all data types
 class IndexCalculatorTest : public ::testing::Test {};
-
 namespace dummyCalcultor {
 
 using DummyType = int;
diff --git a/tests/unit/unit_test_utils.cpp b/tests/unit/unit_test_utils.cpp
index 8d888cff0..196a7caa9 100644
--- a/tests/unit/unit_test_utils.cpp
+++ b/tests/unit/unit_test_utils.cpp
@@ -418,4 +418,27 @@ size_t CalcIndexDataSize(VecSimIndex *index, VecSimType data_type) {
         return 0;
     }
 }
+
+TieredIndexParams CreateTieredParams(VecSimParams &primary_params,
+                                     tieredIndexMock &mock_thread_pool) {
+    TieredIndexParams tiered_params = {.jobQueue = &mock_thread_pool.jobQ,
+                                       .jobQueueCtx = mock_thread_pool.ctx,
+                                       .submitCb = tieredIndexMock::submit_callback,
+                                       .flatBufferLimit = SIZE_MAX,
+                                       .primaryIndexParams = &primary_params,
+                                       .specificParams = {TieredHNSWParams{.swapJobThreshold = 0}}};
+
+    return tiered_params;
+}
+
+VecSimIndex *CreateNewTieredHNSWIndex(const HNSWParams &hnsw_params,
+                                      tieredIndexMock &mock_thread_pool) {
+    VecSimParams primary_params = CreateParams(hnsw_params);
+    auto tiered_params = CreateTieredParams(primary_params, mock_thread_pool);
+    VecSimParams params = CreateParams(tiered_params);
+    VecSimIndex *index = VecSimIndex_New(&params);
+    mock_thread_pool.ctx->index_strong_ref.reset(index);
+
+    return index;
+}
 } // namespace test_utils
diff --git a/tests/unit/unit_test_utils.h b/tests/unit/unit_test_utils.h
index db677c85a..4e653a469 100644
--- a/tests/unit/unit_test_utils.h
+++ b/tests/unit/unit_test_utils.h
@@ -13,6 +13,7 @@
 
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_tiered.h"
+#include "mock_thread_pool.h"
 #include "gtest/gtest.h"
 
 // IndexType is used to define indices unit tests
@@ -99,6 +100,11 @@ inline VecSimIndex *CreateNewIndex(IndexParams &index_params, VecSimType type,
     return VecSimIndex_New(&params);
 }
 
+TieredIndexParams CreateTieredParams(VecSimParams &primary_params,
+                                     tieredIndexMock &mock_thread_pool);
+VecSimIndex *CreateNewTieredHNSWIndex(const HNSWParams &hnsw_params,
+                                      tieredIndexMock &mock_thread_pool);
+
 extern VecsimQueryType query_types[4];
 
 } // namespace test_utils

From ef9beb69e2a973ddd444d13d7bbd3b43991d83c8 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 17 Dec 2024 15:54:07 +0000
Subject: [PATCH 25/33] add int8 unit tests

add int8 to
* VecSimDebug_GetElementNeighborsInHNSWGraph
* VecSim_Normalize
* HNSW NewIndex from file
---
 .../hnsw/hnsw_tiered_tests_friends.h          |   1 +
 src/VecSim/index_factories/hnsw_factory.cpp   |   5 +
 src/VecSim/vec_sim.cpp                        |   3 +
 src/VecSim/vec_sim_debug.cpp                  |   6 +
 tests/unit/test_common.cpp                    |  16 +
 tests/unit/test_int8.cpp                      | 849 +++++++++++++++++-
 tests/unit/unit_test_utils.cpp                |   1 +
 7 files changed, 871 insertions(+), 10 deletions(-)

diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
index 1ba646fb3..6a37fe48a 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
@@ -57,6 +57,7 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_switchDeleteModes_Test)
 
 friend class BF16TieredTest;
 friend class FP16TieredTest;
+friend class INT8TieredTest;
 friend class CommonTypeMetricTieredTests_TestDataSizeTieredHNSW_Test;
 
 INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)
diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp
index de8672133..819899363 100644
--- a/src/VecSim/index_factories/hnsw_factory.cpp
+++ b/src/VecSim/index_factories/hnsw_factory.cpp
@@ -229,6 +229,11 @@ VecSimIndex *NewIndex(const std::string &location, bool is_normalized) {
             abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(input, &params, abstractInitParams,
                                                             indexComponents, version);
+    } else if (params.type == VecSimType_INT8) {
+        IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
+        return NewIndex_ChooseMultiOrSingle<int8_t, float>(input, &params, abstractInitParams,
+                                                           indexComponents, version);
     } else {
         auto bad_name = VecSimType_ToString(params.type);
         if (bad_name == nullptr) {
diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp
index 56912b07e..1a6d241fb 100644
--- a/src/VecSim/vec_sim.cpp
+++ b/src/VecSim/vec_sim.cpp
@@ -138,6 +138,9 @@ extern "C" void VecSim_Normalize(void *blob, size_t dim, VecSimType type) {
         spaces::GetNormalizeFunc<vecsim_types::bfloat16>()(blob, dim);
     } else if (type == VecSimType_FLOAT16) {
         spaces::GetNormalizeFunc<vecsim_types::float16>()(blob, dim);
+    } else if (type == VecSimType_INT8) {
+        // assuming blob is large enough to store the norm at the end of the vector
+        spaces::GetNormalizeFunc<int8_t>()(blob, dim);
     }
 }
 
diff --git a/src/VecSim/vec_sim_debug.cpp b/src/VecSim/vec_sim_debug.cpp
index 98cc05c91..395a3a9e0 100644
--- a/src/VecSim/vec_sim_debug.cpp
+++ b/src/VecSim/vec_sim_debug.cpp
@@ -32,6 +32,9 @@ extern "C" int VecSimDebug_GetElementNeighborsInHNSWGraph(VecSimIndex *index, si
         } else if (info.type == VecSimType_FLOAT16) {
             return dynamic_cast<HNSWIndex<vecsim_types::float16, float> *>(index)
                 ->getHNSWElementNeighbors(label, neighborsData);
+        } else if (info.type == VecSimType_INT8) {
+            return dynamic_cast<HNSWIndex<int8_t, float> *>(index)->getHNSWElementNeighbors(
+                label, neighborsData);
         } else {
             assert(false && "Invalid data type");
         }
@@ -48,6 +51,9 @@ extern "C" int VecSimDebug_GetElementNeighborsInHNSWGraph(VecSimIndex *index, si
         } else if (info.type == VecSimType_FLOAT16) {
             return dynamic_cast<TieredHNSWIndex<vecsim_types::float16, float> *>(index)
                 ->getHNSWElementNeighbors(label, neighborsData);
+        } else if (info.type == VecSimType_INT8) {
+            return dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index)->getHNSWElementNeighbors(
+                label, neighborsData);
         } else {
             assert(false && "Invalid data type");
         }
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 607c0bb7d..96be49345 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -15,10 +15,12 @@
 #include "VecSim/algorithms/hnsw/hnsw.h"
 #include "VecSim/index_factories/hnsw_factory.h"
 #include "mock_thread_pool.h"
+#include "tests_utils.h"
 #include "VecSim/index_factories/tiered_factory.h"
 #include "VecSim/spaces/spaces.h"
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
+#include "VecSim/spaces/normalize/compute_norm.h"
 
 #include <cstdlib>
 #include <limits>
@@ -625,6 +627,20 @@ TEST(CommonAPITest, NormalizeFloat16) {
     ASSERT_NEAR(1.0, norm, 0.001);
 }
 
+TEST(CommonAPITest, NormalizeInt8) {
+    size_t dim = 20;
+    int8_t v[dim + sizeof(float)];
+
+    test_utils::populate_int8_vec(v, dim);
+
+    VecSim_Normalize(v, dim, VecSimType_INT8);
+
+    float res_norm = *(reinterpret_cast<float *>(v + dim));
+    float expected_norm = spaces::IntegralType_ComputeNorm<int8_t>(v, dim);
+
+    ASSERT_FLOAT_EQ(res_norm, expected_norm);
+}
+
 class CommonTypeMetricTests : public testing::TestWithParam<std::tuple<VecSimType, VecSimMetric>> {
 protected:
     template <typename algo_params>
diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
index a2d3904bb..27da6b6fd 100644
--- a/tests/unit/test_int8.cpp
+++ b/tests/unit/test_int8.cpp
@@ -1,14 +1,13 @@
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
-// #include "VecSim/index_factories/hnsw_factory.h"
 #include "tests_utils.h"
 #include "unit_test_utils.h"
-// #include "VecSim/utils/serializer.h"
 #include "mock_thread_pool.h"
-// #include "VecSim/query_result_definitions.h"
-// #include "VecSim/vec_sim_debug.h"
-// #include "VecSim/spaces/L2/L2.h"
+#include "VecSim/spaces/normalize/compute_norm.h"
+#include "VecSim/vec_sim_debug.h"
+#include "VecSim/spaces/L2/L2.h"
+#include "VecSim/spaces/IP/IP.h"
 
 class INT8Test : public ::testing::Test {
 protected:
@@ -38,15 +37,59 @@ class INT8Test : public ::testing::Test {
         return dynamic_cast<algo_t *>(vecsim_index);
     }
 
-    void GenerateVector(int8_t *out_vec) { test_utils::populate_int8_vec(out_vec, dim); }
+    virtual HNSWIndex<int8_t, float> *CastToHNSW() { return CastIndex<HNSWIndex<int8_t, float>>(); }
 
-    int GenerateAndAddVector(size_t id) {
+    void PopulateRandomVector(int8_t *out_vec) { test_utils::populate_int8_vec(out_vec, dim); }
+    int PopulateRandomAndAddVector(size_t id, int8_t *out_vec) {
+        PopulateRandomVector(out_vec);
+        return VecSimIndex_AddVector(index, out_vec, id);
+    }
+
+    virtual int GenerateAndAddVector(size_t id, int8_t value = 1) {
+        // use unit_test_utils.h
+        return ::GenerateAndAddVector<int8_t>(index, dim, id, value);
+    }
+
+    void GenerateVector(int8_t *out_vec, int8_t value) {
+        // use unit_test_utils.h
+        return ::GenerateVector<int8_t>(out_vec, this->dim, value);
+    }
+
+    virtual int GenerateRandomAndAddVector(size_t id) {
         int8_t v[dim];
-        GenerateVector(v);
+        PopulateRandomVector(v);
         return VecSimIndex_AddVector(index, v, id);
     }
+
+    size_t GetValidVectorsCount() {
+        VecSimIndexInfo info = VecSimIndex_Info(index);
+        return info.commonInfo.indexLabelCount;
+    }
+
     template <typename params_t>
     void create_index_test(params_t index_params);
+    template <typename params_t>
+    void element_size_test(params_t index_params);
+    template <typename params_t>
+    void search_by_id_test(params_t index_params);
+    template <typename params_t>
+    void search_by_score_test(params_t index_params);
+    template <typename params_t>
+    void metrics_test(params_t index_params);
+    template <typename params_t>
+    void search_empty_index_test(params_t index_params);
+    template <typename params_t>
+    void test_override(params_t index_params);
+    template <typename params_t>
+    void test_range_query(params_t index_params);
+    template <typename params_t>
+    void test_batch_iterator_basic(params_t index_params);
+    template <typename params_t>
+    VecSimIndexInfo test_info(params_t index_params);
+    template <typename params_t>
+    void test_info_iterator(VecSimMetric metric);
+    template <typename params_t>
+    void get_element_neighbors(params_t index_params);
 
     VecSimIndex *index;
     size_t dim;
@@ -64,6 +107,17 @@ class INT8HNSWTest : public INT8Test {
     virtual const void *GetDataByInternalId(idType id) override {
         return CastIndex<HNSWIndex_Single<int8_t, float>>()->getDataByInternalId(id);
     }
+
+    virtual HNSWIndex<int8_t, float> *CastToHNSW() override {
+        return CastIndex<HNSWIndex<int8_t, float>>(index);
+    }
+
+    HNSWIndex<int8_t, float> *CastToHNSW(VecSimIndex *new_index) {
+        return CastIndex<HNSWIndex<int8_t, float>>(new_index);
+    }
+
+    void test_info(bool is_multi);
+    void test_serialization(bool is_multi);
 };
 
 class INT8BruteForceTest : public INT8Test {
@@ -78,11 +132,18 @@ class INT8BruteForceTest : public INT8Test {
     virtual const void *GetDataByInternalId(idType id) override {
         return CastIndex<BruteForceIndex_Single<int8_t, float>>()->getDataByInternalId(id);
     }
+
+    virtual HNSWIndex<int8_t, float> *CastToHNSW() override {
+        ADD_FAILURE() << "INT8BruteForceTest::CastToHNSW() this method should not be called";
+        return nullptr;
+    }
+
+    void test_info(bool is_multi);
 };
 
 class INT8TieredTest : public INT8Test {
 protected:
-    TieredIndexParams generate_tiered_params(HNSWParams &hnsw_params, size_t swap_job_threshold = 0,
+    TieredIndexParams generate_tiered_params(HNSWParams &hnsw_params, size_t swap_job_threshold = 1,
                                              size_t flat_buffer_limit = SIZE_MAX) {
         hnsw_params.type = VecSimType_INT8;
         vecsim_hnsw_params = CreateParams(hnsw_params);
@@ -117,11 +178,38 @@ class INT8TieredTest : public INT8Test {
             ->getDataByInternalId(id);
     }
 
+    virtual HNSWIndex<int8_t, float> *CastToHNSW() override {
+        auto tiered_index = dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index);
+        return tiered_index->getHNSWIndex();
+    }
+
+    virtual HNSWIndex_Single<int8_t, float> *CastToHNSWSingle() {
+        return CastIndex<HNSWIndex_Single<int8_t, float>>(CastToHNSW());
+    }
+
     VecSimIndexAbstract<int8_t, float> *CastToBruteForce() {
         auto tiered_index = dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index);
         return tiered_index->getFlatBufferIndex();
     }
 
+    int GenerateRandomAndAddVector(size_t id) override {
+        int8_t v[dim];
+        PopulateRandomVector(v);
+        int ret = VecSimIndex_AddVector(index, v, id);
+        mock_thread_pool.thread_iteration();
+        return ret;
+    }
+
+    int GenerateAndAddVector(size_t id, int8_t value) override {
+        // Add through the base-class helper, then call thread_iteration() on the mock pool.
+        int ret = INT8Test::GenerateAndAddVector(id, value);
+        mock_thread_pool.thread_iteration();
+        return ret;
+    }
+
+    void test_info(bool is_multi);
+    void test_info_iterator(VecSimMetric metric);
+
     VecSimParams vecsim_hnsw_params;
     tieredIndexMock mock_thread_pool;
 };
@@ -135,7 +223,7 @@ void INT8Test::create_index_test(params_t index_params) {
     ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
 
     int8_t vector[dim];
-    this->GenerateVector(vector);
+    this->PopulateRandomVector(vector);
     VecSimIndex_AddVector(index, vector, 0);
 
     ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
@@ -165,3 +253,744 @@ TEST_F(INT8TieredTest, createIndex) {
     ASSERT_EQ(index->basicInfo().type, VecSimType_INT8);
     ASSERT_EQ(index->basicInfo().isTiered, true);
 }
+
+/* ---------------------------- Size Estimation tests ---------------------------- */
+
+template <typename params_t>
+void INT8Test::element_size_test(params_t index_params) {
+    SetUp(index_params);
+
+    // Estimate the memory delta of adding a single vector that requires a full new block.
+    size_t estimation = EstimateElementSize(index_params) * DEFAULT_BLOCK_SIZE;
+    size_t before = index->getAllocationSize();
+    ASSERT_EQ(this->GenerateRandomAndAddVector(0), 1);
+    size_t actual = index->getAllocationSize() - before;
+
+    // We check that the actual size is within 1% of the estimation.
+    ASSERT_GE(estimation, actual * 0.99);
+    ASSERT_LE(estimation, actual * 1.01);
+}
+
+TEST_F(INT8HNSWTest, elementSizeEstimation) {
+    size_t M = 64;
+
+    HNSWParams params = {.dim = 4, .M = M};
+    EXPECT_NO_FATAL_FAILURE(element_size_test(params));
+}
+
+TEST_F(INT8BruteForceTest, elementSizeEstimation) {
+    BFParams params = {.dim = 4};
+    EXPECT_NO_FATAL_FAILURE(element_size_test(params));
+}
+
+TEST_F(INT8TieredTest, elementSizeEstimation) {
+    size_t M = 64;
+    HNSWParams hnsw_params = {.dim = 4, .M = M};
+    VecSimParams vecsim_hnsw_params = CreateParams(hnsw_params);
+    TieredIndexParams tiered_params =
+        test_utils::CreateTieredParams(vecsim_hnsw_params, this->mock_thread_pool);
+    EXPECT_NO_FATAL_FAILURE(element_size_test(tiered_params));
+}
+
+/* ---------------------------- Functionality tests ---------------------------- */
+
+template <typename params_t>
+void INT8Test::search_by_id_test(params_t index_params) {
+    SetUp(index_params);
+
+    size_t k = 11;
+    int8_t n = 100;
+
+    for (int8_t i = 0; i < n; i++) {
+        this->GenerateAndAddVector(i, i); // {i, i, i, i}
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    int8_t query[dim];
+    this->GenerateVector(query, 50); // {50, 50, 50, 50}
+
+    // Each vector's values equal its id, so the 11 closest vectors are 45, 46...50
+    // (closest), 51...55
+    static size_t expected_res_order[] = {45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55};
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, expected_res_order[index]);    // results are sorted by ID
+        ASSERT_EQ(score, 4 * (50 - id) * (50 - id)); // L2 distance
+    };
+
+    runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID);
+}
+
+TEST_F(INT8HNSWTest, searchByID) {
+    HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(search_by_id_test(params));
+}
+
+TEST_F(INT8BruteForceTest, searchByID) {
+    BFParams params = {.dim = 4};
+    EXPECT_NO_FATAL_FAILURE(search_by_id_test(params));
+}
+
+TEST_F(INT8TieredTest, searchByID) {
+    HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(search_by_id_test(params));
+}
+
+template <typename params_t>
+void INT8Test::search_by_score_test(params_t index_params) {
+    SetUp(index_params);
+
+    size_t k = 11;
+    size_t n = 100;
+
+    for (size_t i = 0; i < n; i++) {
+        this->GenerateAndAddVector(i, i); // {i, i, i, i}
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    int8_t query[dim];
+    this->GenerateVector(query, 50); // {50, 50, 50, 50}
+
+    // Each vector's values equal its id, so the 11 closest vectors are
+    // 45, 46...50 (closest), 51...55
+    static size_t expected_res_order[] = {50, 49, 51, 48, 52, 47, 53, 46, 54, 45, 55};
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, expected_res_order[index]);
+        ASSERT_EQ(score, 4 * (50 - id) * (50 - id)); // L2 distance
+    };
+
+    // Search by score
+    runTopKSearchTest(index, query, k, verify_res);
+}
+
+TEST_F(INT8HNSWTest, searchByScore) {
+    HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(search_by_score_test(params));
+}
+
+TEST_F(INT8BruteForceTest, searchByScore) {
+    BFParams params = {.dim = 4};
+    EXPECT_NO_FATAL_FAILURE(search_by_score_test(params));
+}
+
+TEST_F(INT8TieredTest, searchByScore) {
+    HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(search_by_score_test(params));
+}
+
+template <typename params_t>
+void INT8Test::metrics_test(params_t index_params) {
+    SetUp(index_params);
+    size_t n = 10;
+    VecSimMetric metric = index_params.metric;
+    double expected_score = 0;
+
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(score, expected_score) << "failed at vector id:" << id;
+    };
+
+    for (size_t i = 0; i < n; i++) {
+        int8_t vector[dim];
+        this->PopulateRandomAndAddVector(i, vector);
+
+        if (metric == VecSimMetric_Cosine) {
+            // compare with the norm stored in the index vector
+            const int8_t *index_vector = static_cast<const int8_t *>(this->GetDataByInternalId(i));
+            float index_vector_norm = *(reinterpret_cast<const float *>(index_vector + dim));
+            float vector_norm = spaces::IntegralType_ComputeNorm<int8_t>(vector, dim);
+            ASSERT_EQ(index_vector_norm, vector_norm) << "wrong vector norm for vector id:" << i;
+        } else if (metric == VecSimMetric_IP) {
+            expected_score = INT8_InnerProduct(vector, vector, dim);
+        }
+
+        // query index with k = 1 expect to get the vector
+        runTopKSearchTest(index, vector, 1, verify_res);
+        ASSERT_EQ(VecSimIndex_IndexSize(index), i + 1);
+    }
+}
+
+TEST_F(INT8HNSWTest, CosineTest) {
+    HNSWParams params = {.dim = 40, .metric = VecSimMetric_Cosine, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(metrics_test(params));
+}
+TEST_F(INT8HNSWTest, IPTest) {
+    HNSWParams params = {.dim = 40, .metric = VecSimMetric_IP, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE((metrics_test)(params));
+}
+TEST_F(INT8HNSWTest, L2Test) {
+    HNSWParams params = {.dim = 40, .metric = VecSimMetric_L2, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(metrics_test(params));
+}
+
+TEST_F(INT8BruteForceTest, CosineTest) {
+    BFParams params = {.dim = 40, .metric = VecSimMetric_Cosine};
+    EXPECT_NO_FATAL_FAILURE(metrics_test(params));
+}
+TEST_F(INT8BruteForceTest, IPTest) {
+    BFParams params = {.dim = 40, .metric = VecSimMetric_IP};
+    EXPECT_NO_FATAL_FAILURE((metrics_test)(params));
+}
+TEST_F(INT8BruteForceTest, L2Test) {
+    BFParams params = {.dim = 40, .metric = VecSimMetric_L2};
+    EXPECT_NO_FATAL_FAILURE(metrics_test(params));
+}
+
+TEST_F(INT8TieredTest, CosineTest) {
+    HNSWParams params = {.dim = 40, .metric = VecSimMetric_Cosine, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(metrics_test(params));
+}
+TEST_F(INT8TieredTest, IPTest) {
+    HNSWParams params = {.dim = 40, .metric = VecSimMetric_IP, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE((metrics_test)(params));
+}
+TEST_F(INT8TieredTest, L2Test) {
+    HNSWParams params = {.dim = 40, .metric = VecSimMetric_L2, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(metrics_test(params));
+}
+
+template <typename params_t>
+void INT8Test::search_empty_index_test(params_t params) {
+    size_t n = 100;
+    size_t k = 11;
+
+    SetUp(params);
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
+
+    int8_t query[dim];
+    this->GenerateVector(query, 50); // {50, 50, 50, 50}
+
+    // We do not expect any results.
+    VecSimQueryReply *res = VecSimIndex_TopKQuery(index, query, k, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    VecSimQueryReply_Iterator *it = VecSimQueryReply_GetIterator(res);
+    ASSERT_EQ(VecSimQueryReply_IteratorNext(it), nullptr);
+    VecSimQueryReply_IteratorFree(it);
+    VecSimQueryReply_Free(res);
+
+    res = VecSimIndex_RangeQuery(index, query, 1.0, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    VecSimQueryReply_Free(res);
+
+    // Add some vectors and remove them all from index, so it will be empty again.
+    for (size_t i = 0; i < n; i++) {
+        this->GenerateAndAddVector(i);
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+    for (size_t i = 0; i < n; i++) {
+        VecSimIndex_DeleteVector(index, i);
+    }
+    // vectors marked as deleted will be included in VecSimIndex_IndexSize
+    ASSERT_EQ(GetValidVectorsCount(), 0);
+
+    // Again - we do not expect any results.
+    res = VecSimIndex_TopKQuery(index, query, k, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    it = VecSimQueryReply_GetIterator(res);
+    ASSERT_EQ(VecSimQueryReply_IteratorNext(it), nullptr);
+    VecSimQueryReply_IteratorFree(it);
+    VecSimQueryReply_Free(res);
+
+    res = VecSimIndex_RangeQuery(index, query, 1.0, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    VecSimQueryReply_Free(res);
+}
+
+TEST_F(INT8HNSWTest, SearchEmptyIndex) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 0};
+    EXPECT_NO_FATAL_FAILURE(search_empty_index_test(params));
+}
+
+TEST_F(INT8BruteForceTest, SearchEmptyIndex) {
+    BFParams params = {.dim = 4, .initialCapacity = 0};
+    EXPECT_NO_FATAL_FAILURE(search_empty_index_test(params));
+}
+
+TEST_F(INT8TieredTest, SearchEmptyIndex) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 0};
+    EXPECT_NO_FATAL_FAILURE(search_empty_index_test(params));
+}
+
+template <typename params_t>
+void INT8Test::test_override(params_t params) {
+    size_t n = 50;
+    size_t new_n = 120;
+    SetUp(params);
+
+    // Insert n vectors.
+    for (size_t i = 0; i < n; i++) {
+        ASSERT_EQ(GenerateAndAddVector(i, i), 1);
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    // Override the n existing vectors; each one is overwritten (deleted first).
+    for (size_t i = 0; i < n; i++) {
+        ASSERT_EQ(this->GenerateAndAddVector(i, i), 0);
+    }
+
+    // Add up to new_n vectors.
+    for (size_t i = n; i < new_n; i++) {
+        ASSERT_EQ(this->GenerateAndAddVector(i, i), 1);
+    }
+
+    int8_t query[dim];
+    this->GenerateVector(query, new_n);
+
+    // Each vector's values equal its id, so the larger the id, the closer the vector is to the
+    // query.
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, new_n - 1 - index) << "id: " << id << " score: " << score;
+        float diff = new_n - id;
+        float exp_score = 4 * diff * diff;
+        ASSERT_EQ(score, exp_score) << "id: " << id << " score: " << score;
+    };
+    runTopKSearchTest(index, query, 300, verify_res);
+}
+
+TEST_F(INT8HNSWTest, Override) {
+    HNSWParams params = {
+        .dim = 4, .initialCapacity = 100, .M = 8, .efConstruction = 20, .efRuntime = 250};
+    EXPECT_NO_FATAL_FAILURE(test_override(params));
+}
+
+TEST_F(INT8BruteForceTest, Override) {
+    BFParams params = {.dim = 4, .initialCapacity = 100};
+    EXPECT_NO_FATAL_FAILURE(test_override(params));
+}
+
+TEST_F(INT8TieredTest, Override) {
+    HNSWParams params = {
+        .dim = 4, .initialCapacity = 100, .M = 8, .efConstruction = 20, .efRuntime = 250};
+    EXPECT_NO_FATAL_FAILURE(test_override(params));
+}
+
+template <typename params_t>
+void INT8Test::test_range_query(params_t params) {
+    size_t n = 100;
+    SetUp(params);
+
+    int8_t pivot_value = 1;
+    int8_t pivot_vec[dim];
+    this->GenerateVector(pivot_vec, pivot_value);
+
+    int8_t radius = 20;
+    std::mt19937 gen(42);
+    std::uniform_int_distribution<int16_t> dis(pivot_value - radius, pivot_value + radius);
+
+    // insert 20 vectors near a pivot vector.
+    size_t n_close = 20;
+    for (size_t i = 0; i < n_close; i++) {
+        int8_t random_number = static_cast<int8_t>(dis(gen));
+        this->GenerateAndAddVector(i, random_number);
+    }
+
+    int8_t max_vec[dim];
+    GenerateVector(max_vec, pivot_value + radius);
+    float max_dist = INT8_L2Sqr(pivot_vec, max_vec, dim);
+
+    // Add more vectors far from the pivot vector
+    for (size_t i = n_close; i < n; i++) {
+        int8_t random_number = static_cast<int8_t>(dis(gen));
+        GenerateAndAddVector(i, 50 + random_number);
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    auto verify_res_by_score = [&](size_t id, double score, size_t index) {
+        ASSERT_LE(id, n_close - 1) << "score: " << score;
+        ASSERT_LE(score, max_dist);
+    };
+    size_t expected_num_results = n_close;
+
+    runRangeQueryTest(index, pivot_vec, max_dist, verify_res_by_score, expected_num_results,
+                      BY_SCORE);
+}
+
+TEST_F(INT8HNSWTest, rangeQuery) {
+    HNSWParams params = {.dim = 4};
+    EXPECT_NO_FATAL_FAILURE(test_range_query(params));
+}
+
+TEST_F(INT8BruteForceTest, rangeQuery) {
+    BFParams params = {.dim = 4};
+    EXPECT_NO_FATAL_FAILURE(test_range_query(params));
+}
+
+TEST_F(INT8TieredTest, rangeQuery) {
+    HNSWParams params = {.dim = 4};
+    EXPECT_NO_FATAL_FAILURE(test_range_query(params));
+}
+
+/* ---------------------------- Batch iterator tests ---------------------------- */
+
+template <typename params_t>
+void INT8Test::test_batch_iterator_basic(params_t params) {
+    SetUp(params);
+    size_t n = 100;
+
+    // For every i, add the vector (i,i,i,i) under the label i.
+    for (size_t i = 0; i < n; i++) {
+        ASSERT_EQ(this->GenerateAndAddVector(i, i), 1);
+    }
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    // Query for (n,n,n,n) vector (recall that n-1 is the largest id in the index).
+    int8_t query[dim];
+    GenerateVector(query, n);
+
+    VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query, nullptr);
+    size_t iteration_num = 0;
+
+    // In every iteration, get the 5 vectors with the largest ids among those that haven't been
+    // returned yet. The results should be sorted by their score (distance from the query
+    // vector), which means sorted from the largest id to the lowest.
+    size_t n_res = 5;
+    while (VecSimBatchIterator_HasNext(batchIterator)) {
+        std::vector<size_t> expected_ids(n_res);
+        for (size_t i = 0; i < n_res; i++) {
+            expected_ids[i] = (n - iteration_num * n_res - i - 1);
+        }
+        auto verify_res = [&](size_t id, double score, size_t index) {
+            ASSERT_EQ(expected_ids[index], id)
+                << "iteration_num: " << iteration_num << " index: " << index << " score: " << score;
+        };
+        runBatchIteratorSearchTest(batchIterator, n_res, verify_res);
+        iteration_num++;
+    }
+    ASSERT_EQ(iteration_num, n / n_res);
+    VecSimBatchIterator_Free(batchIterator);
+}
+
+TEST_F(INT8HNSWTest, BatchIteratorBasic) {
+    HNSWParams params = {.dim = 4, .M = 8, .efConstruction = 20, .efRuntime = 100};
+    EXPECT_NO_FATAL_FAILURE(test_batch_iterator_basic(params));
+}
+
+TEST_F(INT8BruteForceTest, BatchIteratorBasic) {
+    BFParams params = {.dim = 4};
+    EXPECT_NO_FATAL_FAILURE(test_batch_iterator_basic(params));
+}
+
+TEST_F(INT8TieredTest, BatchIteratorBasic) {
+    HNSWParams params = {.dim = 4, .M = 8, .efConstruction = 20, .efRuntime = 100};
+    EXPECT_NO_FATAL_FAILURE(test_batch_iterator_basic(params));
+}
+
+/* ---------------------------- Info tests ---------------------------- */
+// Calls SetUp(params), checks the info reported for the still-empty index, and returns it.
+template <typename params_t>
+VecSimIndexInfo INT8Test::test_info(params_t params) {
+    SetUp(params);
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    EXPECT_EQ(info.commonInfo.basicInfo.dim, params.dim);
+    EXPECT_EQ(info.commonInfo.basicInfo.isMulti, params.multi);
+    EXPECT_EQ(info.commonInfo.basicInfo.type, VecSimType_INT8);
+    EXPECT_EQ(info.commonInfo.basicInfo.blockSize, DEFAULT_BLOCK_SIZE);
+    EXPECT_EQ(info.commonInfo.indexSize, 0);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.commonInfo.memory, index->getAllocationSize());
+    EXPECT_EQ(info.commonInfo.basicInfo.metric, VecSimMetric_L2);
+
+    // The restricted basic info must agree with the full info on every shared field.
+    VecSimIndexBasicInfo s_info = VecSimIndex_BasicInfo(index);
+    EXPECT_EQ(info.commonInfo.basicInfo.algo, s_info.algo);
+    EXPECT_EQ(info.commonInfo.basicInfo.dim, s_info.dim);
+    EXPECT_EQ(info.commonInfo.basicInfo.blockSize, s_info.blockSize);
+    EXPECT_EQ(info.commonInfo.basicInfo.type, s_info.type);
+    EXPECT_EQ(info.commonInfo.basicInfo.isMulti, s_info.isMulti);
+    EXPECT_EQ(info.commonInfo.basicInfo.metric, s_info.metric);
+    EXPECT_EQ(info.commonInfo.basicInfo.isTiered, s_info.isTiered);
+
+    return info;
+}
+
+void INT8HNSWTest::test_info(bool is_multi) {
+    HNSWParams params = {.dim = 128, .multi = is_multi};
+    VecSimIndexInfo info = INT8Test::test_info(params);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+
+    ASSERT_EQ(info.hnswInfo.M, HNSW_DEFAULT_M);
+    ASSERT_EQ(info.hnswInfo.efConstruction, HNSW_DEFAULT_EF_C);
+    ASSERT_EQ(info.hnswInfo.efRuntime, HNSW_DEFAULT_EF_RT);
+    ASSERT_DOUBLE_EQ(info.hnswInfo.epsilon, HNSW_DEFAULT_EPSILON);
+}
+TEST_F(INT8HNSWTest, testInfoSingle) { test_info(false); }
+TEST_F(INT8HNSWTest, testInfoMulti) { test_info(true); }
+
+void INT8BruteForceTest::test_info(bool is_multi) {
+    BFParams params = {.dim = 128, .multi = is_multi};
+    VecSimIndexInfo info = INT8Test::test_info(params);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_BF);
+}
+
+TEST_F(INT8BruteForceTest, testInfoSingle) { test_info(false); }
+TEST_F(INT8BruteForceTest, testInfoMulti) { test_info(true); }
+
+void INT8TieredTest::test_info(bool is_multi) {
+    size_t bufferLimit = SIZE_MAX;
+    HNSWParams hnsw_params = {.dim = 128, .multi = is_multi};
+
+    VecSimIndexInfo info = INT8Test::test_info(hnsw_params);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+    VecSimIndexInfo frontendIndexInfo = CastToBruteForce()->info();
+    VecSimIndexInfo backendIndexInfo = CastToHNSW()->info();
+
+    compareCommonInfo(info.tieredInfo.frontendCommonInfo, frontendIndexInfo.commonInfo);
+    compareFlatInfo(info.tieredInfo.bfInfo, frontendIndexInfo.bfInfo);
+    compareCommonInfo(info.tieredInfo.backendCommonInfo, backendIndexInfo.commonInfo);
+    compareHNSWInfo(info.tieredInfo.backendInfo.hnswInfo, backendIndexInfo.hnswInfo);
+
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          backendIndexInfo.commonInfo.memory +
+                                          frontendIndexInfo.commonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
+    EXPECT_EQ(info.tieredInfo.bufferLimit, bufferLimit);
+    EXPECT_EQ(info.tieredInfo.specificTieredBackendInfo.hnswTieredInfo.pendingSwapJobsThreshold, 1);
+
+    INT8Test::GenerateAndAddVector(1, 1);
+    info = index->info();
+
+    EXPECT_EQ(info.commonInfo.indexSize, 1);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 1);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          info.tieredInfo.backendCommonInfo.memory +
+                                          info.tieredInfo.frontendCommonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, true);
+
+    mock_thread_pool.thread_iteration();
+    info = index->info();
+
+    EXPECT_EQ(info.commonInfo.indexSize, 1);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 1);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          info.tieredInfo.backendCommonInfo.memory +
+                                          info.tieredInfo.frontendCommonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
+
+    if (is_multi) {
+        INT8Test::GenerateAndAddVector(1, 1);
+        info = index->info();
+
+        EXPECT_EQ(info.commonInfo.indexSize, 2);
+        EXPECT_EQ(info.commonInfo.indexLabelCount, 1);
+        EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 1);
+        EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 1);
+        EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 1);
+        EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 1);
+        EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                              info.tieredInfo.backendCommonInfo.memory +
+                                              info.tieredInfo.frontendCommonInfo.memory);
+        EXPECT_EQ(info.tieredInfo.backgroundIndexing, true);
+    }
+
+    VecSimIndex_DeleteVector(index, 1);
+    info = index->info();
+
+    EXPECT_EQ(info.commonInfo.indexSize, 0);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          info.tieredInfo.backendCommonInfo.memory +
+                                          info.tieredInfo.frontendCommonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
+}
+
+TEST_F(INT8TieredTest, testInfoSingle) { test_info(false); }
+TEST_F(INT8TieredTest, testInfoMulti) { test_info(true); }
+
+template <typename params_t>
+void INT8Test::test_info_iterator(VecSimMetric metric) {
+    params_t params = {.dim = 128, .metric = metric};
+    SetUp(params);
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    VecSimInfoIterator *infoIter = VecSimIndex_InfoIterator(index);
+    VecSimAlgo algo = info.commonInfo.basicInfo.algo;
+    if (algo == VecSimAlgo_HNSWLIB) {
+        compareHNSWIndexInfoToIterator(info, infoIter);
+    } else if (algo == VecSimAlgo_BF) {
+        compareFlatIndexInfoToIterator(info, infoIter);
+    }
+    VecSimInfoIterator_Free(infoIter);
+}
+
+TEST_F(INT8BruteForceTest, InfoIteratorCosine) {
+    test_info_iterator<BFParams>(VecSimMetric_Cosine);
+}
+TEST_F(INT8BruteForceTest, InfoIteratorIP) { test_info_iterator<BFParams>(VecSimMetric_IP); }
+TEST_F(INT8BruteForceTest, InfoIteratorL2) { test_info_iterator<BFParams>(VecSimMetric_L2); }
+TEST_F(INT8HNSWTest, InfoIteratorCosine) { test_info_iterator<HNSWParams>(VecSimMetric_Cosine); }
+TEST_F(INT8HNSWTest, InfoIteratorIP) { test_info_iterator<HNSWParams>(VecSimMetric_IP); }
+TEST_F(INT8HNSWTest, InfoIteratorL2) { test_info_iterator<HNSWParams>(VecSimMetric_L2); }
+
+void INT8TieredTest::test_info_iterator(VecSimMetric metric) {
+    size_t n = 100;
+    size_t d = 128;
+    HNSWParams params = {.dim = d, .metric = metric, .initialCapacity = n};
+    SetUp(params);
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    VecSimInfoIterator *infoIter = VecSimIndex_InfoIterator(index);
+    VecSimIndexInfo frontendIndexInfo = CastToBruteForce()->info();
+    VecSimIndexInfo backendIndexInfo = CastToHNSW()->info();
+    VecSimInfoIterator_Free(infoIter);
+}
+
+TEST_F(INT8TieredTest, InfoIteratorCosine) { test_info_iterator(VecSimMetric_Cosine); }
+TEST_F(INT8TieredTest, InfoIteratorIP) { test_info_iterator(VecSimMetric_IP); }
+TEST_F(INT8TieredTest, InfoIteratorL2) { test_info_iterator(VecSimMetric_L2); }
+
+/* ---------------------------- HNSW specific tests ---------------------------- */
+
+void INT8HNSWTest::test_serialization(bool is_multi) {
+    size_t dim = 4;
+    size_t n = 1001;
+    size_t n_labels[] = {n, 100};
+    size_t M = 8;
+    size_t ef = 10;
+    double epsilon = 0.004;
+    size_t blockSize = 20;
+    std::string multiToString[] = {"single", "multi_100labels_"};
+
+    HNSWParams params{.type = VecSimType_INT8,
+                      .dim = dim,
+                      .metric = VecSimMetric_Cosine,
+                      .multi = is_multi,
+                      .initialCapacity = n,
+                      .blockSize = blockSize,
+                      .M = M,
+                      .efConstruction = ef,
+                      .efRuntime = ef,
+                      .epsilon = epsilon};
+    SetUp(params);
+
+    auto *hnsw_index = this->CastToHNSW();
+
+    int8_t data[n * dim];
+
+    for (size_t i = 0; i < n * dim; i += dim) {
+        test_utils::populate_int8_vec(data + i, dim, i);
+    }
+
+    for (size_t j = 0; j < n; ++j) {
+        VecSimIndex_AddVector(index, data + dim * j, j % n_labels[is_multi]);
+    }
+
+    auto file_name = std::string(getenv("ROOT")) + "/tests/unit/1k-d4-L2-M8-ef_c10_" +
+                     VecSimType_ToString(VecSimType_INT8) + "_" + multiToString[is_multi] +
+                     ".hnsw_current_version";
+
+    // Save the index with the default version (V3).
+    hnsw_index->saveIndex(file_name);
+
+    // Fetch info after saving, as memory size change during saving.
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+    ASSERT_EQ(info.hnswInfo.M, M);
+    ASSERT_EQ(info.hnswInfo.efConstruction, ef);
+    ASSERT_EQ(info.hnswInfo.efRuntime, ef);
+    ASSERT_EQ(info.commonInfo.indexSize, n);
+    ASSERT_EQ(info.commonInfo.basicInfo.metric, VecSimMetric_Cosine);
+    ASSERT_EQ(info.commonInfo.basicInfo.type, VecSimType_INT8);
+    ASSERT_EQ(info.commonInfo.basicInfo.dim, dim);
+    ASSERT_EQ(info.commonInfo.indexLabelCount, n_labels[is_multi]);
+
+    // Load the index from the file.
+    VecSimIndex *serialized_index = HNSWFactory::NewIndex(file_name);
+    auto *serialized_hnsw_index = this->CastToHNSW(serialized_index);
+
+    // Verify that the index was loaded as expected.
+    ASSERT_TRUE(serialized_hnsw_index->checkIntegrity().valid_state);
+
+    VecSimIndexInfo info2 = VecSimIndex_Info(serialized_index);
+    ASSERT_EQ(info2.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+    ASSERT_EQ(info2.hnswInfo.M, M);
+    ASSERT_EQ(info2.commonInfo.basicInfo.isMulti, is_multi);
+    ASSERT_EQ(info2.commonInfo.basicInfo.blockSize, blockSize);
+    ASSERT_EQ(info2.hnswInfo.efConstruction, ef);
+    ASSERT_EQ(info2.hnswInfo.efRuntime, ef);
+    ASSERT_EQ(info2.commonInfo.indexSize, n);
+    ASSERT_EQ(info2.commonInfo.basicInfo.metric, VecSimMetric_Cosine);
+    ASSERT_EQ(info2.commonInfo.basicInfo.type, VecSimType_INT8);
+    ASSERT_EQ(info2.commonInfo.basicInfo.dim, dim);
+    ASSERT_EQ(info2.commonInfo.indexLabelCount, n_labels[is_multi]);
+    ASSERT_EQ(info2.hnswInfo.epsilon, epsilon);
+
+    // Check the functionality of the loaded index.
+
+    int8_t new_vec[dim];
+    this->PopulateRandomVector(new_vec);
+    VecSimIndex_AddVector(serialized_index, new_vec, n);
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, n) << "score: " << score;
+        ASSERT_EQ(score, 0);
+    };
+    runTopKSearchTest(serialized_index, new_vec, 1, verify_res);
+    VecSimIndex_DeleteVector(serialized_index, 1);
+
+    size_t n_per_label = n / n_labels[is_multi];
+    ASSERT_TRUE(serialized_hnsw_index->checkIntegrity().valid_state);
+    ASSERT_EQ(VecSimIndex_IndexSize(serialized_index), n + 1 - n_per_label);
+
+    // Clean up.
+    remove(file_name.c_str());
+    VecSimIndex_Free(serialized_index);
+}
+
+TEST_F(INT8HNSWTest, SerializationCurrentVersion) { test_serialization(false); }
+
+TEST_F(INT8HNSWTest, SerializationCurrentVersionMulti) { test_serialization(true); }
+
+template <typename params_t>
+void INT8Test::get_element_neighbors(params_t params) {
+    size_t n = 0;
+
+    SetUp(params);
+    auto *hnsw_index = CastToHNSW();
+
+    // Add vectors until we have at least 2 vectors at level 1.
+    size_t vectors_in_higher_levels = 0;
+    while (vectors_in_higher_levels < 2) {
+        GenerateAndAddVector(n, n);
+        if (hnsw_index->getGraphDataByInternalId(n)->toplevel > 0) {
+            vectors_in_higher_levels++;
+        }
+        n++;
+    }
+    ASSERT_GE(n, 1) << "n: " << n;
+
+    // Go over all vectors and validate that the getElementNeighbors debug command returns the
+    // neighbors properly.
+    for (size_t id = 0; id < n; id++) {
+        ElementLevelData &cur = hnsw_index->getElementLevelData(id, 0);
+        int **neighbors_output;
+        VecSimDebug_GetElementNeighborsInHNSWGraph(index, id, &neighbors_output);
+        auto graph_data = hnsw_index->getGraphDataByInternalId(id);
+        for (size_t l = 0; l <= graph_data->toplevel; l++) {
+            auto &level_data = hnsw_index->getElementLevelData(graph_data, l);
+            auto &neighbours = neighbors_output[l];
+            ASSERT_EQ(neighbours[0], level_data.numLinks);
+            for (size_t j = 1; j <= neighbours[0]; j++) {
+                ASSERT_EQ(neighbours[j], level_data.links[j - 1]);
+            }
+        }
+        VecSimDebug_ReleaseElementNeighborsInHNSWGraph(neighbors_output);
+    }
+}
+
+TEST_F(INT8HNSWTest, getElementNeighbors) {
+    HNSWParams params = {.dim = 4, .M = 20};
+    get_element_neighbors(params);
+}
+
+TEST_F(INT8TieredTest, getElementNeighbors) {
+    HNSWParams params = {.dim = 4, .M = 20};
+    get_element_neighbors(params);
+}
diff --git a/tests/unit/unit_test_utils.cpp b/tests/unit/unit_test_utils.cpp
index 196a7caa9..91525caa6 100644
--- a/tests/unit/unit_test_utils.cpp
+++ b/tests/unit/unit_test_utils.cpp
@@ -46,6 +46,7 @@ VecSimQueryParams CreateQueryParams(const HNSWRuntimeParams &RuntimeParams) {
 
 static bool is_async_index(VecSimIndex *index) {
     return dynamic_cast<VecSimTieredIndex<float, float> *>(index) != nullptr ||
+           dynamic_cast<VecSimTieredIndex<int8_t, float> *>(index) != nullptr ||
            dynamic_cast<VecSimTieredIndex<double, double> *>(index) != nullptr;
 }
 

From b7d6aed28ba243d5c1d33d26dd357a04afa1461d Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 17 Dec 2024 16:10:29 +0000
Subject: [PATCH 26/33] remove duplicated GetDistFunc<int8_t, float>

move ASSERT_DEBUG_DEATH of CalcIndexDataSize to a separate test
---
 src/VecSim/spaces/spaces.cpp | 13 -------------
 tests/unit/test_common.cpp   | 12 ++++++++++--
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/src/VecSim/spaces/spaces.cpp b/src/VecSim/spaces/spaces.cpp
index b9312234a..c73ec997f 100644
--- a/src/VecSim/spaces/spaces.cpp
+++ b/src/VecSim/spaces/spaces.cpp
@@ -69,19 +69,6 @@ dist_func_t<double> GetDistFunc<double, double>(VecSimMetric metric, size_t dim,
     }
     throw std::invalid_argument("Invalid metric");
 }
-template <>
-dist_func_t<float> GetDistFunc<int8_t, float>(VecSimMetric metric, size_t dim,
-                                              unsigned char *alignment) {
-    switch (metric) {
-    case VecSimMetric_Cosine:
-        return Cosine_INT8_GetDistFunc(dim, alignment);
-    case VecSimMetric_IP:
-        return IP_INT8_GetDistFunc(dim, alignment);
-    case VecSimMetric_L2:
-        return L2_INT8_GetDistFunc(dim, alignment);
-    }
-    throw std::invalid_argument("Invalid metric");
-}
 
 template <>
 dist_func_t<float> GetDistFunc<int8_t, float>(VecSimMetric metric, size_t dim,
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 96be49345..07a39fac8 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -700,6 +700,16 @@ class CommonTypeMetricTieredTests : public CommonTypeMetricTests {
     tieredIndexMock mock_thread_pool;
 };
 
+TEST_P(CommonTypeMetricTieredTests, TestCalcIndexDataSizeAssertion) {
+    size_t dim = 4;
+    VecSimType type = std::get<0>(GetParam());
+    VecSimMetric metric = std::get<1>(GetParam());
+
+    HNSWParams hnsw_params = {.type = type, .dim = 4, .metric = metric};
+    VecSimIndex *index = test_utils::CreateNewTieredHNSWIndex(hnsw_params, this->mock_thread_pool);
+    ASSERT_DEBUG_DEATH(test_utils::CalcIndexDataSize(index, type), "dynamic_cast failed");
+}
+
 TEST_P(CommonTypeMetricTieredTests, TestDataSizeTieredHNSW) {
     size_t dim = 4;
     VecSimType type = std::get<0>(GetParam());
@@ -707,8 +717,6 @@ TEST_P(CommonTypeMetricTieredTests, TestDataSizeTieredHNSW) {
 
     HNSWParams hnsw_params = {.type = type, .dim = 4, .metric = metric};
     VecSimIndex *index = test_utils::CreateNewTieredHNSWIndex(hnsw_params, this->mock_thread_pool);
-    // TODO:move death test to a separate test
-    // ASSERT_DEBUG_DEATH(test_utils::CalcIndexDataSize(index, type), "dynamic_cast failed");
 
     auto verify_data_size = [&](const auto &tiered_index) {
         auto hnsw_index = tiered_index->getHNSWIndex();

From 939cc4708bfa763ac08f13c07f87089246f21302 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 18 Dec 2024 05:30:31 +0000
Subject: [PATCH 27/33] remove assert test, the statement is executed and causes
 crash

---
 tests/unit/test_common.cpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 07a39fac8..6584e2318 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -700,16 +700,6 @@ class CommonTypeMetricTieredTests : public CommonTypeMetricTests {
     tieredIndexMock mock_thread_pool;
 };
 
-TEST_P(CommonTypeMetricTieredTests, TestCalcIndexDataSizeAssertion) {
-    size_t dim = 4;
-    VecSimType type = std::get<0>(GetParam());
-    VecSimMetric metric = std::get<1>(GetParam());
-
-    HNSWParams hnsw_params = {.type = type, .dim = 4, .metric = metric};
-    VecSimIndex *index = test_utils::CreateNewTieredHNSWIndex(hnsw_params, this->mock_thread_pool);
-    ASSERT_DEBUG_DEATH(test_utils::CalcIndexDataSize(index, type), "dynamic_cast failed");
-}
-
 TEST_P(CommonTypeMetricTieredTests, TestDataSizeTieredHNSW) {
     size_t dim = 4;
     VecSimType type = std::get<0>(GetParam());

From e3ad80c8423aa0f50cbfbc7742daa87c51ff893d Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 18 Dec 2024 05:46:54 +0000
Subject: [PATCH 28/33] improve normalize test

---
 src/VecSim/spaces/normalize/normalize_naive.h | 4 ++--
 tests/unit/test_common.cpp                    | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/VecSim/spaces/normalize/normalize_naive.h b/src/VecSim/spaces/normalize/normalize_naive.h
index 2264b7da7..88967e39a 100644
--- a/src/VecSim/spaces/normalize/normalize_naive.h
+++ b/src/VecSim/spaces/normalize/normalize_naive.h
@@ -75,12 +75,12 @@ static inline void float16_normalizeVector(void *vec, const size_t dim) {
 }
 
 static inline void int8_normalizeVector(void *vec, const size_t dim) {
-    int8_t *input_vector = (int8_t *)vec;
+    int8_t *input_vector = static_cast<int8_t *>(vec);
 
     float norm = IntegralType_ComputeNorm<int8_t>(input_vector, dim);
 
     // Store norm at the end of the vector.
-    *(float *)(input_vector + dim) = norm;
+    *reinterpret_cast<float *>(input_vector + dim) = norm;
 }
 
 } // namespace spaces
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 6584e2318..43d685239 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -636,9 +636,14 @@ TEST(CommonAPITest, NormalizeInt8) {
     VecSim_Normalize(v, dim, VecSimType_INT8);
 
     float res_norm = *(reinterpret_cast<float *>(v + dim));
-    float expected_norm = spaces::IntegralType_ComputeNorm<int8_t>(v, dim);
+    // Check that the normalized vector norm is 1.
+    float norm = 0;
+    for (size_t i = 0; i < dim; ++i) {
+        float val = v[i] / res_norm;
+        norm += val * val;
+    }
 
-    ASSERT_FLOAT_EQ(res_norm, expected_norm);
+    ASSERT_FLOAT_EQ(norm, 1.0);
 }
 
 class CommonTypeMetricTests : public testing::TestWithParam<std::tuple<VecSimType, VecSimMetric>> {

From 58fa8e2fbc5fb8c556b2b9db4940fdc32d58b6bb Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 18 Dec 2024 06:19:48 +0000
Subject: [PATCH 29/33] rename test_utils::compute_norm ->
 test_utils::integral_compute_norm

remove test_normalize.cpp file
---
 .../spaces_benchmarks/bm_spaces_int8.cpp      |  4 ++--
 tests/unit/CMakeLists.txt                     |  2 +-
 tests/unit/test_common.cpp                    |  1 -
 tests/unit/test_int8.cpp                      |  1 -
 tests/unit/test_normalize.cpp                 | 21 -------------------
 tests/unit/test_spaces.cpp                    |  8 +++----
 tests/utils/tests_utils.h                     | 11 ++++------
 7 files changed, 11 insertions(+), 37 deletions(-)
 delete mode 100644 tests/unit/test_normalize.cpp

diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
index 0adde8972..25b7e85e0 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp
@@ -28,8 +28,8 @@ class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture {
         test_utils::populate_int8_vec(v2, dim, 1234);
 
         // Store the norm in the extra space for cosine calculations
-        *(float *)(v1 + dim) = test_utils::compute_norm(v1, dim);
-        *(float *)(v2 + dim) = test_utils::compute_norm(v2, dim);
+        *(float *)(v1 + dim) = test_utils::integral_compute_norm(v1, dim);
+        *(float *)(v2 + dim) = test_utils::integral_compute_norm(v2, dim);
     }
     void TearDown(const ::benchmark::State &state) {
         delete v1;
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 2320b08ff..62f864f3f 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -34,7 +34,7 @@ add_executable(test_hnsw ../utils/mock_thread_pool.cpp test_hnsw.cpp test_hnsw_m
 add_executable(test_hnsw_parallel test_hnsw_parallel.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
 add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
 add_executable(test_allocator test_allocator.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
-add_executable(test_spaces test_spaces.cpp test_normalize.cpp)
+add_executable(test_spaces test_spaces.cpp)
 add_executable(test_types test_types.cpp)
 add_executable(test_common ../utils/mock_thread_pool.cpp test_common.cpp unit_test_utils.cpp )
 add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp)
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 43d685239..2a9d71725 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -20,7 +20,6 @@
 #include "VecSim/spaces/spaces.h"
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
-#include "VecSim/spaces/normalize/compute_norm.h"
 
 #include <cstdlib>
 #include <limits>
diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
index 27da6b6fd..cbbd370bc 100644
--- a/tests/unit/test_int8.cpp
+++ b/tests/unit/test_int8.cpp
@@ -4,7 +4,6 @@
 #include "tests_utils.h"
 #include "unit_test_utils.h"
 #include "mock_thread_pool.h"
-#include "VecSim/spaces/normalize/compute_norm.h"
 #include "VecSim/vec_sim_debug.h"
 #include "VecSim/spaces/L2/L2.h"
 #include "VecSim/spaces/IP/IP.h"
diff --git a/tests/unit/test_normalize.cpp b/tests/unit/test_normalize.cpp
deleted file mode 100644
index 568b58b7a..000000000
--- a/tests/unit/test_normalize.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#include <random> //TODO: remove once callinng populate_int8_vec
-
-#include "gtest/gtest.h"
-#include "VecSim/spaces/normalize/compute_norm.h"
-class NormalizeTest : public ::testing::Test {};
-
-TEST_F(NormalizeTest, TestINT8ComputeNorm) {
-    size_t dim = 4;
-    int8_t v[] = {-68, -100, 24, 127};
-    float expected_norm = 177.0; // manually calculated
-
-    float norm = spaces::IntegralType_ComputeNorm<int8_t>(v, dim);
-
-    ASSERT_EQ(norm, expected_norm);
-}
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 9931d318a..ad87ff07b 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -249,8 +249,8 @@ TEST_F(SpacesTest, int8_Cosine_no_optimization_func_test) {
     test_utils::populate_int8_vec(v2, dim, 123);
 
     // write the norm at the end of the vector
-    *(float *)(v1 + dim) = test_utils::compute_norm(v1, dim);
-    *(float *)(v2 + dim) = test_utils::compute_norm(v2, dim);
+    *(float *)(v1 + dim) = test_utils::integral_compute_norm(v1, dim);
+    *(float *)(v2 + dim) = test_utils::integral_compute_norm(v2, dim);
 
     float dist = INT8_Cosine((const void *)v1, (const void *)v2, dim);
     ASSERT_NEAR(dist, 0.0, 0.000001);
@@ -996,8 +996,8 @@ TEST_P(INT8SpacesOptimizationTest, INT8CosineTest) {
     test_utils::populate_int8_vec(v2, dim, 1234);
 
     // write the norm at the end of the vector
-    *(float *)(v1 + dim) = test_utils::compute_norm(v1, dim);
-    *(float *)(v2 + dim) = test_utils::compute_norm(v2, dim);
+    *(float *)(v1 + dim) = test_utils::integral_compute_norm(v1, dim);
+    *(float *)(v2 + dim) = test_utils::integral_compute_norm(v2, dim);
 
     dist_func_t<float> arch_opt_func;
     float baseline = INT8_Cosine(v1, v2, dim);
diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h
index 31dc3d9ef..0bf8bca53 100644
--- a/tests/utils/tests_utils.h
+++ b/tests/utils/tests_utils.h
@@ -2,6 +2,7 @@
 
 #include <random>
 #include <vector>
+#include "VecSim/spaces/normalize/compute_norm.h"
 
 namespace test_utils {
 
@@ -19,13 +20,9 @@ static void populate_int8_vec(int8_t *v, size_t dim, int seed = 1234) {
     }
 }
 
-// TODO: replace with normalize function from VecSim
-float compute_norm(const int8_t *vec, size_t dim) {
-    int norm = 0;
-    for (size_t i = 0; i < dim; i++) {
-        norm += vec[i] * vec[i];
-    }
-    return sqrt(norm);
+template <typename datatype>
+float integral_compute_norm(const datatype *vec, size_t dim) {
+    return spaces::IntegralType_ComputeNorm<datatype>(vec, dim);
 }
 
 } // namespace test_utils

From 736e30b99fb7afd4a5430518b125471c139d60bb Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 18 Dec 2024 12:11:29 +0000
Subject: [PATCH 30/33] use stack allocation instead of heap allocation in
 tests

---
 tests/unit/test_spaces.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index ad87ff07b..e554c88ef 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -242,8 +242,8 @@ TEST_F(SpacesTest, int8_ip_no_optimization_func_test) {
 TEST_F(SpacesTest, int8_Cosine_no_optimization_func_test) {
     size_t dim = 4;
     // create a vector with extra space for the norm
-    int8_t *v1 = new int8_t[dim + sizeof(float)];
-    int8_t *v2 = new int8_t[dim + sizeof(float)];
+    int8_t v1[dim + sizeof(float)];
+    int8_t v2[dim + sizeof(float)];
 
     test_utils::populate_int8_vec(v1, dim, 123);
     test_utils::populate_int8_vec(v2, dim, 123);
@@ -917,8 +917,8 @@ class INT8SpacesOptimizationTest : public testing::TestWithParam<size_t> {};
 TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
     auto optimization = cpu_features::GetX86Info().features;
     size_t dim = GetParam();
-    int8_t *v1 = new int8_t[dim];
-    int8_t *v2 = new int8_t[dim];
+    int8_t v1[dim];
+    int8_t v2[dim];
     test_utils::populate_int8_vec(v1, dim, 123);
     test_utils::populate_int8_vec(v2, dim, 1234);
 
@@ -953,8 +953,8 @@ TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
 TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
     auto optimization = cpu_features::GetX86Info().features;
     size_t dim = GetParam();
-    int8_t *v1 = new int8_t[dim];
-    int8_t *v2 = new int8_t[dim];
+    int8_t v1[dim];
+    int8_t v2[dim];
     test_utils::populate_int8_vec(v1, dim, 123);
     test_utils::populate_int8_vec(v2, dim, 1234);
 
@@ -990,8 +990,8 @@ TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
 TEST_P(INT8SpacesOptimizationTest, INT8CosineTest) {
     auto optimization = cpu_features::GetX86Info().features;
     size_t dim = GetParam();
-    int8_t *v1 = new int8_t[dim + sizeof(float)];
-    int8_t *v2 = new int8_t[dim + sizeof(float)];
+    int8_t v1[dim + sizeof(float)];
+    int8_t v2[dim + sizeof(float)];
     test_utils::populate_int8_vec(v1, dim, 123);
     test_utils::populate_int8_vec(v2, dim, 1234);
 

From 4a9bb69d166b6c72e4bc74a854ca5e0281cb5047 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 18 Dec 2024 16:05:45 +0200
Subject: [PATCH 31/33] fix float comparison in test_serialization

avoid evaluating statement in typeid to avoid clang warning
---
 tests/unit/test_hnsw_tiered.cpp | 3 ++-
 tests/unit/test_int8.cpp        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp
index 676424fd7..deb81c6aa 100644
--- a/tests/unit/test_hnsw_tiered.cpp
+++ b/tests/unit/test_hnsw_tiered.cpp
@@ -163,7 +163,8 @@ TYPED_TEST(HNSWTieredIndexTest, testIndexesAttributes) {
         dynamic_cast<MultiPreprocessorsContainer<TEST_DATA_T, 1> *>(bf_preprocessors)
             ->getPreprocessors();
     const std::type_info &bf_pp_expected_type = typeid(CosinePreprocessor<TEST_DATA_T>);
-    const std::type_info &bf_pp_actual_type = typeid(*pp_arr[0]);
+    PreprocessorInterface *bf_pp = pp_arr[0];
+    const std::type_info &bf_pp_actual_type = typeid(*bf_pp);
     ASSERT_EQ(bf_pp_actual_type, bf_pp_expected_type);
 
     // hnsw - simple
diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
index cbbd370bc..e298232ba 100644
--- a/tests/unit/test_int8.cpp
+++ b/tests/unit/test_int8.cpp
@@ -929,7 +929,7 @@ void INT8HNSWTest::test_serialization(bool is_multi) {
     VecSimIndex_AddVector(serialized_index, new_vec, n);
     auto verify_res = [&](size_t id, double score, size_t index) {
         ASSERT_EQ(id, n) << "score: " << score;
-        ASSERT_EQ(score, 0);
+        ASSERT_NEAR(score, 0.0, 1e-7);
     };
     runTopKSearchTest(serialized_index, new_vec, 1, verify_res);
     VecSimIndex_DeleteVector(serialized_index, 1);

From 9796d59c06e46dbbd1e425b449b3c6ca33b2aeeb Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 19 Dec 2024 13:00:38 +0000
Subject: [PATCH 32/33] rename CalcIndexDataSize -> CalcVectorDataSize

move components tests from test_common to test_components
---
 tests/unit/CMakeLists.txt      |   5 +-
 tests/unit/test_common.cpp     | 578 +-------------------------------
 tests/unit/test_components.cpp | 587 +++++++++++++++++++++++++++++++++
 tests/unit/unit_test_utils.cpp |   2 +-
 tests/unit/unit_test_utils.h   |   2 +-
 5 files changed, 594 insertions(+), 580 deletions(-)
 create mode 100644 tests/unit/test_components.cpp

diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 62f864f3f..5a4cc5108 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -36,7 +36,8 @@ add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp ../
 add_executable(test_allocator test_allocator.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
 add_executable(test_spaces test_spaces.cpp)
 add_executable(test_types test_types.cpp)
-add_executable(test_common ../utils/mock_thread_pool.cpp test_common.cpp unit_test_utils.cpp )
+add_executable(test_common ../utils/mock_thread_pool.cpp test_common.cpp unit_test_utils.cpp)
+add_executable(test_components test_components.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp)
 add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp)
 add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp unit_test_utils.cpp)
 add_executable(test_int8 ../utils/mock_thread_pool.cpp test_int8.cpp unit_test_utils.cpp)
@@ -47,6 +48,7 @@ target_link_libraries(test_bruteforce PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_allocator PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_spaces PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_common PUBLIC gtest_main VectorSimilarity)
+target_link_libraries(test_components PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_types PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_bf16 PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_fp16 PUBLIC gtest_main VectorSimilarity)
@@ -60,6 +62,7 @@ gtest_discover_tests(test_bruteforce)
 gtest_discover_tests(test_allocator)
 gtest_discover_tests(test_spaces)
 gtest_discover_tests(test_common)
+gtest_discover_tests(test_components)
 gtest_discover_tests(test_types)
 gtest_discover_tests(test_bf16 TEST_PREFIX BF16UNIT_)
 gtest_discover_tests(test_fp16 TEST_PREFIX FP16UNIT_)
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 2a9d71725..8bb3baf16 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -665,7 +665,7 @@ void CommonTypeMetricTests::test_datasize() {
     VecSimMetric metric = std::get<1>(GetParam());
     algo_params params = {.dim = dim, .metric = metric};
     this->index = test_utils::CreateNewIndex(params, type);
-    size_t actual = test_utils::CalcIndexDataSize(index, type);
+    size_t actual = test_utils::CalcVectorDataSize(index, type);
     size_t expected = dim * VecSimType_sizeof(type);
     if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) {
         expected += sizeof(float);
@@ -799,579 +799,3 @@ INSTANTIATE_TEST_SUITE_P(
         std::string test_name(type);
         return test_name + "_" + metric;
     });
-
-class IndexCalculatorTest : public ::testing::Test {};
-namespace dummyCalcultor {
-
-using DummyType = int;
-using dummy_dist_func_t = DummyType (*)(int);
-
-int dummyDistFunc(int value) { return value; }
-
-template <typename DistType>
-class DistanceCalculatorDummy : public DistanceCalculatorInterface<DistType, dummy_dist_func_t> {
-public:
-    DistanceCalculatorDummy(std::shared_ptr<VecSimAllocator> allocator, dummy_dist_func_t dist_func)
-        : DistanceCalculatorInterface<DistType, dummy_dist_func_t>(allocator, dist_func) {}
-
-    virtual DistType calcDistance(const void *v1, const void *v2, size_t dim) const {
-        return this->dist_func(7);
-    }
-};
-
-} // namespace dummyCalcultor
-
-TEST(IndexCalculatorTest, TestIndexCalculator) {
-
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    // Test computer with a distance function signature different from dim(v1, v2, dim()).
-    using namespace dummyCalcultor;
-    auto distance_calculator = DistanceCalculatorDummy<DummyType>(allocator, dummyDistFunc);
-
-    ASSERT_EQ(distance_calculator.calcDistance(nullptr, nullptr, 0), 7);
-}
-
-class PreprocessorsTest : public ::testing::Test {};
-
-namespace dummyPreprocessors {
-
-using DummyType = int;
-
-enum pp_mode { STORAGE_ONLY, QUERY_ONLY, BOTH, EMPTY };
-
-// Dummy storage preprocessor
-template <typename DataType>
-class DummyStoragePreprocessor : public PreprocessorInterface {
-public:
-    DummyStoragePreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
-                             int value_to_add_query = 0)
-        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
-          value_to_add_query(value_to_add_query) {
-        if (!value_to_add_query)
-            value_to_add_query = value_to_add_storage;
-    }
-
-    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
-
-        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
-    }
-
-    void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
-        // If the blob was not allocated yet, allocate it.
-        if (blob == nullptr) {
-            blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(blob, original_blob, processed_bytes_count);
-        }
-        static_cast<DataType *>(blob)[0] += value_to_add_storage;
-    }
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
-                                unsigned char alignment) const override {}
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
-                         unsigned char alignment) const override {
-        /* do nothing*/
-    }
-
-private:
-    int value_to_add_storage;
-    int value_to_add_query;
-};
-
-// Dummy query preprocessor
-template <typename DataType>
-class DummyQueryPreprocessor : public PreprocessorInterface {
-public:
-    DummyQueryPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
-                           int _value_to_add_query = 0)
-        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
-          value_to_add_query(_value_to_add_query) {
-        if (!_value_to_add_query)
-            value_to_add_query = value_to_add_storage;
-    }
-
-    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
-        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
-    }
-
-    void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
-        /* do nothing*/
-    }
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
-                                unsigned char alignment) const override {
-        static_cast<DataType *>(blob)[0] += value_to_add_query;
-    }
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
-                         unsigned char alignment) const override {
-        // If the blob was not allocated yet, allocate it.
-        if (blob == nullptr) {
-            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            memcpy(blob, original_blob, processed_bytes_count);
-        }
-        static_cast<DataType *>(blob)[0] += value_to_add_query;
-    }
-
-private:
-    int value_to_add_storage;
-    int value_to_add_query;
-};
-
-// Dummy mixed preprocessor (precesses the blobs  differently)
-template <typename DataType>
-class DummyMixedPreprocessor : public PreprocessorInterface {
-public:
-    DummyMixedPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
-                           int value_to_add_query)
-        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
-          value_to_add_query(value_to_add_query) {}
-    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
-
-        // One blob was already allocated by a previous preprocessor(s) that process both blobs the
-        // same. The blobs are pointing to the same memory, we need to allocate another memory slot
-        // to split them.
-        if ((storage_blob == query_blob) && (query_blob != nullptr)) {
-            storage_blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(storage_blob, query_blob, processed_bytes_count);
-        }
-
-        // Either both are nullptr or they are pointing to different memory slots. Both cases are
-        // handled by the designated functions.
-        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
-        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
-    }
-
-    void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
-        // If the blob was not allocated yet, allocate it.
-        if (blob == nullptr) {
-            blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(blob, original_blob, processed_bytes_count);
-        }
-        static_cast<DataType *>(blob)[0] += value_to_add_storage;
-    }
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
-                                unsigned char alignment) const override {}
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
-                         unsigned char alignment) const override {
-        // If the blob was not allocated yet, allocate it.
-        if (blob == nullptr) {
-            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            memcpy(blob, original_blob, processed_bytes_count);
-        }
-        static_cast<DataType *>(blob)[0] += value_to_add_query;
-    }
-
-private:
-    int value_to_add_storage;
-    int value_to_add_query;
-};
-} // namespace dummyPreprocessors
-
-TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) {
-    using namespace dummyPreprocessors;
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    unsigned char alignment = 5;
-    auto preprocessor = PreprocessorsContainerAbstract(allocator, alignment);
-    const int original_blob[4] = {1, 1, 1, 1};
-    size_t processed_bytes_count = sizeof(original_blob);
-
-    {
-        auto aligned_query = preprocessor.preprocessQuery(original_blob, processed_bytes_count);
-        unsigned char address_alignment = (uintptr_t)(aligned_query.get()) % alignment;
-        ASSERT_EQ(address_alignment, 0);
-    }
-
-    // The index computer is responsible for releasing the distance calculator.
-}
-
-template <unsigned char alignment>
-void MultiPPContainerEmpty() {
-    using namespace dummyPreprocessors;
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-    constexpr size_t dim = 4;
-    const int original_blob[dim] = {1, 2, 3, 4};
-    const int original_blob_cpy[dim] = {1, 2, 3, 4};
-
-    constexpr size_t n_preprocessors = 3;
-
-    auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
-
-    {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
-        // Original blob should not be changed
-        CompareVectors(original_blob, original_blob_cpy, dim);
-
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-
-        // Storage blob should not be reallocated or changed
-        ASSERT_EQ(storage_blob, (const int *)original_blob);
-        CompareVectors(original_blob, (const int *)storage_blob, dim);
-
-        // query blob *values* should not be changed
-        CompareVectors(original_blob, (const int *)query_blob, dim);
-
-        // If alignment is set the query blob address should be aligned to the specified alignment.
-        if constexpr (alignment) {
-            unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
-            ASSERT_EQ(address_alignment, 0);
-        }
-    }
-}
-
-TEST(PreprocessorsTest, MultiPPContainerEmptyNoAlignment) {
-    using namespace dummyPreprocessors;
-    MultiPPContainerEmpty<0>();
-}
-
-TEST(PreprocessorsTest, MultiPPContainerEmptyAlignment) {
-    using namespace dummyPreprocessors;
-    MultiPPContainerEmpty<5>();
-}
-
-template <typename PreprocessorType>
-void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) {
-    using namespace dummyPreprocessors;
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    constexpr size_t n_preprocessors = 2;
-    unsigned char alignment = 0;
-    int initial_value = 1;
-    int value_to_add = 7;
-    const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
-    size_t processed_bytes_count = sizeof(original_blob);
-
-    // Test computer with multiple preprocessors of the same type.
-    auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
-
-    auto verify_preprocess = [&](int expected_processed_value) {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
-        // Original blob should not be changed
-        ASSERT_EQ(original_blob[0], initial_value);
-
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-        if (MODE == STORAGE_ONLY) {
-            // New storage blob should be allocated
-            ASSERT_NE(storage_blob, original_blob);
-            // query blob should be unprocessed
-            ASSERT_EQ(query_blob, original_blob);
-            ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value);
-        } else if (MODE == QUERY_ONLY) {
-            // New query blob should be allocated
-            ASSERT_NE(query_blob, original_blob);
-            // Storage blob should be unprocessed
-            ASSERT_EQ(storage_blob, original_blob);
-            ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value);
-        }
-    };
-
-    /* ==== Add the first preprocessor ==== */
-    auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add);
-    // add preprocessor returns next free spot in its preprocessors array.
-    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1);
-    verify_preprocess(initial_value + value_to_add);
-
-    /* ==== Add the second preprocessor ==== */
-    auto preprocessor1 = new (allocator) PreprocessorType(allocator, value_to_add);
-    // add preprocessor returns 0 when adding the last preprocessor.
-    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 0);
-    ASSERT_NO_FATAL_FAILURE(verify_preprocess(initial_value + 2 * value_to_add));
-}
-
-TEST(PreprocessorsTest, MultiPreprocessorsContainerStorageNoAlignment) {
-    using namespace dummyPreprocessors;
-    MultiPreprocessorsContainerNoAlignment<DummyStoragePreprocessor<DummyType>>(
-        pp_mode::STORAGE_ONLY);
-}
-
-TEST(PreprocessorsTest, MultiPreprocessorsContainerQueryNoAlignment) {
-    using namespace dummyPreprocessors;
-    MultiPreprocessorsContainerNoAlignment<DummyQueryPreprocessor<DummyType>>(pp_mode::QUERY_ONLY);
-}
-
-template <typename FirstPreprocessorType, typename SecondPreprocessorType>
-void multiPPContainerMixedPreprocessorNoAlignment() {
-    using namespace dummyPreprocessors;
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    constexpr size_t n_preprocessors = 3;
-    unsigned char alignment = 0;
-    int initial_value = 1;
-    int value_to_add_storage = 7;
-    int value_to_add_query = 2;
-    const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
-    size_t processed_bytes_count = sizeof(original_blob);
-
-    // Test multiple preprocessors of the same type.
-    auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
-
-    /* ==== Add one preprocessor of each type ==== */
-    auto preprocessor0 =
-        new (allocator) FirstPreprocessorType(allocator, value_to_add_storage, value_to_add_query);
-    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1);
-    auto preprocessor1 =
-        new (allocator) SecondPreprocessorType(allocator, value_to_add_storage, value_to_add_query);
-    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 2);
-
-    // scope this section so the blobs are released before the allocator.
-    {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
-        // Original blob should not be changed
-        ASSERT_EQ(original_blob[0], initial_value);
-
-        // Both blobs should be allocated
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-
-        // Ensure the computer process returns a new allocation of the expected processed blob with
-        // the new value.
-        ASSERT_NE(storage_blob, original_blob);
-        ASSERT_NE(query_blob, original_blob);
-        ASSERT_NE(query_blob, storage_blob);
-
-        ASSERT_EQ(((const int *)storage_blob)[0], initial_value + value_to_add_storage);
-        ASSERT_EQ(((const int *)query_blob)[0], initial_value + value_to_add_query);
-    }
-
-    /* ==== Add a preprocessor that processes both storage and query ==== */
-    auto preprocessor2 = new (allocator)
-        DummyMixedPreprocessor<DummyType>(allocator, value_to_add_storage, value_to_add_query);
-    // add preprocessor returns 0 when adding the last preprocessor.
-    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), 0);
-    {
-        ProcessedBlobs mixed_processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
-
-        const void *mixed_pp_storage_blob = mixed_processed_blobs.getStorageBlob();
-        const void *mixed_pp_query_blob = mixed_processed_blobs.getQueryBlob();
-
-        // Ensure the computer process both blobs.
-        ASSERT_EQ(((const int *)mixed_pp_storage_blob)[0],
-                  initial_value + 2 * value_to_add_storage);
-        ASSERT_EQ(((const int *)mixed_pp_query_blob)[0], initial_value + 2 * value_to_add_query);
-    }
-
-    // try adding another preprocessor and fail.
-    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), -1);
-}
-
-TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorQueryFirst) {
-    using namespace dummyPreprocessors;
-    multiPPContainerMixedPreprocessorNoAlignment<DummyQueryPreprocessor<DummyType>,
-                                                 DummyStoragePreprocessor<DummyType>>();
-}
-
-TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorStorageFirst) {
-    using namespace dummyPreprocessors;
-    multiPPContainerMixedPreprocessorNoAlignment<DummyStoragePreprocessor<DummyType>,
-                                                 DummyQueryPreprocessor<DummyType>>();
-}
-
-template <typename PreprocessorType>
-void multiPPContainerAlignment(dummyPreprocessors::pp_mode MODE) {
-    using namespace dummyPreprocessors;
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    unsigned char alignment = 5;
-    constexpr size_t n_preprocessors = 1;
-    int initial_value = 1;
-    int value_to_add = 7;
-    const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
-    size_t processed_bytes_count = sizeof(original_blob);
-
-    auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
-
-    auto verify_preprocess = [&](int expected_processed_value) {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
-
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-        if (MODE == STORAGE_ONLY) {
-            // New storage blob should be allocated and processed
-            ASSERT_NE(storage_blob, original_blob);
-            ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value);
-            // query blob *values* should be unprocessed, however, it might be allocated if the
-            // original blob is not aligned.
-            ASSERT_EQ(((const int *)query_blob)[0], original_blob[0]);
-        } else if (MODE == QUERY_ONLY) {
-            // New query blob should be allocated
-            ASSERT_NE(query_blob, original_blob);
-            // Storage blob should be unprocessed and not allocated.
-            ASSERT_EQ(storage_blob, original_blob);
-            ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value);
-        }
-
-        // anyway the query blob should be aligned
-        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
-        ASSERT_EQ(address_alignment, 0);
-    };
-
-    auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add);
-    // add preprocessor returns next free spot in its preprocessors array.
-    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 0);
-    verify_preprocess(initial_value + value_to_add);
-}
-
-TEST(PreprocessorsTest, StoragePreprocessorWithAlignment) {
-    using namespace dummyPreprocessors;
-    multiPPContainerAlignment<DummyStoragePreprocessor<DummyType>>(pp_mode::STORAGE_ONLY);
-}
-
-TEST(PreprocessorsTest, QueryPreprocessorWithAlignment) {
-    using namespace dummyPreprocessors;
-    multiPPContainerAlignment<DummyQueryPreprocessor<DummyType>>(pp_mode::QUERY_ONLY);
-}
-
-TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) {
-    using namespace dummyPreprocessors;
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    constexpr size_t n_preprocessors = 2;
-    constexpr size_t dim = 4;
-    unsigned char alignment = 5;
-
-    float initial_value = 1.0f;
-    float normalized_value = 0.5f;
-    float value_to_add_storage = 7.0f;
-    float value_to_add_query = 2.0f;
-    const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
-
-    auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
-
-    // adding cosine preprocessor
-    auto cosine_preprocessor = new (allocator) CosinePreprocessor<float>(allocator, dim);
-    multiPPContainer.addPreprocessor(cosine_preprocessor);
-    {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-        // blobs should point to the same memory slot
-        ASSERT_EQ(storage_blob, query_blob);
-        // memory should be aligned
-        unsigned char address_alignment = (uintptr_t)(storage_blob) % alignment;
-        ASSERT_EQ(address_alignment, 0);
-        // They need to be allocated and processed
-        ASSERT_NE(storage_blob, nullptr);
-        ASSERT_EQ(((const float *)storage_blob)[0], normalized_value);
-        // the original blob should not change
-        ASSERT_NE(storage_blob, original_blob);
-    }
-    // adding mixed preprocessor
-    auto mixed_preprocessor = new (allocator)
-        DummyMixedPreprocessor<float>(allocator, value_to_add_storage, value_to_add_query);
-    multiPPContainer.addPreprocessor(mixed_preprocessor);
-    {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-        // blobs should point to a different memory slot
-        ASSERT_NE(storage_blob, query_blob);
-        ASSERT_NE(storage_blob, nullptr);
-        ASSERT_NE(query_blob, nullptr);
-
-        // query blob should be aligned
-        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
-        ASSERT_EQ(address_alignment, 0);
-
-        // They need to be processed by both processors.
-        ASSERT_EQ(((const float *)storage_blob)[0], normalized_value + value_to_add_storage);
-        ASSERT_EQ(((const float *)query_blob)[0], normalized_value + value_to_add_query);
-
-        // the original blob should not change
-        ASSERT_NE(storage_blob, original_blob);
-        ASSERT_NE(query_blob, original_blob);
-    }
-    // The preprocessors should be released by the preprocessors container.
-}
-
-TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
-    using namespace dummyPreprocessors;
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    constexpr size_t n_preprocessors = 2;
-    constexpr size_t dim = 4;
-    unsigned char alignment = 5;
-
-    float initial_value = 1.0f;
-    float normalized_value = 0.5f;
-    float value_to_add_storage = 7.0f;
-    float value_to_add_query = 2.0f;
-    const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
-
-    // Creating multi preprocessors container
-    auto mixed_preprocessor = new (allocator)
-        DummyMixedPreprocessor<float>(allocator, value_to_add_storage, value_to_add_query);
-    auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
-    multiPPContainer.addPreprocessor(mixed_preprocessor);
-
-    {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-        // blobs should point to a different memory slot
-        ASSERT_NE(storage_blob, query_blob);
-        ASSERT_NE(storage_blob, nullptr);
-        ASSERT_NE(query_blob, nullptr);
-
-        // query blob should be aligned
-        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
-        ASSERT_EQ(address_alignment, 0);
-
-        // They need to be processed by both processors.
-        ASSERT_EQ(((const float *)storage_blob)[0], initial_value + value_to_add_storage);
-        ASSERT_EQ(((const float *)query_blob)[0], initial_value + value_to_add_query);
-
-        // the original blob should not change
-        ASSERT_NE(storage_blob, original_blob);
-        ASSERT_NE(query_blob, original_blob);
-    }
-
-    // adding cosine preprocessor
-    auto cosine_preprocessor = new (allocator) CosinePreprocessor<float>(allocator, dim);
-    multiPPContainer.addPreprocessor(cosine_preprocessor);
-    {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-        // blobs should point to a different memory slot
-        ASSERT_NE(storage_blob, query_blob);
-        // query memory should be aligned
-        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
-        ASSERT_EQ(address_alignment, 0);
-        // They need to be allocated and processed
-        ASSERT_NE(storage_blob, nullptr);
-        ASSERT_NE(query_blob, nullptr);
-        float expected_processed_storage[dim] = {initial_value + value_to_add_storage,
-                                                 initial_value, initial_value, initial_value};
-        float expected_processed_query[dim] = {initial_value + value_to_add_query, initial_value,
-                                               initial_value, initial_value};
-        VecSim_Normalize(expected_processed_storage, dim, VecSimType_FLOAT32);
-        VecSim_Normalize(expected_processed_query, dim, VecSimType_FLOAT32);
-        ASSERT_EQ(((const float *)storage_blob)[0], expected_processed_storage[0]);
-        ASSERT_EQ(((const float *)query_blob)[0], expected_processed_query[0]);
-        // the original blob should not change
-        ASSERT_NE(storage_blob, original_blob);
-        ASSERT_NE(query_blob, original_blob);
-    }
-    // The preprocessors should be released by the preprocessors container.
-}
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
new file mode 100644
index 000000000..af49b12a8
--- /dev/null
+++ b/tests/unit/test_components.cpp
@@ -0,0 +1,587 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "gtest/gtest.h"
+#include "VecSim/vec_sim.h"
+#include "VecSim/spaces/computer/preprocessor_container.h"
+#include "VecSim/spaces/computer/calculator.h"
+#include "unit_test_utils.h"
+
+class IndexCalculatorTest : public ::testing::Test {};
+namespace dummyCalcultor {
+
+using DummyType = int;
+using dummy_dist_func_t = DummyType (*)(int);
+
+int dummyDistFunc(int value) { return value; }
+
+template <typename DistType>
+class DistanceCalculatorDummy : public DistanceCalculatorInterface<DistType, dummy_dist_func_t> {
+public:
+    DistanceCalculatorDummy(std::shared_ptr<VecSimAllocator> allocator, dummy_dist_func_t dist_func)
+        : DistanceCalculatorInterface<DistType, dummy_dist_func_t>(allocator, dist_func) {}
+
+    virtual DistType calcDistance(const void *v1, const void *v2, size_t dim) const {
+        return this->dist_func(7);
+    }
+};
+
+} // namespace dummyCalcultor
+
+TEST(IndexCalculatorTest, TestIndexCalculator) {
+
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    // Test computer with a distance function signature different from dist_func(v1, v2, dim).
+    using namespace dummyCalcultor;
+    auto distance_calculator = DistanceCalculatorDummy<DummyType>(allocator, dummyDistFunc);
+
+    ASSERT_EQ(distance_calculator.calcDistance(nullptr, nullptr, 0), 7);
+}
+
+class PreprocessorsTest : public ::testing::Test {};
+
+namespace dummyPreprocessors {
+
+using DummyType = int;
+
+enum pp_mode { STORAGE_ONLY, QUERY_ONLY, BOTH, EMPTY };
+
+// Dummy storage preprocessor: adds a value to the first element of the storage blob only.
+template <typename DataType>
+class DummyStoragePreprocessor : public PreprocessorInterface {
+public:
+    DummyStoragePreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
+                             int _value_to_add_query = 0)
+        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
+          value_to_add_query(_value_to_add_query) {
+        if (!_value_to_add_query)
+            value_to_add_query = value_to_add_storage; // member defaults to the storage value
+    }
+
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t processed_bytes_count, unsigned char alignment) const override {
+
+        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t processed_bytes_count) const override {
+        // If the blob was not allocated yet, allocate it and copy the original data into it.
+        if (blob == nullptr) {
+            blob = this->allocator->allocate(processed_bytes_count);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_storage;
+    }
+    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+                                unsigned char alignment) const override {}
+    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+                         unsigned char alignment) const override {
+        /* do nothing: this preprocessor leaves the query blob untouched */
+    }
+
+private:
+    int value_to_add_storage;
+    int value_to_add_query;
+};
+
+// Dummy query preprocessor
+template <typename DataType>
+class DummyQueryPreprocessor : public PreprocessorInterface {
+public:
+    DummyQueryPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
+                           int _value_to_add_query = 0)
+        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
+          value_to_add_query(_value_to_add_query) {
+        if (!_value_to_add_query)
+            value_to_add_query = value_to_add_storage;
+    }
+
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t processed_bytes_count, unsigned char alignment) const override {
+        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t processed_bytes_count) const override {
+        /* do nothing*/
+    }
+    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+                                unsigned char alignment) const override {
+        static_cast<DataType *>(blob)[0] += value_to_add_query;
+    }
+    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+                         unsigned char alignment) const override {
+        // If the blob was not allocated yet, allocate it.
+        if (blob == nullptr) {
+            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_query;
+    }
+
+private:
+    int value_to_add_storage;
+    int value_to_add_query;
+};
+
+// Dummy mixed preprocessor (processes the storage and query blobs differently)
+template <typename DataType>
+class DummyMixedPreprocessor : public PreprocessorInterface {
+public:
+    DummyMixedPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
+                           int value_to_add_query)
+        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
+          value_to_add_query(value_to_add_query) {}
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t processed_bytes_count, unsigned char alignment) const override {
+
+        // One blob was already allocated by a previous preprocessor(s) that process both blobs the
+        // same. The blobs are pointing to the same memory, we need to allocate another memory slot
+        // to split them.
+        if ((storage_blob == query_blob) && (query_blob != nullptr)) {
+            storage_blob = this->allocator->allocate(processed_bytes_count);
+            memcpy(storage_blob, query_blob, processed_bytes_count);
+        }
+
+        // Either both are nullptr or they are pointing to different memory slots. Both cases are
+        // handled by the designated functions.
+        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
+        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t processed_bytes_count) const override {
+        // If the blob was not allocated yet, allocate it.
+        if (blob == nullptr) {
+            blob = this->allocator->allocate(processed_bytes_count);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_storage;
+    }
+    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+                                unsigned char alignment) const override {}
+    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+                         unsigned char alignment) const override {
+        // If the blob was not allocated yet, allocate it.
+        if (blob == nullptr) {
+            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_query;
+    }
+
+private:
+    int value_to_add_storage;
+    int value_to_add_query;
+};
+} // namespace dummyPreprocessors
+
+TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    unsigned char alignment = 5;
+    auto preprocessor = PreprocessorsContainerAbstract(allocator, alignment);
+    const int original_blob[4] = {1, 1, 1, 1};
+    size_t processed_bytes_count = sizeof(original_blob);
+
+    {
+        auto aligned_query = preprocessor.preprocessQuery(original_blob, processed_bytes_count);
+        unsigned char address_alignment = (uintptr_t)(aligned_query.get()) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+    }
+
+    // The index computer is responsible for releasing the distance calculator.
+}
+
+template <unsigned char alignment>
+void MultiPPContainerEmpty() {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    constexpr size_t dim = 4;
+    const int original_blob[dim] = {1, 2, 3, 4};
+    const int original_blob_cpy[dim] = {1, 2, 3, 4};
+
+    constexpr size_t n_preprocessors = 3;
+
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+        // Original blob should not be changed
+        CompareVectors(original_blob, original_blob_cpy, dim);
+
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+
+        // Storage blob should not be reallocated or changed
+        ASSERT_EQ(storage_blob, (const int *)original_blob);
+        CompareVectors(original_blob, (const int *)storage_blob, dim);
+
+        // query blob *values* should not be changed
+        CompareVectors(original_blob, (const int *)query_blob, dim);
+
+        // If alignment is set the query blob address should be aligned to the specified alignment.
+        if constexpr (alignment) {
+            unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+            ASSERT_EQ(address_alignment, 0);
+        }
+    }
+}
+
+TEST(PreprocessorsTest, MultiPPContainerEmptyNoAlignment) {
+    using namespace dummyPreprocessors;
+    MultiPPContainerEmpty<0>();
+}
+
+TEST(PreprocessorsTest, MultiPPContainerEmptyAlignment) {
+    using namespace dummyPreprocessors;
+    MultiPPContainerEmpty<5>();
+}
+
+template <typename PreprocessorType>
+void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    constexpr size_t n_preprocessors = 2;
+    unsigned char alignment = 0;
+    int initial_value = 1;
+    int value_to_add = 7;
+    const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
+    size_t processed_bytes_count = sizeof(original_blob);
+
+    // Test computer with multiple preprocessors of the same type.
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+
+    auto verify_preprocess = [&](int expected_processed_value) {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+        // Original blob should not be changed
+        ASSERT_EQ(original_blob[0], initial_value);
+
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        if (MODE == STORAGE_ONLY) {
+            // New storage blob should be allocated
+            ASSERT_NE(storage_blob, original_blob);
+            // query blob should be unprocessed
+            ASSERT_EQ(query_blob, original_blob);
+            ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value);
+        } else if (MODE == QUERY_ONLY) {
+            // New query blob should be allocated
+            ASSERT_NE(query_blob, original_blob);
+            // Storage blob should be unprocessed
+            ASSERT_EQ(storage_blob, original_blob);
+            ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value);
+        }
+    };
+
+    /* ==== Add the first preprocessor ==== */
+    auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add);
+    // add preprocessor returns next free spot in its preprocessors array.
+    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1);
+    verify_preprocess(initial_value + value_to_add);
+
+    /* ==== Add the second preprocessor ==== */
+    auto preprocessor1 = new (allocator) PreprocessorType(allocator, value_to_add);
+    // add preprocessor returns 0 when adding the last preprocessor.
+    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 0);
+    ASSERT_NO_FATAL_FAILURE(verify_preprocess(initial_value + 2 * value_to_add));
+}
+
+TEST(PreprocessorsTest, MultiPreprocessorsContainerStorageNoAlignment) {
+    using namespace dummyPreprocessors;
+    MultiPreprocessorsContainerNoAlignment<DummyStoragePreprocessor<DummyType>>(
+        pp_mode::STORAGE_ONLY);
+}
+
+TEST(PreprocessorsTest, MultiPreprocessorsContainerQueryNoAlignment) {
+    using namespace dummyPreprocessors;
+    MultiPreprocessorsContainerNoAlignment<DummyQueryPreprocessor<DummyType>>(pp_mode::QUERY_ONLY);
+}
+
+template <typename FirstPreprocessorType, typename SecondPreprocessorType>
+void multiPPContainerMixedPreprocessorNoAlignment() {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    constexpr size_t n_preprocessors = 3;
+    unsigned char alignment = 0;
+    int initial_value = 1;
+    int value_to_add_storage = 7;
+    int value_to_add_query = 2;
+    const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
+    size_t processed_bytes_count = sizeof(original_blob);
+
+    // Test multiple preprocessors of different types.
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+
+    /* ==== Add one preprocessor of each type ==== */
+    auto preprocessor0 =
+        new (allocator) FirstPreprocessorType(allocator, value_to_add_storage, value_to_add_query);
+    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1);
+    auto preprocessor1 =
+        new (allocator) SecondPreprocessorType(allocator, value_to_add_storage, value_to_add_query);
+    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 2);
+
+    // scope this section so the blobs are released before the allocator.
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+        // Original blob should not be changed
+        ASSERT_EQ(original_blob[0], initial_value);
+
+        // Both blobs should be allocated
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+
+        // Ensure the computer process returns a new allocation of the expected processed blob with
+        // the new value.
+        ASSERT_NE(storage_blob, original_blob);
+        ASSERT_NE(query_blob, original_blob);
+        ASSERT_NE(query_blob, storage_blob);
+
+        ASSERT_EQ(((const int *)storage_blob)[0], initial_value + value_to_add_storage);
+        ASSERT_EQ(((const int *)query_blob)[0], initial_value + value_to_add_query);
+    }
+
+    /* ==== Add a preprocessor that processes both storage and query ==== */
+    auto preprocessor2 = new (allocator)
+        DummyMixedPreprocessor<DummyType>(allocator, value_to_add_storage, value_to_add_query);
+    // add preprocessor returns 0 when adding the last preprocessor.
+    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), 0);
+    {
+        ProcessedBlobs mixed_processed_blobs =
+            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+
+        const void *mixed_pp_storage_blob = mixed_processed_blobs.getStorageBlob();
+        const void *mixed_pp_query_blob = mixed_processed_blobs.getQueryBlob();
+
+        // Ensure the computer process both blobs.
+        ASSERT_EQ(((const int *)mixed_pp_storage_blob)[0],
+                  initial_value + 2 * value_to_add_storage);
+        ASSERT_EQ(((const int *)mixed_pp_query_blob)[0], initial_value + 2 * value_to_add_query);
+    }
+
+    // try adding another preprocessor and fail.
+    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), -1);
+}
+
+TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorQueryFirst) {
+    using namespace dummyPreprocessors;
+    multiPPContainerMixedPreprocessorNoAlignment<DummyQueryPreprocessor<DummyType>,
+                                                 DummyStoragePreprocessor<DummyType>>();
+}
+
+TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorStorageFirst) {
+    using namespace dummyPreprocessors;
+    multiPPContainerMixedPreprocessorNoAlignment<DummyStoragePreprocessor<DummyType>,
+                                                 DummyQueryPreprocessor<DummyType>>();
+}
+
+template <typename PreprocessorType>
+void multiPPContainerAlignment(dummyPreprocessors::pp_mode MODE) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    unsigned char alignment = 5;
+    constexpr size_t n_preprocessors = 1;
+    int initial_value = 1;
+    int value_to_add = 7;
+    const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
+    size_t processed_bytes_count = sizeof(original_blob);
+
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+
+    auto verify_preprocess = [&](int expected_processed_value) {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        if (MODE == STORAGE_ONLY) {
+            // New storage blob should be allocated and processed
+            ASSERT_NE(storage_blob, original_blob);
+            ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value);
+            // query blob *values* should be unprocessed, however, it might be allocated if the
+            // original blob is not aligned.
+            ASSERT_EQ(((const int *)query_blob)[0], original_blob[0]);
+        } else if (MODE == QUERY_ONLY) {
+            // New query blob should be allocated
+            ASSERT_NE(query_blob, original_blob);
+            // Storage blob should be unprocessed and not allocated.
+            ASSERT_EQ(storage_blob, original_blob);
+            ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value);
+        }
+
+        // anyway the query blob should be aligned
+        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+    };
+
+    auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add);
+    // add preprocessor returns 0 when adding the last (here: the only) preprocessor.
+    ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 0);
+    verify_preprocess(initial_value + value_to_add);
+}
+
+TEST(PreprocessorsTest, StoragePreprocessorWithAlignment) {
+    using namespace dummyPreprocessors;
+    multiPPContainerAlignment<DummyStoragePreprocessor<DummyType>>(pp_mode::STORAGE_ONLY);
+}
+
+TEST(PreprocessorsTest, QueryPreprocessorWithAlignment) {
+    using namespace dummyPreprocessors;
+    multiPPContainerAlignment<DummyQueryPreprocessor<DummyType>>(pp_mode::QUERY_ONLY);
+}
+
+TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t dim = 4;
+    unsigned char alignment = 5;
+
+    float initial_value = 1.0f;
+    float normalized_value = 0.5f;
+    float value_to_add_storage = 7.0f;
+    float value_to_add_query = 2.0f;
+    const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
+
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+
+    // adding cosine preprocessor
+    auto cosine_preprocessor = new (allocator) CosinePreprocessor<float>(allocator, dim);
+    multiPPContainer.addPreprocessor(cosine_preprocessor);
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_EQ(storage_blob, query_blob);
+        // memory should be aligned
+        unsigned char address_alignment = (uintptr_t)(storage_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+        // They need to be allocated and processed
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_EQ(((const float *)storage_blob)[0], normalized_value);
+        // the original blob should not change
+        ASSERT_NE(storage_blob, original_blob);
+    }
+    // adding mixed preprocessor
+    auto mixed_preprocessor = new (allocator)
+        DummyMixedPreprocessor<float>(allocator, value_to_add_storage, value_to_add_query);
+    multiPPContainer.addPreprocessor(mixed_preprocessor);
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to a different memory slot
+        ASSERT_NE(storage_blob, query_blob);
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(query_blob, nullptr);
+
+        // query blob should be aligned
+        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+
+        // They need to be processed by both processors.
+        ASSERT_EQ(((const float *)storage_blob)[0], normalized_value + value_to_add_storage);
+        ASSERT_EQ(((const float *)query_blob)[0], normalized_value + value_to_add_query);
+
+        // the original blob should not change
+        ASSERT_NE(storage_blob, original_blob);
+        ASSERT_NE(query_blob, original_blob);
+    }
+    // The preprocessors should be released by the preprocessors container.
+}
+
+TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t dim = 4;
+    unsigned char alignment = 5;
+
+    float initial_value = 1.0f;
+    float normalized_value = 0.5f;
+    float value_to_add_storage = 7.0f;
+    float value_to_add_query = 2.0f;
+    const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
+
+    // Creating multi preprocessors container
+    auto mixed_preprocessor = new (allocator)
+        DummyMixedPreprocessor<float>(allocator, value_to_add_storage, value_to_add_query);
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(mixed_preprocessor);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to a different memory slot
+        ASSERT_NE(storage_blob, query_blob);
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(query_blob, nullptr);
+
+        // query blob should be aligned
+        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+
+        // They need to be processed by the mixed preprocessor.
+        ASSERT_EQ(((const float *)storage_blob)[0], initial_value + value_to_add_storage);
+        ASSERT_EQ(((const float *)query_blob)[0], initial_value + value_to_add_query);
+
+        // the original blob should not change
+        ASSERT_NE(storage_blob, original_blob);
+        ASSERT_NE(query_blob, original_blob);
+    }
+
+    // adding cosine preprocessor
+    auto cosine_preprocessor = new (allocator) CosinePreprocessor<float>(allocator, dim);
+    multiPPContainer.addPreprocessor(cosine_preprocessor);
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to a different memory slot
+        ASSERT_NE(storage_blob, query_blob);
+        // query memory should be aligned
+        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+        // They need to be allocated and processed
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(query_blob, nullptr);
+        float expected_processed_storage[dim] = {initial_value + value_to_add_storage,
+                                                 initial_value, initial_value, initial_value};
+        float expected_processed_query[dim] = {initial_value + value_to_add_query, initial_value,
+                                               initial_value, initial_value};
+        VecSim_Normalize(expected_processed_storage, dim, VecSimType_FLOAT32);
+        VecSim_Normalize(expected_processed_query, dim, VecSimType_FLOAT32);
+        ASSERT_EQ(((const float *)storage_blob)[0], expected_processed_storage[0]);
+        ASSERT_EQ(((const float *)query_blob)[0], expected_processed_query[0]);
+        // the original blob should not change
+        ASSERT_NE(storage_blob, original_blob);
+        ASSERT_NE(query_blob, original_blob);
+    }
+    // The preprocessors should be released by the preprocessors container.
+}
diff --git a/tests/unit/unit_test_utils.cpp b/tests/unit/unit_test_utils.cpp
index 91525caa6..41d8dbcb5 100644
--- a/tests/unit/unit_test_utils.cpp
+++ b/tests/unit/unit_test_utils.cpp
@@ -378,7 +378,7 @@ size_t getLabelsLookupNodeSize() {
     return memory_after - memory_before;
 }
 namespace test_utils {
-size_t CalcIndexDataSize(VecSimIndex *index, VecSimType data_type) {
+size_t CalcVectorDataSize(VecSimIndex *index, VecSimType data_type) {
     switch (data_type) {
     case VecSimType_FLOAT32: {
         VecSimIndexAbstract<float, float> *abs_index =
diff --git a/tests/unit/unit_test_utils.h b/tests/unit/unit_test_utils.h
index 4e653a469..cafde7552 100644
--- a/tests/unit/unit_test_utils.h
+++ b/tests/unit/unit_test_utils.h
@@ -170,7 +170,7 @@ inline double GetInfVal(VecSimType type) {
 }
 // TODO: Move all test_utils to this namespace
 namespace test_utils {
-size_t CalcIndexDataSize(VecSimIndex *index, VecSimType data_type);
+size_t CalcVectorDataSize(VecSimIndex *index, VecSimType data_type);
 
 template <typename data_t, typename dist_t>
 TieredHNSWIndex<data_t, dist_t> *cast_to_tiered_index(VecSimIndex *index) {

From 21520ad055433f3aea1254ad4ea96ea02eb8ea07 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 19 Dec 2024 13:02:59 +0000
Subject: [PATCH 33/33] add comment to INSTANTIATE_TEST_SUITE_P

---
 tests/unit/test_common.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index 8bb3baf16..e0ccd8d4c 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -778,6 +778,7 @@ constexpr VecSimType vecsim_datatypes[] = {VecSimType_FLOAT32, VecSimType_FLOAT6
                                            VecSimType_BFLOAT16, VecSimType_FLOAT16,
                                            VecSimType_INT8};
 
+/** Run all CommonTypeMetricTests tests for each {VecSimType, VecSimMetric} combination */
 INSTANTIATE_TEST_SUITE_P(CommonTest, CommonTypeMetricTests,
                          testing::Combine(testing::ValuesIn(vecsim_datatypes),
                                           testing::Values(VecSimMetric_L2, VecSimMetric_IP,
@@ -789,6 +790,7 @@ INSTANTIATE_TEST_SUITE_P(CommonTest, CommonTypeMetricTests,
                              return test_name + "_" + metric;
                          });
 
+/** Run all CommonTypeMetricTieredTests tests for each {VecSimType, VecSimMetric} combination */
 INSTANTIATE_TEST_SUITE_P(
     CommonTieredTest, CommonTypeMetricTieredTests,
     testing::Combine(testing::ValuesIn(vecsim_datatypes),