diff --git a/.github/workflows/flow-temp.yml b/.github/workflows/flow-temp.yml index fa35dd541..b6b0de1ca 100644 --- a/.github/workflows/flow-temp.yml +++ b/.github/workflows/flow-temp.yml @@ -11,11 +11,11 @@ on: push: branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push. jobs: - jammy: - uses: ./.github/workflows/task-unit-test.yml - with: - container: ubuntu:jammy - run-valgrind: true + # jammy: + # uses: ./.github/workflows/task-unit-test.yml + # with: + # container: ubuntu:jammy + # run-valgrind: true # alpine3: # uses: ./.github/workflows/task-unit-test.yml # with: @@ -32,11 +32,11 @@ jobs: # with: # container: ubuntu:focal # run-valgrind: false - # bullseye: - # uses: ./.github/workflows/task-unit-test.yml - # with: - # container: debian:bullseye - # run-valgrind: false + bullseye: + uses: ./.github/workflows/task-unit-test.yml + with: + container: debian:bullseye + run-valgrind: false # amazonlinux2: # uses: ./.github/workflows/task-unit-test.yml # with: diff --git a/cmake/x86_64InstructionFlags.cmake b/cmake/x86_64InstructionFlags.cmake index 1fedda7fe..1ff8f48f2 100644 --- a/cmake/x86_64InstructionFlags.cmake +++ b/cmake/x86_64InstructionFlags.cmake @@ -13,6 +13,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") CHECK_CXX_COMPILER_FLAG(-mavx512vbmi2 CXX_AVX512VBMI2) CHECK_CXX_COMPILER_FLAG(-mavx512fp16 CXX_AVX512FP16) CHECK_CXX_COMPILER_FLAG(-mavx512f CXX_AVX512F) + CHECK_CXX_COMPILER_FLAG(-mavx512vnni CXX_AVX512VNNI) CHECK_CXX_COMPILER_FLAG(-mavx2 CXX_AVX2) CHECK_CXX_COMPILER_FLAG(-mavx CXX_AVX) CHECK_CXX_COMPILER_FLAG(-mf16c CXX_F16C) @@ -48,6 +49,10 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") add_compile_definitions(OPT_AVX512_BW_VBMI2) endif() + if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI) + add_compile_definitions(OPT_AVX512_F_BW_VL_VNNI) + endif() + if(CXX_F16C AND CXX_FMA AND CXX_AVX) add_compile_definitions(OPT_F16C) endif() diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h index b4cec5fef..6a37fe48a 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h @@ -57,6 +57,8 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_switchDeleteModes_Test) friend class BF16TieredTest; friend class FP16TieredTest; +friend class INT8TieredTest; +friend class CommonTypeMetricTieredTests_TestDataSizeTieredHNSW_Test; INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics) INDEX_TEST_FRIEND_CLASS(BM_VecSimCommon) diff --git a/src/VecSim/index_factories/brute_force_factory.cpp b/src/VecSim/index_factories/brute_force_factory.cpp index e2919d75f..a7afb9c88 100644 --- a/src/VecSim/index_factories/brute_force_factory.cpp +++ b/src/VecSim/index_factories/brute_force_factory.cpp @@ -33,10 +33,12 @@ inline VecSimIndex *NewIndex_ChooseMultiOrSingle(const BFParams *params, static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) { const BFParams *bfParams = ¶ms->algoParams.bfParams; + size_t dataSize = VecSimParams_GetDataSize(bfParams->type, bfParams->dim, bfParams->metric); AbstractIndexInitParams abstractInitParams = {.allocator = VecSimAllocator::newVecsimAllocator(), .dim = bfParams->dim, .vecType = bfParams->type, + .dataSize = dataSize, .metric = bfParams->metric, .blockSize = bfParams->blockSize, .multi = bfParams->multi, @@ -52,32 +54,30 @@ VecSimIndex *NewIndex(const VecSimParams *params, bool 
is_normalized) { VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &abstractInitParams, bool is_normalized) { - // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors - // and query blobs. - VecSimMetric metric; - if (is_normalized && bfparams->metric == VecSimMetric_Cosine) { - metric = VecSimMetric_IP; - } else { - metric = bfparams->metric; - } + if (bfparams->type == VecSimType_FLOAT32) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, bfparams->dim); + abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(bfparams, abstractInitParams, indexComponents); } else if (bfparams->type == VecSimType_FLOAT64) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, bfparams->dim); + abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(bfparams, abstractInitParams, indexComponents); } else if (bfparams->type == VecSimType_BFLOAT16) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, bfparams->dim); + abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(bfparams, abstractInitParams, indexComponents); } else if (bfparams->type == VecSimType_FLOAT16) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, bfparams->dim); + abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(bfparams, abstractInitParams, indexComponents); + } else if (bfparams->type == VecSimType_INT8) { + IndexComponents indexComponents = CreateIndexComponents( + abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized); + return NewIndex_ChooseMultiOrSingle(bfparams, abstractInitParams, + indexComponents); } // If we got here something is wrong. 
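
[Editor's note] Each NewIndex branch above now forwards the raw metric plus `is_normalized` into CreateIndexComponents, and the new `dataSize` init-param (computed by `VecSimParams_GetDataSize`, added in `vec_utils.cpp` further down) replaces the old `dim * sizeof(type)` assumption. A minimal, self-contained sketch of the resulting INT8 blob sizing — illustrative only; the helper name mirrors the patch, the rest is hypothetical:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of VecSimParams_GetDataSize() for the INT8 case (not part of the patch).
static size_t int8DataSize(size_t dim, bool cosine) {
    size_t dataSize = sizeof(int8_t) * dim;
    if (cosine)
        dataSize += sizeof(float); // room for the norm appended at the end of the vector
    return dataSize;
}

int main() {
    std::printf("%zu\n", int8DataSize(100, false)); // 100: plain int8 elements
    std::printf("%zu\n", int8DataSize(100, true));  // 104: elements + trailing float norm
    return 0;
}
```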
@@ -117,6 +117,11 @@ size_t EstimateInitialSize(const BFParams *params, bool is_normalized) { } else if (params->type == VecSimType_FLOAT16) { est += EstimateComponentsMemory(params->metric, is_normalized); est += EstimateInitialSize_ChooseMultiOrSingle(params->multi); + } else if (params->type == VecSimType_INT8) { + est += EstimateComponentsMemory(params->metric, is_normalized); + est += EstimateInitialSize_ChooseMultiOrSingle(params->multi); + } else { + throw std::invalid_argument("Invalid params->type"); } est += sizeof(DataBlocksContainer) + allocations_overhead; diff --git a/src/VecSim/index_factories/components/components_factory.h b/src/VecSim/index_factories/components/components_factory.h index 6f4e984d1..13eb0eb3c 100644 --- a/src/VecSim/index_factories/components/components_factory.h +++ b/src/VecSim/index_factories/components/components_factory.h @@ -14,14 +14,24 @@ template IndexComponents -CreateIndexComponents(std::shared_ptr allocator, VecSimMetric metric, size_t dim) { +CreateIndexComponents(std::shared_ptr allocator, VecSimMetric metric, size_t dim, + bool is_normalized) { unsigned char alignment = 0; spaces::dist_func_t distFunc = spaces::GetDistFunc(metric, dim, &alignment); // Currently we have only one distance calculator implementation auto indexCalculator = new (allocator) DistanceCalculatorCommon(allocator, distFunc); - PreprocessorsContainerParams ppParams = {.metric = metric, .dim = dim, .alignment = alignment}; + // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors + // and query blobs. + VecSimMetric pp_metric; + if (is_normalized && metric == VecSimMetric_Cosine) { + pp_metric = VecSimMetric_IP; + } else { + pp_metric = metric; + } + PreprocessorsContainerParams ppParams = { + .metric = pp_metric, .dim = dim, .alignment = alignment}; auto preprocessors = CreatePreprocessorsContainer(allocator, ppParams); return {indexCalculator, preprocessors}; diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp index dbb5843a8..05f1ae1b7 100644 --- a/src/VecSim/index_factories/hnsw_factory.cpp +++ b/src/VecSim/index_factories/hnsw_factory.cpp @@ -33,10 +33,14 @@ NewIndex_ChooseMultiOrSingle(const HNSWParams *params, static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) { const HNSWParams *hnswParams = ¶ms->algoParams.hnswParams; + + size_t dataSize = + VecSimParams_GetDataSize(hnswParams->type, hnswParams->dim, hnswParams->metric); AbstractIndexInitParams abstractInitParams = {.allocator = VecSimAllocator::newVecsimAllocator(), .dim = hnswParams->dim, .vecType = hnswParams->type, + .dataSize = dataSize, .metric = hnswParams->metric, .blockSize = hnswParams->blockSize, .multi = hnswParams->multi, @@ -48,36 +52,32 @@ VecSimIndex *NewIndex(const VecSimParams *params, bool is_normalized) { const HNSWParams *hnswParams = ¶ms->algoParams.hnswParams; AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params); - // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors - // and query blobs. 
- VecSimMetric metric; - if (is_normalized && hnswParams->metric == VecSimMetric_Cosine) { - metric = VecSimMetric_IP; - } else { - metric = hnswParams->metric; - } - if (hnswParams->type == VecSimType_FLOAT32) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, hnswParams->dim); + abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(hnswParams, abstractInitParams, indexComponents); } else if (hnswParams->type == VecSimType_FLOAT64) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, hnswParams->dim); + abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(hnswParams, abstractInitParams, indexComponents); } else if (hnswParams->type == VecSimType_BFLOAT16) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, hnswParams->dim); + abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(hnswParams, abstractInitParams, indexComponents); } else if (hnswParams->type == VecSimType_FLOAT16) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, hnswParams->dim); + abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized); return NewIndex_ChooseMultiOrSingle(hnswParams, abstractInitParams, indexComponents); + } else if (hnswParams->type == VecSimType_INT8) { + IndexComponents indexComponents = CreateIndexComponents( + abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized); + return NewIndex_ChooseMultiOrSingle(hnswParams, abstractInitParams, + indexComponents); } // If we got here something is wrong. 
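
[Editor's note] The Cosine-to-IP substitution deleted here (and in the brute-force factory above) now lives in a single place, CreateIndexComponents in `components_factory.h`. A standalone sketch of that behavior, with hypothetical names: when the caller guarantees its vectors are pre-normalized, cosine distance reduces to inner product, so the preprocessor can skip the normalization step.

```cpp
#include <cassert>

enum MetricSketch { M_L2, M_IP, M_Cosine }; // hypothetical stand-in for VecSimMetric

// Same logic as the pp_metric selection inside CreateIndexComponents.
MetricSketch preprocessorMetric(MetricSketch metric, bool is_normalized) {
    return (is_normalized && metric == M_Cosine) ? M_IP : metric;
}

int main() {
    assert(preprocessorMetric(M_Cosine, true) == M_IP);      // normalized: skip normalization
    assert(preprocessorMetric(M_Cosine, false) == M_Cosine); // not normalized: keep Cosine
    assert(preprocessorMetric(M_L2, true) == M_L2);          // other metrics unaffected
    return 0;
}
```

Centralizing the substitution means the factories can pass the raw metric plus the `is_normalized` flag, instead of each one re-deriving the preprocessor metric.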
@@ -114,6 +114,11 @@ size_t EstimateInitialSize(const HNSWParams *params, bool is_normalized) { } else if (params->type == VecSimType_FLOAT16) { est += EstimateComponentsMemory(params->metric, is_normalized); est += EstimateInitialSize_ChooseMultiOrSingle(params->multi); + } else if (params->type == VecSimType_INT8) { + est += EstimateComponentsMemory(params->metric, is_normalized); + est += EstimateInitialSize_ChooseMultiOrSingle(params->multi); + } else { + throw std::invalid_argument("Invalid params->type"); } est += sizeof(DataBlocksContainer) + allocations_overhead; @@ -205,34 +210,32 @@ VecSimIndex *NewIndex(const std::string &location, bool is_normalized) { VecSimParams vecsimParams = {.algo = VecSimAlgo_HNSWLIB, .algoParams = {.hnswParams = HNSWParams{params}}}; - VecSimMetric metric; - if (is_normalized && params.metric == VecSimMetric_Cosine) { - metric = VecSimMetric_IP; - } else { - metric = params.metric; - } - AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(&vecsimParams); if (params.type == VecSimType_FLOAT32) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, abstractInitParams.dim); + abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized); return NewIndex_ChooseMultiOrSingle(input, ¶ms, abstractInitParams, indexComponents, version); } else if (params.type == VecSimType_FLOAT64) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, abstractInitParams.dim); + abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized); return NewIndex_ChooseMultiOrSingle(input, ¶ms, abstractInitParams, indexComponents, version); } else if (params.type == VecSimType_BFLOAT16) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, abstractInitParams.dim); + abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized); return NewIndex_ChooseMultiOrSingle(input, ¶ms, abstractInitParams, indexComponents, version); } else if (params.type == VecSimType_FLOAT16) { IndexComponents indexComponents = CreateIndexComponents( - abstractInitParams.allocator, metric, abstractInitParams.dim); + abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized); return NewIndex_ChooseMultiOrSingle(input, ¶ms, abstractInitParams, indexComponents, version); + } else if (params.type == VecSimType_INT8) { + IndexComponents indexComponents = CreateIndexComponents( + abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized); + return NewIndex_ChooseMultiOrSingle(input, ¶ms, abstractInitParams, + indexComponents, version); } else { auto bad_name = VecSimType_ToString(params.type); if (bad_name == nullptr) { diff --git a/src/VecSim/index_factories/tiered_factory.cpp b/src/VecSim/index_factories/tiered_factory.cpp index 68635e4ca..930630692 100644 --- a/src/VecSim/index_factories/tiered_factory.cpp +++ b/src/VecSim/index_factories/tiered_factory.cpp @@ -42,9 +42,12 @@ inline VecSimIndex *NewIndex(const TieredIndexParams *params) { BFParams bf_params = NewBFParams(params); std::shared_ptr flat_allocator = VecSimAllocator::newVecsimAllocator(); + size_t dataSize = VecSimParams_GetDataSize(bf_params.type, bf_params.dim, bf_params.metric); + AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator, .dim = bf_params.dim, .vecType = bf_params.type, + .dataSize = dataSize, .metric = bf_params.metric, .blockSize = bf_params.blockSize, 
                                                  .multi = bf_params.multi,
@@ -80,6 +83,10 @@ inline size_t EstimateInitialSize(const TieredIndexParams *params) {
         est += sizeof(TieredHNSWIndex<bfloat16, float>);
     } else if (hnsw_params.type == VecSimType_FLOAT16) {
         est += sizeof(TieredHNSWIndex<float16, float>);
+    } else if (hnsw_params.type == VecSimType_INT8) {
+        est += sizeof(TieredHNSWIndex<int8_t, float>);
+    } else {
+        throw std::invalid_argument("Invalid hnsw_params.type");
     }
 
     return est;
@@ -96,6 +103,8 @@ VecSimIndex *NewIndex(const TieredIndexParams *params) {
         return TieredHNSWFactory::NewIndex<bfloat16>(params);
     } else if (type == VecSimType_FLOAT16) {
         return TieredHNSWFactory::NewIndex<float16>(params);
+    } else if (type == VecSimType_INT8) {
+        return TieredHNSWFactory::NewIndex<int8_t>(params);
     }
     return nullptr; // Invalid type.
 }
diff --git a/src/VecSim/spaces/CMakeLists.txt b/src/VecSim/spaces/CMakeLists.txt
index 9cc0baaaf..1fc9473b2 100644
--- a/src/VecSim/spaces/CMakeLists.txt
+++ b/src/VecSim/spaces/CMakeLists.txt
@@ -44,6 +44,12 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
         list(APPEND OPTIMIZATIONS functions/AVX512F.cpp)
     endif()
 
+    if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI)
+        message("Building with AVX512F, AVX512BW, AVX512VL and AVX512VNNI")
+        set_source_files_properties(functions/AVX512F_BW_VL_VNNI.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512bw -mavx512vl -mavx512vnni")
+        list(APPEND OPTIMIZATIONS functions/AVX512F_BW_VL_VNNI.cpp)
+    endif()
+
     if(CXX_AVX2)
         message("Building with AVX2")
         set_source_files_properties(functions/AVX2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
diff --git a/src/VecSim/spaces/IP/IP.cpp b/src/VecSim/spaces/IP/IP.cpp
index 98ad07676..0884df3bb 100644
--- a/src/VecSim/spaces/IP/IP.cpp
+++ b/src/VecSim/spaces/IP/IP.cpp
@@ -66,3 +66,27 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
     }
     return 1.0f - res;
 }
+
+static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    int res = 0;
+    for (size_t i = 0; i < dimension; i++) {
+        res += pVect1[i] * pVect2[i];
+    }
+    return res;
+}
+
+float INT8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    return 1 - INT8_InnerProductImp(pVect1v, pVect2v, dimension);
+}
+
+float INT8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    // We expect the vectors' norm to be stored at the end of the vector.
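+    // I.e., the blob layout is [dim x int8 | float norm]; see VecSimParams_GetDataSize() and
+    // int8_normalizeVector() later in this patch.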
+    float norm_v1 =
+        *reinterpret_cast<const float *>(static_cast<const int8_t *>(pVect1v) + dimension);
+    float norm_v2 =
+        *reinterpret_cast<const float *>(static_cast<const int8_t *>(pVect2v) + dimension);
+    return 1.0f - float(INT8_InnerProductImp(pVect1v, pVect2v, dimension)) / (norm_v1 * norm_v2);
+}
diff --git a/src/VecSim/spaces/IP/IP.h b/src/VecSim/spaces/IP/IP.h
index 50fecef33..d712499ed 100644
--- a/src/VecSim/spaces/IP/IP.h
+++ b/src/VecSim/spaces/IP/IP.h
@@ -16,3 +16,6 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
 float BF16_InnerProduct_LittleEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
 float BF16_InnerProduct_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
+
+float INT8_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension);
+float INT8_Cosine(const void *pVect1, const void *pVect2, size_t dimension);
diff --git a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
new file mode 100644
index 000000000..35223d8b9
--- /dev/null
+++ b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h
@@ -0,0 +1,77 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "VecSim/spaces/space_includes.h"
+
+static inline void InnerProductStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &sum) {
+    __m256i temp_a = _mm256_loadu_epi8(pVect1);
+    __m512i va = _mm512_cvtepi8_epi16(temp_a);
+    pVect1 += 32;
+
+    __m256i temp_b = _mm256_loadu_epi8(pVect2);
+    __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+    pVect2 += 32;
+
+    // _mm512_dpwssd_epi32(src, a, b)
+    // Multiply groups of 2 adjacent pairs of signed 16-bit integers in `a` with corresponding
+    // 16-bit integers in `b`, producing 2 intermediate signed 32-bit results. Sum these 2 results
+    // with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
+    sum = _mm512_dpwssd_epi32(sum, va, vb);
+}
+
+template <unsigned char residual> // 0..63
+static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    const int8_t *pEnd1 = pVect1 + dimension;
+
+    __m512i sum = _mm512_setzero_epi32();
+
+    // Deal with the remainder first. `dim` is at least 32, so we have at least one full
+    // 32-int8 block; the masked load of up to 32 bytes is therefore guaranteed to be safe.
+    if constexpr (residual % 32) {
+        constexpr __mmask32 mask = (1LU << (residual % 32)) - 1;
+        __m256i temp_a = _mm256_maskz_loadu_epi8(mask, pVect1);
+        __m512i va = _mm512_cvtepi8_epi16(temp_a);
+        pVect1 += residual % 32;
+
+        __m256i temp_b = _mm256_maskz_loadu_epi8(mask, pVect2);
+        __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+        pVect2 += residual % 32;
+
+        sum = _mm512_dpwssd_epi32(sum, va, vb);
+    }
+
+    if constexpr (residual >= 32) {
+        InnerProductStep(pVect1, pVect2, sum);
+    }
+
+    // We dealt with the residual part. We are left with some multiple of 64 int8 elements.
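+    // Each InnerProductStep consumes 32 elements, so the loop body is unrolled twice to cover
+    // one full 64-element chunk per iteration.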
+    while (pVect1 < pEnd1) {
+        InnerProductStep(pVect1, pVect2, sum);
+        InnerProductStep(pVect1, pVect2, sum);
+    }
+
+    return _mm512_reduce_add_epi32(sum);
+}
+
+template <unsigned char residual> // 0..63
+float INT8_InnerProductSIMD64_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+                                                 size_t dimension) {
+    return 1 - INT8_InnerProductImp<residual>(pVect1v, pVect2v, dimension);
+}
+
+template <unsigned char residual> // 0..63
+float INT8_CosineSIMD64_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+                                           size_t dimension) {
+    float ip = INT8_InnerProductImp<residual>(pVect1v, pVect2v, dimension);
+    float norm_v1 =
+        *reinterpret_cast<const float *>(static_cast<const int8_t *>(pVect1v) + dimension);
+    float norm_v2 =
+        *reinterpret_cast<const float *>(static_cast<const int8_t *>(pVect2v) + dimension);
+    return 1.0f - ip / (norm_v1 * norm_v2);
+}
diff --git a/src/VecSim/spaces/IP_space.cpp b/src/VecSim/spaces/IP_space.cpp
index e6da26947..e7129b2e8 100644
--- a/src/VecSim/spaces/IP_space.cpp
+++ b/src/VecSim/spaces/IP_space.cpp
@@ -16,6 +16,7 @@
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
 #include "VecSim/spaces/functions/AVX512FP16_VL.h"
 #include "VecSim/spaces/functions/AVX512BF16_VL.h"
+#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/SSE3.h"
 
@@ -196,4 +197,59 @@ dist_func_t<float> IP_FP16_GetDistFunc(size_t dim, unsigned char *alignment,
     return ret_dist_func;
 }
 
+dist_func_t<float> IP_INT8_GetDistFunc(size_t dim, unsigned char *alignment,
+                                       const void *arch_opt) {
+    unsigned char dummy_alignment;
+    if (alignment == nullptr) {
+        alignment = &dummy_alignment;
+    }
+
+    dist_func_t<float> ret_dist_func = INT8_InnerProduct;
+    // Optimizations assume at least 32 int8 elements. If we have fewer, use the naive
+    // implementation.
+    if (dim < 32) {
+        return ret_dist_func;
+    }
+#ifdef CPU_FEATURES_ARCH_X86_64
+    auto features = (arch_opt == nullptr)
+                        ? cpu_features::GetX86Info().features
+                        : *static_cast<const cpu_features::X86Features *>(arch_opt);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (features.avx512f && features.avx512bw && features.avx512vl && features.avx512vnni) {
+        if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
+            *alignment = 32 * sizeof(int8_t); // align to 256 bits.
+        return Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(dim);
+    }
+#endif
+#endif // __x86_64__
+    return ret_dist_func;
+}
+
+dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment,
+                                           const void *arch_opt) {
+    unsigned char dummy_alignment;
+    if (alignment == nullptr) {
+        alignment = &dummy_alignment;
+    }
+
+    dist_func_t<float> ret_dist_func = INT8_Cosine;
+    // Optimizations assume at least 32 int8 elements. If we have fewer, use the naive
+    // implementation.
+    if (dim < 32) {
+        return ret_dist_func;
+    }
+#ifdef CPU_FEATURES_ARCH_X86_64
+    auto features = (arch_opt == nullptr)
+                        ? cpu_features::GetX86Info().features
+                        : *static_cast<const cpu_features::X86Features *>(arch_opt);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (features.avx512f && features.avx512bw && features.avx512vl && features.avx512vnni) {
+        // For int8 vectors with cosine distance, the extra float norm stored at the end shifts
+        // the allocation size to `dim + sizeof(float)`, so any dim that would keep blocks
+        // 32-aligned necessarily leaves a residual, causing offset loads during the calculation.
+        // To avoid this complexity, we skip alignment here, assuming the performance impact is
+        // negligible.
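+        // The chooser below expands CHOOSE_IMPLEMENTATION with chunk=64, returning the template
+        // instantiation for residual = dim % 64 (see implementation_chooser.h further down).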
+        return Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(dim);
+    }
+#endif
+#endif // __x86_64__
+    return ret_dist_func;
+}
 } // namespace spaces
diff --git a/src/VecSim/spaces/IP_space.h b/src/VecSim/spaces/IP_space.h
index a3ab0f4f6..0d8c3a836 100644
--- a/src/VecSim/spaces/IP_space.h
+++ b/src/VecSim/spaces/IP_space.h
@@ -16,4 +16,8 @@ dist_func_t<float> IP_BF16_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                        const void *arch_opt = nullptr);
 dist_func_t<float> IP_FP16_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                        const void *arch_opt = nullptr);
+dist_func_t<float> IP_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                       const void *arch_opt = nullptr);
+dist_func_t<float> Cosine_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                           const void *arch_opt = nullptr);
 } // namespace spaces
diff --git a/src/VecSim/spaces/L2/L2.cpp b/src/VecSim/spaces/L2/L2.cpp
index 5fba0555e..ef310418b 100644
--- a/src/VecSim/spaces/L2/L2.cpp
+++ b/src/VecSim/spaces/L2/L2.cpp
@@ -70,3 +70,17 @@ float FP16_L2Sqr(const void *pVect1, const void *pVect2, size_t dimension) {
     }
     return res;
 }
+
+float INT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    int res = 0;
+    for (size_t i = 0; i < dimension; i++) {
+        int16_t a = pVect1[i];
+        int16_t b = pVect2[i];
+        int16_t diff = a - b;
+        res += diff * diff;
+    }
+    return float(res);
+}
diff --git a/src/VecSim/spaces/L2/L2.h b/src/VecSim/spaces/L2/L2.h
index c367f2ee1..65649d4eb 100644
--- a/src/VecSim/spaces/L2/L2.h
+++ b/src/VecSim/spaces/L2/L2.h
@@ -16,3 +16,5 @@ float BF16_L2Sqr_LittleEndian(const void *pVect1v, const void *pVect2v, size_t d
 float BF16_L2Sqr_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
 
 float FP16_L2Sqr(const void *pVect1, const void *pVect2, size_t dimension);
+
+float INT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension);
diff --git a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
new file mode 100644
index 000000000..2c8b846af
--- /dev/null
+++ b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h
@@ -0,0 +1,63 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "VecSim/spaces/space_includes.h"
+
+static inline void L2SqrStep(int8_t *&pVect1, int8_t *&pVect2, __m512i &sum) {
+    __m256i temp_a = _mm256_loadu_epi8(pVect1);
+    __m512i va = _mm512_cvtepi8_epi16(temp_a);
+    pVect1 += 32;
+
+    __m256i temp_b = _mm256_loadu_epi8(pVect2);
+    __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+    pVect2 += 32;
+
+    __m512i diff = _mm512_sub_epi16(va, vb);
+    // _mm512_dpwssd_epi32(src, a, b)
+    // Multiply groups of 2 adjacent pairs of signed 16-bit integers in `a` with corresponding
+    // 16-bit integers in `b`, producing 2 intermediate signed 32-bit results. Sum these 2 results
+    // with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
+    sum = _mm512_dpwssd_epi32(sum, diff, diff);
+}
+
+template <unsigned char residual> // 0..63
+float INT8_L2SqrSIMD64_AVX512F_BW_VL_VNNI(const void *pVect1v, const void *pVect2v,
+                                          size_t dimension) {
+    int8_t *pVect1 = (int8_t *)pVect1v;
+    int8_t *pVect2 = (int8_t *)pVect2v;
+
+    const int8_t *pEnd1 = pVect1 + dimension;
+
+    __m512i sum = _mm512_setzero_epi32();
+
+    // Deal with the remainder first.
+    // `dim` is at least 32, so we have at least one full 32-int8 block; loading 32 bytes from
+    // the start of each vector is therefore guaranteed to stay in bounds.
+    if constexpr (residual % 32) {
+        constexpr __mmask32 mask = (1LU << (residual % 32)) - 1;
+        __m256i temp_a = _mm256_loadu_epi8(pVect1);
+        __m512i va = _mm512_cvtepi8_epi16(temp_a);
+        pVect1 += residual % 32;
+
+        __m256i temp_b = _mm256_loadu_epi8(pVect2);
+        __m512i vb = _mm512_cvtepi8_epi16(temp_b);
+        pVect2 += residual % 32;
+
+        // Mask the subtraction so elements beyond the residual contribute zero to the sum.
+        __m512i diff = _mm512_maskz_sub_epi16(mask, va, vb);
+        sum = _mm512_dpwssd_epi32(sum, diff, diff);
+    }
+
+    if constexpr (residual >= 32) {
+        L2SqrStep(pVect1, pVect2, sum);
+    }
+
+    // We dealt with the residual part. We are left with some multiple of 64 int8 elements.
+    while (pVect1 < pEnd1) {
+        L2SqrStep(pVect1, pVect2, sum);
+        L2SqrStep(pVect1, pVect2, sum);
+    }
+
+    return _mm512_reduce_add_epi32(sum);
+}
diff --git a/src/VecSim/spaces/L2_space.cpp b/src/VecSim/spaces/L2_space.cpp
index 1c2b2b59f..c0bec428f 100644
--- a/src/VecSim/spaces/L2_space.cpp
+++ b/src/VecSim/spaces/L2_space.cpp
@@ -15,6 +15,7 @@
 #include "VecSim/spaces/functions/SSE.h"
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
 #include "VecSim/spaces/functions/AVX512FP16_VL.h"
+#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/SSE3.h"
 
@@ -189,4 +190,30 @@ dist_func_t<float> L2_FP16_GetDistFunc(size_t dim, unsigned char *alignment,
     return ret_dist_func;
 }
 
+dist_func_t<float> L2_INT8_GetDistFunc(size_t dim, unsigned char *alignment,
+                                       const void *arch_opt) {
+    unsigned char dummy_alignment;
+    if (alignment == nullptr) {
+        alignment = &dummy_alignment;
+    }
+
+    dist_func_t<float> ret_dist_func = INT8_L2Sqr;
+    // Optimizations assume at least 32 int8 elements. If we have fewer, use the naive
+    // implementation.
+    if (dim < 32) {
+        return ret_dist_func;
+    }
+#ifdef CPU_FEATURES_ARCH_X86_64
+    auto features = (arch_opt == nullptr)
+                        ? cpu_features::GetX86Info().features
+                        : *static_cast<const cpu_features::X86Features *>(arch_opt);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (features.avx512f && features.avx512bw && features.avx512vl && features.avx512vnni) {
+        if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
+            *alignment = 32 * sizeof(int8_t); // align to 256 bits.
+        return Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(dim);
+    }
+#endif
+#endif // __x86_64__
+    return ret_dist_func;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/spaces/L2_space.h b/src/VecSim/spaces/L2_space.h
index 4a2ea801a..48e50a8c2 100644
--- a/src/VecSim/spaces/L2_space.h
+++ b/src/VecSim/spaces/L2_space.h
@@ -16,4 +16,6 @@ dist_func_t<float> L2_BF16_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                        const void *arch_opt = nullptr);
 dist_func_t<float> L2_FP16_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                        const void *arch_opt = nullptr);
+dist_func_t<float> L2_INT8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                       const void *arch_opt = nullptr);
 } // namespace spaces
diff --git a/src/VecSim/spaces/computer/calculator.h b/src/VecSim/spaces/computer/calculator.h
index 36e76deed..64e0d8dae 100644
--- a/src/VecSim/spaces/computer/calculator.h
+++ b/src/VecSim/spaces/computer/calculator.h
@@ -26,10 +26,10 @@ class IndexCalculatorInterface : public VecsimBaseObject {
 /**
  * This object purpose is to calculate the distance between two vectors.
  * It extends the IndexCalculatorInterface class' type to hold the distance function.
- * Every specific implmentation of the distance claculater should hold by refrence or by value the
+ * Every specific implementation of the distance calculator should hold by reference or by value the
  * parameters required for the calculation. The distance calculation API of all DistanceCalculator
  * classes is: calc_dist(v1,v2,dim). Internally it calls the distance function according the
- * template signature, allowing fexability in the distance function arguments.
+ * template signature, allowing flexibility in the distance function arguments.
  */
 template <typename DistType, typename DistFuncType>
 class DistanceCalculatorInterface : public IndexCalculatorInterface<DistType> {
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
new file mode 100644
index 000000000..661c2c945
--- /dev/null
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -0,0 +1,36 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "AVX512F_BW_VL_VNNI.h"
+
+#include "VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_INT8.h"
+#include "VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_INT8.h"
+
+namespace spaces {
+
+#include "implementation_chooser.h"
+
+dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_L2SqrSIMD64_AVX512F_BW_VL_VNNI);
+    return ret_dist_func;
+}
+
+dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_InnerProductSIMD64_AVX512F_BW_VL_VNNI);
+    return ret_dist_func;
+}
+
+dist_func_t<float> Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, INT8_CosineSIMD64_AVX512F_BW_VL_VNNI);
+    return ret_dist_func;
+}
+
+#include "implementation_chooser_cleanup.h"
+
+} // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
new file mode 100644
index 000000000..532a33c76
--- /dev/null
+++ b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
@@ -0,0 +1,17 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#pragma once
+
+#include "VecSim/spaces/spaces.h"
+
+namespace spaces {
+
+dist_func_t<float> Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim);
+dist_func_t<float> Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim);
+dist_func_t<float> Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_t dim);
+
+} // namespace spaces
diff --git a/src/VecSim/spaces/functions/implementation_chooser.h b/src/VecSim/spaces/functions/implementation_chooser.h
index 2903b8cc4..b32ad56c6 100644
--- a/src/VecSim/spaces/functions/implementation_chooser.h
+++ b/src/VecSim/spaces/functions/implementation_chooser.h
@@ -25,23 +25,28 @@
 // of 4N, 4N+1, 4N+2, 4N+3.
 #define C4(X, func, N) X(4 * N, func) X(4 * N + 1, func) X(4 * N + 2, func) X(4 * N + 3, func)
 
-// Macros for 8, 16 and 32 cases. Used to collapse the switch statement. Expands into 0-31, 0-15 or
-// 0-7 cases.
+// Macros for 8, 16, 32 and 64 cases. Used to collapse the switch statement. Expands into 0-63,
+// 0-31, 0-15 or 0-7 cases.
 #define CASES32(X, func)                                                                           \
     C4(X, func, 0)                                                                                 \
     C4(X, func, 1)                                                                                 \
     C4(X, func, 2) C4(X, func, 3) C4(X, func, 4) C4(X, func, 5) C4(X, func, 6) C4(X, func, 7)
 #define CASES16(X, func) C4(X, func, 0) C4(X, func, 1) C4(X, func, 2) C4(X, func, 3)
 #define CASES8(X, func)  C4(X, func, 0) C4(X, func, 1)
+#define CASES64(X, func)                                                                           \
+    CASES32(X, func)                                                                               \
+    C4(X, func, 8)                                                                                 \
+    C4(X, func, 9)                                                                                 \
+    C4(X, func, 10) C4(X, func, 11) C4(X, func, 12) C4(X, func, 13) C4(X, func, 14) C4(X, func, 15)
 
 // Main macro. Expands into a switch statement that chooses the implementation based on the
 // dimension's remainder.
 // @params:
 //      out: The output variable that will be set to the chosen implementation.
 //      dim: The dimension.
-//      chunk: The chunk size. Can be 32, 16 or 8. 32 for 16-bit elements, 16 for 32-bit elements, 8
-//      for 64-bit elements. func: The templated function that we want to choose the implementation
-//      for.
+//      chunk: The chunk size. Can be 64, 32, 16 or 8. 64 for 8-bit elements, 32 for 16-bit
+//      elements, 16 for 32-bit elements, 8 for 64-bit elements.
+//      func: The templated function that we want to choose the implementation for.
 #define CHOOSE_IMPLEMENTATION(out, dim, chunk, func)                                               \
     do {                                                                                           \
         decltype(out) __ret_dist_func;                                                             \
diff --git a/src/VecSim/spaces/normalize/compute_norm.h b/src/VecSim/spaces/normalize/compute_norm.h
new file mode 100644
index 000000000..d58139648
--- /dev/null
+++ b/src/VecSim/spaces/normalize/compute_norm.h
@@ -0,0 +1,25 @@
+/*
+ *Copyright Redis Ltd. 2021 - present
+ *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
+ *the Server Side Public License v1 (SSPLv1).
+ */
+
+#pragma once
+
+#include <cmath>
+
+namespace spaces {
+
+template <typename DataType>
+static inline float IntegralType_ComputeNorm(const DataType *vec, const size_t dim) {
+    int sum = 0;
+
+    for (size_t i = 0; i < dim; i++) {
+        // No need to cast to int because C++ integer promotion ensures vec[i] is promoted to int
+        // before multiplication.
+        sum += vec[i] * vec[i];
+    }
+    return sqrt(sum);
+}
+
+} // namespace spaces
diff --git a/src/VecSim/spaces/normalize/normalize_naive.h b/src/VecSim/spaces/normalize/normalize_naive.h
index 119c19dcf..88967e39a 100644
--- a/src/VecSim/spaces/normalize/normalize_naive.h
+++ b/src/VecSim/spaces/normalize/normalize_naive.h
@@ -8,6 +8,7 @@
 
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
+#include "compute_norm.h"
 #include <cmath>
 #include <cstdint>
 
@@ -73,4 +74,13 @@ static inline void float16_normalizeVector(void *vec, const size_t dim) {
     }
 }
 
+static inline void int8_normalizeVector(void *vec, const size_t dim) {
+    int8_t *input_vector = static_cast<int8_t *>(vec);
+
+    float norm = IntegralType_ComputeNorm(input_vector, dim);
+
+    // Store the norm at the end of the vector.
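+    // The extra sizeof(float) bytes at the end were allocated for exactly this purpose (see
+    // VecSimParams_GetDataSize in vec_utils.cpp).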
+    *reinterpret_cast<float *>(input_vector + dim) = norm;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/spaces/space_includes.h b/src/VecSim/spaces/space_includes.h
index e7fe6163c..9a5ace731 100644
--- a/src/VecSim/spaces/space_includes.h
+++ b/src/VecSim/spaces/space_includes.h
@@ -16,6 +16,12 @@
 #if defined(__AVX512F__) || defined(__AVX__) || defined(__SSE__)
 #if defined(__GNUC__)
 #include <x86intrin.h>
+// Override missing implementations in GCC < 11
+// Full list and suggested alternatives for each missing function can be found here:
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95483
+#if (__GNUC__ < 11)
+#define _mm256_loadu_epi8(ptr) _mm256_maskz_loadu_epi8(~0, ptr)
+#endif
 #elif defined(__clang__)
 #include <x86intrin.h>
 #elif defined(_MSC_VER)
 #include <intrin.h>
diff --git a/src/VecSim/spaces/spaces.cpp b/src/VecSim/spaces/spaces.cpp
index 84f71b463..c73ec997f 100644
--- a/src/VecSim/spaces/spaces.cpp
+++ b/src/VecSim/spaces/spaces.cpp
@@ -70,6 +70,20 @@ dist_func_t<float> GetDistFunc<float16, float>(VecSimMetric metric, size_t dim,
     throw std::invalid_argument("Invalid metric");
 }
 
+template <>
+dist_func_t<float> GetDistFunc<int8_t, float>(VecSimMetric metric, size_t dim,
+                                              unsigned char *alignment) {
+    switch (metric) {
+    case VecSimMetric_Cosine:
+        return Cosine_INT8_GetDistFunc(dim, alignment);
+    case VecSimMetric_IP:
+        return IP_INT8_GetDistFunc(dim, alignment);
+    case VecSimMetric_L2:
+        return L2_INT8_GetDistFunc(dim, alignment);
+    }
+    throw std::invalid_argument("Invalid metric");
+}
+
 template <>
 normalizeVector_f GetNormalizeFunc<float>(void) {
     return normalizeVector_imp<float>;
@@ -94,4 +108,10 @@ normalizeVector_f GetNormalizeFunc<float16>(void) {
     return float16_normalizeVector;
 }
 
+/** The returned function computes the norm and stores it at the end of the given vector. */
+template <>
+normalizeVector_f GetNormalizeFunc<int8_t>(void) {
+    return int8_normalizeVector;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/utils/vec_utils.cpp b/src/VecSim/utils/vec_utils.cpp
index 99160c247..cbe61338b 100644
--- a/src/VecSim/utils/vec_utils.cpp
+++ b/src/VecSim/utils/vec_utils.cpp
@@ -27,6 +27,7 @@ const char *VecSimCommonStrings::FLOAT32_STRING = "FLOAT32";
 const char *VecSimCommonStrings::FLOAT64_STRING = "FLOAT64";
 const char *VecSimCommonStrings::BFLOAT16_STRING = "BFLOAT16";
 const char *VecSimCommonStrings::FLOAT16_STRING = "FLOAT16";
+const char *VecSimCommonStrings::INT8_STRING = "INT8";
 const char *VecSimCommonStrings::INT32_STRING = "INT32";
 const char *VecSimCommonStrings::INT64_STRING = "INT64";
 
@@ -147,6 +148,8 @@ const char *VecSimType_ToString(VecSimType vecsimType) {
         return VecSimCommonStrings::BFLOAT16_STRING;
     case VecSimType_FLOAT16:
         return VecSimCommonStrings::FLOAT16_STRING;
+    case VecSimType_INT8:
+        return VecSimCommonStrings::INT8_STRING;
     case VecSimType_INT32:
         return VecSimCommonStrings::INT32_STRING;
     case VecSimType_INT64:
@@ -195,6 +198,8 @@ size_t VecSimType_sizeof(VecSimType type) {
         return sizeof(bfloat16);
     case VecSimType_FLOAT16:
         return sizeof(float16);
+    case VecSimType_INT8:
+        return sizeof(int8_t);
     case VecSimType_INT32:
         return sizeof(int32_t);
     case VecSimType_INT64:
@@ -202,3 +207,11 @@ size_t VecSimType_sizeof(VecSimType type) {
     }
     return 0;
 }
+
+size_t VecSimParams_GetDataSize(VecSimType type, size_t dim, VecSimMetric metric) {
+    size_t dataSize = VecSimType_sizeof(type) * dim;
+    if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) {
+        dataSize += sizeof(float); // For the norm
+    }
+    return dataSize;
+}
diff --git a/src/VecSim/utils/vec_utils.h b/src/VecSim/utils/vec_utils.h
index abb0c5688..18a5d1db3 100644
--- a/src/VecSim/utils/vec_utils.h
+++
b/src/VecSim/utils/vec_utils.h @@ -27,6 +27,7 @@ struct VecSimCommonStrings { static const char *FLOAT64_STRING; static const char *BFLOAT16_STRING; static const char *FLOAT16_STRING; + static const char *INT8_STRING; static const char *INT32_STRING; static const char *INT64_STRING; @@ -90,3 +91,6 @@ const char *VecSimMetric_ToString(VecSimMetric vecsimMetric); const char *VecSimSearchMode_ToString(VecSearchMode vecsimSearchMode); size_t VecSimType_sizeof(VecSimType vecsimType); + +/** Returns the size in bytes of a stored or query vector */ +size_t VecSimParams_GetDataSize(VecSimType type, size_t dim, VecSimMetric metric); diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index 56912b07e..1a6d241fb 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -138,6 +138,9 @@ extern "C" void VecSim_Normalize(void *blob, size_t dim, VecSimType type) { spaces::GetNormalizeFunc()(blob, dim); } else if (type == VecSimType_FLOAT16) { spaces::GetNormalizeFunc()(blob, dim); + } else if (type == VecSimType_INT8) { + // assuming blob is large enough to store the norm at the end of the vector + spaces::GetNormalizeFunc()(blob, dim); } } diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h index 943338dee..e8062484a 100644 --- a/src/VecSim/vec_sim_common.h +++ b/src/VecSim/vec_sim_common.h @@ -36,6 +36,7 @@ typedef enum { VecSimType_FLOAT64, VecSimType_BFLOAT16, VecSimType_FLOAT16, + VecSimType_INT8, VecSimType_INT32, VecSimType_INT64 } VecSimType; diff --git a/src/VecSim/vec_sim_debug.cpp b/src/VecSim/vec_sim_debug.cpp index 98cc05c91..395a3a9e0 100644 --- a/src/VecSim/vec_sim_debug.cpp +++ b/src/VecSim/vec_sim_debug.cpp @@ -32,6 +32,9 @@ extern "C" int VecSimDebug_GetElementNeighborsInHNSWGraph(VecSimIndex *index, si } else if (info.type == VecSimType_FLOAT16) { return dynamic_cast *>(index) ->getHNSWElementNeighbors(label, neighborsData); + } else if (info.type == VecSimType_INT8) { + return dynamic_cast *>(index)->getHNSWElementNeighbors( + label, neighborsData); } else { assert(false && "Invalid data type"); } @@ -48,6 +51,9 @@ extern "C" int VecSimDebug_GetElementNeighborsInHNSWGraph(VecSimIndex *index, si } else if (info.type == VecSimType_FLOAT16) { return dynamic_cast *>(index) ->getHNSWElementNeighbors(label, neighborsData); + } else if (info.type == VecSimType_INT8) { + return dynamic_cast *>(index)->getHNSWElementNeighbors( + label, neighborsData); } else { assert(false && "Invalid data type"); } diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h index 0bc60ebbc..d12b23d11 100644 --- a/src/VecSim/vec_sim_index.h +++ b/src/VecSim/vec_sim_index.h @@ -27,6 +27,7 @@ * @param allocator The allocator to use for the index. * @param dim The dimension of the vectors in the index. * @param vecType The type of the vectors in the index. + * @param dataSize The size of stored vectors in bytes. * @param metric The metric to use in the index. * @param blockSize The block size to use in the index. * @param multi Determines if the index should multi-index or not. @@ -36,6 +37,7 @@ struct AbstractIndexInitParams { std::shared_ptr allocator; size_t dim; VecSimType vecType; + size_t dataSize; VecSimMetric metric; size_t blockSize; bool multi; @@ -93,9 +95,6 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { return info; } - spaces::normalizeVector_f - normalize_func; // A pointer to a normalization function of specific type. 
- public: /** * @brief Construct a new Vec Sim Index object @@ -104,13 +103,12 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { VecSimIndexAbstract(const AbstractIndexInitParams ¶ms, const IndexComponents &components) : VecSimIndexInterface(params.allocator), dim(params.dim), vecType(params.vecType), - dataSize(dim * VecSimType_sizeof(vecType)), metric(params.metric), + dataSize(params.dataSize), metric(params.metric), blockSize(params.blockSize ? params.blockSize : DEFAULT_BLOCK_SIZE), indexCalculator(components.indexCalculator), preprocessors(components.preprocessors), - lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx), - normalize_func(spaces::GetNormalizeFunc()) { - + lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx) { assert(VecSimType_sizeof(vecType)); + assert(dataSize); this->vectors = new (this->allocator) DataBlocksContainer( this->blockSize, this->dataSize, this->allocator, this->getAlignment()); } diff --git a/src/python_bindings/bindings.cpp b/src/python_bindings/bindings.cpp index 13215d6a4..e5d7733eb 100644 --- a/src/python_bindings/bindings.cpp +++ b/src/python_bindings/bindings.cpp @@ -300,6 +300,9 @@ class PyHNSWLibIndex : public PyVecSimIndex { } else if (type == VecSimType_FLOAT16) { auto *hnsw = dynamic_cast *>(index.get()); hnsw->saveIndex(location); + } else if (type == VecSimType_INT8) { + auto *hnsw = dynamic_cast *>(index.get()); + hnsw->saveIndex(location); } else { throw std::runtime_error("Invalid index data type"); } @@ -432,6 +435,10 @@ class PyHNSWLibIndex : public PyVecSimIndex { return dynamic_cast *>(this->index.get()) ->checkIntegrity() .valid_state; + } else if (type == VecSimType_INT8) { + return dynamic_cast *>(this->index.get()) + ->checkIntegrity() + .valid_state; } else { throw std::runtime_error("Invalid index data type"); } @@ -534,6 +541,7 @@ PYBIND11_MODULE(VecSim, m) { .value("VecSimType_FLOAT64", VecSimType_FLOAT64) .value("VecSimType_BFLOAT16", VecSimType_BFLOAT16) .value("VecSimType_FLOAT16", VecSimType_FLOAT16) + .value("VecSimType_INT8", VecSimType_INT8) .value("VecSimType_INT32", VecSimType_INT32) .value("VecSimType_INT64", VecSimType_INT64) .export_values(); diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt index 4d25a5499..2fa066e82 100644 --- a/tests/benchmark/CMakeLists.txt +++ b/tests/benchmark/CMakeLists.txt @@ -31,7 +31,7 @@ endforeach() include(${root}/cmake/x86_64InstructionFlags.cmake) -set(DATA_TYPE fp32 fp64 bf16 fp16) +set(DATA_TYPE fp32 fp64 bf16 fp16 int8) foreach(data_type IN LISTS DATA_TYPE) add_executable(bm_spaces_${data_type} spaces_benchmarks/bm_spaces_${data_type}.cpp) target_link_libraries(bm_spaces_${data_type} VectorSimilarity benchmark::benchmark) diff --git a/tests/benchmark/benchmarks.sh b/tests/benchmark/benchmarks.sh index 11872e869..867077ede 100755 --- a/tests/benchmark/benchmarks.sh +++ b/tests/benchmark/benchmarks.sh @@ -13,6 +13,7 @@ if [ -z "$BM_TYPE" ] || [ "$BM_TYPE" = "benchmarks-all" ]; then echo spaces_fp64 echo spaces_bf16 echo spaces_fp16 + echo spaces_int8 elif [ "$BM_TYPE" = "benchmarks-default" ]; then echo basics_single_fp32 echo basics_multi_fp32 @@ -20,6 +21,7 @@ elif [ "$BM_TYPE" = "benchmarks-default" ]; then echo spaces_fp64 echo spaces_bf16 echo spaces_fp16 + echo spaces_int8 # Basic benchmarks elif [ "$BM_TYPE" = "bm-basics-fp32-single" ] ; then echo basics_single_fp32 @@ -66,4 +68,15 @@ elif [ "$BM_TYPE" = "bm-spaces" ] ; then echo spaces_fp16 echo spaces_fp64 echo spaces_bf16 + echo 
spaces_int8 +elif [ "$BM_TYPE" = "bm-spaces-fp32" ] ; then + echo spaces_fp32 +elif [ "$BM_TYPE" = "bm-spaces-fp64" ] ; then + echo spaces_fp64 +elif [ "$BM_TYPE" = "bm-spaces-bf16" ] ; then + echo spaces_bf16 +elif [ "$BM_TYPE" = "bm-spaces-fp16" ] ; then + echo spaces_fp16 +elif [ "$BM_TYPE" = "bm-spaces-int8" ] ; then + echo spaces_int8 fi diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces.h b/tests/benchmark/spaces_benchmarks/bm_spaces.h index 3b55a9032..b7431c43c 100644 --- a/tests/benchmark/spaces_benchmarks/bm_spaces.h +++ b/tests/benchmark/spaces_benchmarks/bm_spaces.h @@ -19,6 +19,7 @@ #include "VecSim/spaces/functions/AVX.h" #include "VecSim/spaces/functions/AVX512BW_VBMI2.h" #include "VecSim/spaces/functions/AVX512BF16_VL.h" +#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h" #include "VecSim/spaces/functions/AVX2.h" #include "VecSim/spaces/functions/F16C.h" #include "VecSim/spaces/functions/SSE3.h" @@ -123,6 +124,12 @@ static constexpr size_t start = min_no_res_th_dim; INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported); \ INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, IP, dim_opt, arch_supported); -#define INITIALIZE_BENCHMARKS_SET(bm_class, type_prefix, arch, dim_opt, arch_supported) \ +#define INITIALIZE_BENCHMARKS_SET_Cosine(bm_class, type_prefix, arch, dim_opt, arch_supported) \ + INITIALIZE_HIGH_DIM(bm_class, type_prefix, arch, Cosine, arch_supported); \ + INITIALIZE_LOW_DIM(bm_class, type_prefix, arch, Cosine, arch_supported); \ + INITIALIZE_EXACT_512BIT_BM(bm_class, type_prefix, arch, Cosine, dim_opt, arch_supported); \ + INITIALIZE_RESIDUAL_BM(bm_class, type_prefix, arch, Cosine, dim_opt, arch_supported); + +#define INITIALIZE_BENCHMARKS_SET_L2_IP(bm_class, type_prefix, arch, dim_opt, arch_supported) \ INITIALIZE_BENCHMARKS_SET_L2(bm_class, type_prefix, arch, dim_opt, arch_supported) \ INITIALIZE_BENCHMARKS_SET_IP(bm_class, type_prefix, arch, dim_opt, arch_supported) diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp index 8022c712a..27fe82a3d 100644 --- a/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp +++ b/tests/benchmark/spaces_benchmarks/bm_spaces_bf16.cpp @@ -26,20 +26,20 @@ INITIALIZE_BENCHMARKS_SET_IP(BM_VecSimSpaces_BF16, BF16, AVX512BF16_VL, 32, // AVX512 functions #ifdef OPT_AVX512_BW_VBMI2 bool avx512_bw_vbmi2_supported = opt.avx512bw && opt.avx512vbmi2; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_BF16, BF16, AVX512BW_VBMI2, 32, - avx512_bw_vbmi2_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_BF16, BF16, AVX512BW_VBMI2, 32, + avx512_bw_vbmi2_supported); #endif // AVX512F // AVX functions #ifdef OPT_AVX2 bool avx2_supported = opt.avx2; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_BF16, BF16, AVX2, 32, avx2_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_BF16, BF16, AVX2, 32, avx2_supported); #endif // AVX // SSE functions #ifdef OPT_SSE3 bool sse3_supported = opt.sse3; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_BF16, BF16, SSE3, 32, sse3_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_BF16, BF16, SSE3, 32, sse3_supported); #endif // SSE #endif // x86_64 diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp index c9bc42b0b..9457bc77d 100644 --- a/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp +++ b/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp @@ -22,8 +22,8 @@ cpu_features::X86Features opt = 
cpu_features::GetX86Info().features; class BM_VecSimSpaces_FP16_adv : public BM_VecSimSpaces<_Float16> {}; bool avx512fp16_vl_supported = opt.avx512_fp16 && opt.avx512vl; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP16_adv, FP16, AVX512FP16_VL, 32, - avx512fp16_vl_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP16_adv, FP16, AVX512FP16_VL, 32, + avx512fp16_vl_supported); INITIALIZE_NAIVE_BM(BM_VecSimSpaces_FP16_adv, FP16, InnerProduct, 32); INITIALIZE_NAIVE_BM(BM_VecSimSpaces_FP16_adv, FP16, L2Sqr, 32); @@ -32,12 +32,12 @@ INITIALIZE_NAIVE_BM(BM_VecSimSpaces_FP16_adv, FP16, L2Sqr, 32); // OPT_AVX512F functions #ifdef OPT_AVX512F bool avx512f_supported = opt.avx512f; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP16, FP16, AVX512F, 32, avx512f_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP16, FP16, AVX512F, 32, avx512f_supported); #endif // OPT_AVX512F // AVX functions #ifdef OPT_F16C bool avx512_bw_f16c_supported = opt.f16c && opt.fma3 && opt.avx; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP16, FP16, F16C, 32, avx512_bw_f16c_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP16, FP16, F16C, 32, avx512_bw_f16c_supported); #endif // OPT_F16C #endif // x86_64 diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp index 289e42405..106b2abc8 100644 --- a/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp +++ b/tests/benchmark/spaces_benchmarks/bm_spaces_fp32.cpp @@ -13,19 +13,19 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features; // AVX512 functions #ifdef OPT_AVX512F bool avx512f_supported = opt.avx512f; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP32, FP32, AVX512F, 16, avx512f_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP32, FP32, AVX512F, 16, avx512f_supported); #endif // AVX512F // AVX functions #ifdef OPT_AVX bool avx_supported = opt.avx; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP32, FP32, AVX, 16, avx_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP32, FP32, AVX, 16, avx_supported); #endif // AVX // SSE functions #ifdef OPT_SSE bool sse_supported = opt.sse; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP32, FP32, SSE, 16, sse_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP32, FP32, SSE, 16, sse_supported); #endif // SSE #endif // x86_64 diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp index 19157f03f..01052cebc 100644 --- a/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp +++ b/tests/benchmark/spaces_benchmarks/bm_spaces_fp64.cpp @@ -13,19 +13,19 @@ cpu_features::X86Features opt = cpu_features::GetX86Info().features; // AVX512 functions #ifdef OPT_AVX512F bool avx512f_supported = opt.avx512f; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP64, FP64, AVX512F, 8, avx512f_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP64, FP64, AVX512F, 8, avx512f_supported); #endif // AVX512F // AVX functions #ifdef OPT_AVX bool avx_supported = opt.avx; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP64, FP64, AVX, 8, avx_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP64, FP64, AVX, 8, avx_supported); #endif // AVX // SSE functions #ifdef OPT_SSE bool sse_supported = opt.sse; -INITIALIZE_BENCHMARKS_SET(BM_VecSimSpaces_FP64, FP64, SSE, 8, sse_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_FP64, FP64, SSE, 8, sse_supported); #endif // SSE #endif // x86_64 diff --git 
a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp new file mode 100644 index 000000000..25b7e85e0 --- /dev/null +++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp @@ -0,0 +1,56 @@ +/* + *Copyright Redis Ltd. 2021 - present + *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or + *the Server Side Public License v1 (SSPLv1). + */ +#include +#include +#include +#include "utils/tests_utils.h" +#include "bm_spaces.h" + +class BM_VecSimSpaces_Integers_INT8 : public benchmark::Fixture { +protected: + std::mt19937 rng; + size_t dim; + int8_t *v1, *v2; + +public: + BM_VecSimSpaces_Integers_INT8() { rng.seed(47); } + ~BM_VecSimSpaces_Integers_INT8() = default; + + void SetUp(const ::benchmark::State &state) { + dim = state.range(0); + // Allocate vector with extra space for cosine calculations + v1 = new int8_t[dim + sizeof(float)]; + v2 = new int8_t[dim + sizeof(float)]; + test_utils::populate_int8_vec(v1, dim, 123); + test_utils::populate_int8_vec(v2, dim, 1234); + + // Store the norm in the extra space for cosine calculations + *(float *)(v1 + dim) = test_utils::integral_compute_norm(v1, dim); + *(float *)(v2 + dim) = test_utils::integral_compute_norm(v2, dim); + } + void TearDown(const ::benchmark::State &state) { + delete v1; + delete v2; + } +}; + +#ifdef CPU_FEATURES_ARCH_X86_64 +cpu_features::X86Features opt = cpu_features::GetX86Info().features; + +// AVX512_F_BW_VL_VNNI functions +#ifdef OPT_AVX512_F_BW_VL_VNNI +bool avx512_f_bw_vl_vnni_supported = opt.avx512f && opt.avx512bw && opt.avx512vl && opt.avx512vnni; +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32, + avx512_f_bw_vl_vnni_supported); +INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_INT8, INT8, AVX512F_BW_VL_VNNI, 32, + avx512_f_bw_vl_vnni_supported) +#endif // AVX512_F_BW_VL_VNNI + +#endif // x86_64 + + INITIALIZE_NAIVE_BM(BM_VecSimSpaces_Integers_INT8, INT8, InnerProduct, 32); +INITIALIZE_NAIVE_BM(BM_VecSimSpaces_Integers_INT8, INT8, L2Sqr, 32); +BENCHMARK_MAIN(); diff --git a/tests/flow/common.py b/tests/flow/common.py index ea5333884..036862065 100644 --- a/tests/flow/common.py +++ b/tests/flow/common.py @@ -24,6 +24,7 @@ def create_hnsw_params(dim, num_elements, metric, data_type, ef_construction=200 hnsw_params.multi = is_multi return hnsw_params + # Helper function for creating an index,uses the default HNSW parameters if not specified. def create_hnsw_index(dim, num_elements, metric, data_type, ef_construction=200, m=16, ef_runtime=10, epsilon=0.01, is_multi=False): @@ -40,6 +41,23 @@ def create_hnsw_index(dim, num_elements, metric, data_type, ef_construction=200, return HNSWIndex(hnsw_params) +# Helper function for creating an index, uses the default flat parameters if not specified. 
+def create_flat_index(dim, metric, data_type, is_multi=False): + bfparams = BFParams() + + bfparams.dim = dim + bfparams.type = data_type + bfparams.metric = metric + bfparams.multi = is_multi + + return BFIndex(bfparams) + +def create_add_vectors(index, vectors): + label_to_vec_list = [] + for i, vector in enumerate(vectors): + index.add_vector(vector, i) + label_to_vec_list.append((i, vector)) + return label_to_vec_list # Compute the expected speedup as a function of the expected parallel section rate of the code by Amdahl's law def expected_speedup(expected_parallel_rate, n_threads): @@ -61,9 +79,20 @@ def vec_to_bfloat16(vec): def vec_to_float16(vec): return vec.astype(np.float16) +def create_int8_vectors(shape, rng: np.random.Generator = None): + rng = np.random.default_rng(seed=42) if rng is None else rng + return rng.integers(low=-128, high=127, size=shape, dtype=np.int8) + def get_ground_truth_results(dist_func, query, vectors, k): results = [{"dist": dist_func(query, vec), "label": key} for key, vec in vectors] results = sorted(results, key=lambda x: x["dist"]) keys = [res["label"] for res in results[:k]] return results, keys + +def fp32_expand_and_calc_cosine_dist(a, b): + # stupid numpy doesn't make any intermediate conversions when handling small types + # so we might get overflow. We need to convert to float32 ourselves. + a_float32 = a.astype(np.float32) + b_float32 = b.astype(np.float32) + return spatial.distance.cosine(a_float32, b_float32) diff --git a/tests/flow/test_bruteforce.py b/tests/flow/test_bruteforce.py index b3492b7fe..ee7b7a5b0 100644 --- a/tests/flow/test_bruteforce.py +++ b/tests/flow/test_bruteforce.py @@ -536,3 +536,201 @@ def test_bf_float16_multivalue(): assert_allclose(bf_labels, [keys], rtol=1e-5, atol=0) assert_allclose(bf_distances, [dists], rtol=1e-5, atol=0) + +''' +A Class to run common tests for BF index + +The following tests will *automatically* run if the class is inherited: +* test_serialization - single L2 index +* test_L2 - single L2 index +* test_batch_iterator - single L2 index + +The following tests should be *explicitly* called from a method prefixed with test_* +# range_query(dist_func) - single cosine index + +@param create_data_func is a function expects num_elements, dim, [and optional np.random.Generator] as input and +returns a (num_elements, dim) numpy array of vectors +uses multi L2 index +# multi_value(create_data_func, num_per_label) - +''' +class GeneralTest(): + dim = 128 + num_elements = 10_000 + num_queries = 1 + + data_type = None + + rng = np.random.default_rng(seed=42) + vectors_data = None + query_data = None + + # single FLAT index with L2 metric + cache_flat_index_L2_single = None + cached_label_to_vec_list = None + + @classmethod + def create_index(cls, metric = VecSimMetric_L2, is_multi=False): + assert cls.data_type is not None + return create_flat_index(cls.dim, metric, cls.data_type, is_multi=is_multi) + + @classmethod + def create_add_vectors(cls, index): + assert cls.vectors_data is not None + return create_add_vectors(index, cls.vectors_data) + + @classmethod + def get_cached_single_L2_index(cls): + if cls.cache_flat_index_L2_single is None: + cls.cache_flat_index_L2_single = cls.create_index() + cls.cached_label_to_vec_list = cls.create_add_vectors(cls.cache_flat_index_L2_single) + return cls.cache_flat_index_L2_single, cls.cached_label_to_vec_list + + @staticmethod + def compute_correct(res_labels, res_dist, gt_labels, gt_dist_label_list): + correct = 0 + for i, label in enumerate(res_labels): + for j, 
+                if label == correct_label:
+                    correct += 1
+                    assert math.isclose(res_dist[i], gt_dist_label_list[j]["dist"], rel_tol=1e-5)
+                    break
+
+        return correct
+
+    @classmethod
+    def knn(cls, index, label_vec_list, dist_func):
+        k = 10
+
+        results, keys = get_ground_truth_results(dist_func, cls.query_data[0], label_vec_list, k)
+        dists = [res["dist"] for res in results]
+        bf_labels, bf_distances = index.knn_query(cls.query_data, k=k)
+        assert_allclose(bf_labels, [keys], rtol=1e-5, atol=0)
+        assert_allclose(bf_distances, [dists[:k]], rtol=1e-5, atol=0)
+        print(f"\nsanity knn test for {cls.data_type} passed")
+
+    def test_L2(self):
+        index, label_to_vec_list = self.get_cached_single_L2_index()
+        self.knn(index, label_to_vec_list, spatial.distance.sqeuclidean)
+
+    def test_batch_iterator(self):
+        index, _ = self.get_cached_single_L2_index()
+        batch_size = 10
+
+        batch_iterator = index.create_batch_iterator(self.query_data)
+        labels_first_batch, distances_first_batch = batch_iterator.get_next_results(batch_size, BY_ID)
+        for i, _ in enumerate(labels_first_batch[0][:-1]):
+            # assert sorting by id
+            assert (labels_first_batch[0][i] < labels_first_batch[0][i + 1])
+
+        _, distances_second_batch = batch_iterator.get_next_results(batch_size, BY_SCORE)
+        for i, dist in enumerate(distances_second_batch[0][:-1]):
+            # assert sorting by score
+            assert (distances_second_batch[0][i] < distances_second_batch[0][i + 1])
+            # assert that every distance in the second batch is higher than any distance of the first batch
+            assert (len(distances_first_batch[0][np.where(distances_first_batch[0] > dist)]) == 0)
+
+        # reset
+        batch_iterator.reset()
+
+        # Run again in batches until depleted
+        batch_size = 1500
+        returned_results_num = 0
+        iterations = 0
+        start = time.time()
+        while batch_iterator.has_next():
+            iterations += 1
+            labels, distances = batch_iterator.get_next_results(batch_size, BY_SCORE)
+            returned_results_num += len(labels[0])
+
+        print(f'Total search time for running batches of size {batch_size} for index with {self.num_elements} vectors of dim={self.dim}: {time.time() - start}')
+        assert (returned_results_num == self.num_elements)
+        assert (iterations == np.ceil(self.num_elements / batch_size))
+
+    ##### Should be explicitly called #####
+    def range_query(self, dist_func):
+        bfindex = self.create_index(VecSimMetric_Cosine)
+        label_to_vec_list = self.create_add_vectors(bfindex)
+        radius = 0.7
+
+        start = time.time()
+        bf_labels, bf_distances = bfindex.range_query(self.query_data[0], radius=radius)
+        end = time.time()
+        res_num = len(bf_labels[0])
+        print(f'\nlookup time for {self.num_elements} vectors with dim={self.dim} took {end - start} seconds, got {res_num} results')
+
+        # Verify that we got exactly all vectors within the range
+        results, keys = get_ground_truth_results(dist_func, self.query_data[0], label_to_vec_list, res_num)
+
+        assert_allclose(max(bf_distances[0]), results[res_num - 1]["dist"], rtol=1e-05)
+        assert np.array_equal(np.array(bf_labels[0]), np.array(keys))
+        assert max(bf_distances[0]) <= radius
+        # Verify that the next closest vector that wasn't returned is not within the range
+        assert results[res_num]["dist"] > radius
+
+        # Expect zero results for radius == 0
+        bf_labels, bf_distances = bfindex.range_query(self.query_data[0], radius=0)
+        assert len(bf_labels[0]) == 0
+
+    def multi_value(self, create_data_func, num_per_label=5):
+        num_labels = self.num_elements // num_per_label
+        k = 10
+
+        data = create_data_func((num_labels, self.dim), self.rng)
+
+        index = self.create_index(is_multi=True)
+
+        vectors = []
+        for i, vector in enumerate(data):
+            for _ in range(num_per_label):
+                index.add_vector(vector, i)
+                vectors.append((i, vector))
+
+        dists = {}
+        for key, vec in vectors:
+            # Setting or updating the score for each label.
+            # If it's the first time we calculate a score for a label, dists.get(key, dist)
+            # returns dist, so the actual score is chosen the first time.
+            dist = spatial.distance.sqeuclidean(self.query_data[0], vec)
+            dists[key] = min(dist, dists.get(key, dist))
+
+        dists = list(dists.items())
+        dists = sorted(dists, key=lambda pair: pair[1])[:k]
+        keys = [key for key, _ in dists]
+        dists = [dist for _, dist in dists]
+
+        start = time.time()
+        bf_labels, bf_distances = index.knn_query(self.query_data[0], k=k)
+        end = time.time()
+
+        print(f'\nlookup time for {self.num_elements} vectors ({num_labels} labels and {num_per_label} vectors per label) with dim={self.dim} took {end - start} seconds')
+
+        assert_allclose(bf_labels, [keys], rtol=1e-5, atol=0)
+        assert_allclose(bf_distances, [dists], rtol=1e-5, atol=0)
+
+class TestINT8(GeneralTest):
+
+    GeneralTest.data_type = VecSimType_INT8
+
+    #### Create vectors
+    GeneralTest.vectors_data = create_int8_vectors((GeneralTest.num_elements, GeneralTest.dim), GeneralTest.rng)
+
+    #### Create queries
+    GeneralTest.query_data = create_int8_vectors((GeneralTest.num_queries, GeneralTest.dim), GeneralTest.rng)
+
+    def test_Cosine(self):
+        index = self.create_index(VecSimMetric_Cosine)
+        label_to_vec_list = self.create_add_vectors(index)
+
+        self.knn(index, label_to_vec_list, fp32_expand_and_calc_cosine_dist)
+
+    def test_range_query(self):
+        self.range_query(fp32_expand_and_calc_cosine_dist)
+
+    def test_multi_value(self):
+        self.multi_value(create_int8_vectors)
diff --git a/tests/flow/test_hnsw.py b/tests/flow/test_hnsw.py
index 9370a651d..f5d6e3fb6 100644
--- a/tests/flow/test_hnsw.py
+++ b/tests/flow/test_hnsw.py
@@ -860,3 +860,270 @@ def test_hnsw_float16_multi_value():
     recall = float(correct) / (k * num_queries)
     print("\nrecall is: \n", recall)
     assert (recall > 0.9)
+
+'''
+A class to run common tests for the HNSW index.
+
+The following tests run *automatically* when the class is inherited:
+* test_serialization - single L2 index
+* test_L2 - single L2 index
+* test_batch_iterator - single L2 index
+
+The following helpers should be called *explicitly* from a method prefixed with test_*:
+# range_query(dist_func) - single cosine index
+# multi_value(create_data_func, num_per_label) - multi L2 index
+
+@param create_data_func: a function that expects num_elements, dim [and an optional
+np.random.Generator] as input and returns a (num_elements, dim) numpy array of vectors.
+'''
+class GeneralTest():
+    dim = 50
+    num_elements = 10_000
+    num_queries = 10
+    M = 32
+    efConstruction = 200
+    efRuntime = 50
+    data_type = None
+
+    rng = np.random.default_rng(seed=42)
+    data = None
+    query_data = None
+
+    # single HNSW index with L2 metric
+    cache_hnsw_index_L2_single = None
+    cached_label_to_vec_list = None
+
+    @classmethod
+    def create_index(cls, metric = VecSimMetric_L2, is_multi=False):
+        assert cls.data_type is not None
+        hnsw_index = create_hnsw_index(cls.dim, 0, metric, cls.data_type, cls.efConstruction, cls.M, cls.efRuntime, is_multi=is_multi)
+        return hnsw_index
+
+    @classmethod
+    def create_add_vectors(cls, hnsw_index):
+        assert cls.data is not None
+        return create_add_vectors(hnsw_index, cls.data)
+
+    @classmethod
+    def get_cached_single_L2_index(cls):
+        if cls.cache_hnsw_index_L2_single is None:
+            cls.cache_hnsw_index_L2_single = cls.create_index()
+            cls.cached_label_to_vec_list = cls.create_add_vectors(cls.cache_hnsw_index_L2_single)
+        return cls.cache_hnsw_index_L2_single, cls.cached_label_to_vec_list
+
+    @staticmethod
+    def compute_correct(res_labels, res_dist, gt_labels, gt_dist_label_list):
+        correct = 0
+        for i, label in enumerate(res_labels):
+            for j, correct_label in enumerate(gt_labels):
+                if label == correct_label:
+                    correct += 1
+                    assert math.isclose(res_dist[i], gt_dist_label_list[j]["dist"], rel_tol=1e-5)
+                    break
+
+        return correct
+
+    @classmethod
+    def knn(cls, hnsw_index, label_vec_list, dist_func):
+        k = 10
+
+        correct = 0
+        for target_vector in cls.query_data:
+            hnswlib_labels, hnswlib_distances = hnsw_index.knn_query(target_vector, k)
+            results, keys = get_ground_truth_results(dist_func, target_vector, label_vec_list, k)
+
+            correct += cls.compute_correct(hnswlib_labels[0], hnswlib_distances[0], keys, results)
+
+        # Measure recall
+        recall = float(correct) / (k * cls.num_queries)
+        print("\nrecall is: \n", recall)
+        assert (recall > 0.9)
+
+    def test_serialization(self):
+        assert self.data_type is not None
+        hnsw_index, label_to_vec_list = self.get_cached_single_L2_index()
+        k = 10
+
+        correct = 0
+        correct_labels = []  # cache the ground-truth labels for the post-reload check
+        for target_vector in self.query_data:
+            hnswlib_labels, hnswlib_distances = hnsw_index.knn_query(target_vector, k)
+            results, keys = get_ground_truth_results(spatial.distance.sqeuclidean, target_vector, label_to_vec_list, k)
+
+            correct_labels.append(keys)
+            correct += self.compute_correct(hnswlib_labels[0], hnswlib_distances[0], keys, results)
+
+        # Measure recall
+        recall = float(correct) / (k * self.num_queries)
+        print("\nrecall is: \n", recall)
+
+        # Persist, delete and restore index.
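+        # (The save/load round trip below must preserve the index exactly: the restored
+        # index is checked for size, data type, graph integrity, and identical recall.)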
+ file_name = os.getcwd() + "/dump" + hnsw_index.save_index(file_name) + + new_hnsw_index = HNSWIndex(file_name) + os.remove(file_name) + assert new_hnsw_index.index_size() == self.num_elements + assert new_hnsw_index.index_type() == self.data_type + assert new_hnsw_index.check_integrity() + + # Check recall + correct_after = 0 + for i, target_vector in enumerate(self.query_data): + hnswlib_labels, _ = new_hnsw_index.knn_query(target_vector, k) + correct_labels_cur = correct_labels[i] + for label in hnswlib_labels[0]: + for correct_label in correct_labels_cur: + if label == correct_label: + correct_after += 1 + break + + # Compare recall after reloading the index + recall_after = float(correct_after) / (k * self.num_queries) + print("\nrecall after is: \n", recall_after) + assert recall == recall_after + + def test_L2(self): + hnsw_index, label_to_vec_list = self.get_cached_single_L2_index() + self.knn(hnsw_index, label_to_vec_list, spatial.distance.sqeuclidean) + + def test_batch_iterator(self): + hnsw_index, _ = self.get_cached_single_L2_index() + + batch_size = 10 + + efRuntime = 180 + hnsw_index.set_ef(efRuntime) + + batch_iterator = hnsw_index.create_batch_iterator(self.query_data) + labels_first_batch, distances_first_batch = batch_iterator.get_next_results(batch_size, BY_ID) + for i, _ in enumerate(labels_first_batch[0][:-1]): + # Assert sorting by id + assert (labels_first_batch[0][i] < labels_first_batch[0][i + 1]) + + _, distances_second_batch = batch_iterator.get_next_results(batch_size, BY_SCORE) + should_have_return_in_first_batch = [] + for i, dist in enumerate(distances_second_batch[0][:-1]): + # Assert sorting by score + assert (distances_second_batch[0][i] < distances_second_batch[0][i + 1]) + # Assert that every distance in the second batch is higher than any distance of the first batch + if len(distances_first_batch[0][np.where(distances_first_batch[0] > dist)]) != 0: + should_have_return_in_first_batch.append(dist) + assert (len(should_have_return_in_first_batch) <= 2) + + # Verify that runtime args are sent properly to the batch iterator. + query_params = VecSimQueryParams() + query_params.hnswRuntimeParams.efRuntime = 5 + batch_iterator_new = hnsw_index.create_batch_iterator(self.query_data, query_params) + _, distances_first_batch_new = batch_iterator_new.get_next_results(batch_size, BY_ID) + # Verify that accuracy is worse with the new lower ef_runtime. 
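+        # (A lower efRuntime explores fewer candidates, so the distances returned by the
+        # new iterator are expected to be larger overall; comparing the sums is a cheap
+        # proxy for that accuracy drop.)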
+        assert (sum(distances_first_batch[0]) < sum(distances_first_batch_new[0]))
+
+        # reset efRuntime
+        hnsw_index.set_ef(self.efRuntime)
+
+    ##### Should be explicitly called #####
+
+    def range_query(self, dist_func):
+        hnsw_index = self.create_index(VecSimMetric_Cosine)
+        label_to_vec_list = self.create_add_vectors(hnsw_index)
+        radius = 0.7
+        recalls = {}
+
+        for epsilon_rt in [0.001, 0.01, 0.1]:
+            query_params = VecSimQueryParams()
+            query_params.hnswRuntimeParams.epsilon = epsilon_rt
+            start = time.time()
+            hnsw_labels, hnsw_distances = hnsw_index.range_query(self.query_data[0], radius=radius, query_param=query_params)
+            end = time.time()
+            res_num = len(hnsw_labels[0])
+
+            dists = sorted([(key, dist_func(self.query_data[0], vec)) for key, vec in label_to_vec_list])
+            actual_results = [(key, dist) for key, dist in dists if dist <= radius]
+
+            print(
+                f'\nlookup time for {self.num_elements} vectors with dim={self.dim} took {end - start} seconds with epsilon={epsilon_rt},'
+                f' got {res_num} results, which are {res_num / len(actual_results)} of the entire results in the range.')
+
+            # Compare the returned results to the vectors that are actually within the range.
+            assert np.all(np.isin(hnsw_labels, np.array([label for label, _ in actual_results])))
+
+            assert max(hnsw_distances[0]) <= radius
+            recall = res_num / len(actual_results)
+            assert recall > 0.9
+            recalls[epsilon_rt] = recall
+
+        # Expect higher recalls for higher epsilon values.
+        assert recalls[0.001] <= recalls[0.01] <= recalls[0.1]
+
+        # Expect zero results for radius == 0
+        hnsw_labels, hnsw_distances = hnsw_index.range_query(self.query_data[0], radius=0)
+        assert len(hnsw_labels[0]) == 0
+
+    def multi_value(self, create_data_func, num_per_label=5):
+        num_labels = self.num_elements // num_per_label
+        k = 10
+
+        data = create_data_func((num_labels, self.dim), self.rng)
+
+        hnsw_index = self.create_index(is_multi=True)
+
+        vectors = []
+        for i, vector in enumerate(data):
+            for _ in range(num_per_label):
+                hnsw_index.add_vector(vector, i)
+                vectors.append((i, vector))
+
+        correct = 0
+        for target_vector in self.query_data:
+            hnswlib_labels, hnswlib_distances = hnsw_index.knn_query(target_vector, k)
+            assert (len(hnswlib_labels[0]) == len(np.unique(hnswlib_labels[0])))
+
+            # sort distances of every vector from the target vector and get actual k nearest vectors
+            dists = {}
+            for key, vec in vectors:
+                # Setting or updating the score for each label.
+                # If it's the first time we calculate a score for a label, dists.get(key, dist)
+                # returns dist, so the actual score is chosen the first time.
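+                # (e.g. if label 3 is first seen at dist 5.0 and later at 2.0, the first
+                # pass stores 5.0 and the second keeps min(2.0, 5.0) == 2.0)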
+ dist = spatial.distance.sqeuclidean(target_vector, vec) + dists[key] = min(dist, dists.get(key, dist)) + + dists = list(dists.items()) + dists = sorted(dists, key=lambda pair: pair[1])[:k] + keys = [key for key, _ in dists] + + for i, label in enumerate(hnswlib_labels[0]): + for j, correct_label in enumerate(keys): + if label == correct_label: + correct += 1 + assert math.isclose(hnswlib_distances[0][i], dists[j][1], rel_tol=1e-5) + break + + # Measure recall + recall = float(correct) / (k * self.num_queries) + print("\nrecall is: \n", recall) + assert (recall > 0.9) + +class TestINT8(GeneralTest): + + GeneralTest.data_type = VecSimType_INT8 + + #### Create vectors + GeneralTest.data = create_int8_vectors((GeneralTest.num_elements, GeneralTest.dim), GeneralTest.rng) + + #### Create queries + GeneralTest.query_data = create_int8_vectors((GeneralTest.num_queries, GeneralTest.dim), GeneralTest.rng) + + def test_Cosine(self): + hnsw_index = self.create_index(VecSimMetric_Cosine) + label_to_vec_list = self.create_add_vectors(hnsw_index) + + self.knn(hnsw_index, label_to_vec_list, fp32_expand_and_calc_cosine_dist) + + def test_range_query(self): + self.range_query(fp32_expand_and_calc_cosine_dist) + + def test_multi_value(self): + self.multi_value(create_int8_vectors) diff --git a/tests/flow/test_hnsw_tiered.py b/tests/flow/test_hnsw_tiered.py index 569c52493..73ac88c45 100644 --- a/tests/flow/test_hnsw_tiered.py +++ b/tests/flow/test_hnsw_tiered.py @@ -12,7 +12,20 @@ def create_tiered_hnsw_params(swap_job_threshold = 0): return tiered_hnsw_params class IndexCtx: - array_conversion_func = {VecSimType_FLOAT32: np.float32, VecSimType_BFLOAT16: vec_to_bfloat16, VecSimType_FLOAT16: vec_to_float16} + array_conversion_func = { + VecSimType_FLOAT32: np.float32, + VecSimType_BFLOAT16: vec_to_bfloat16, + VecSimType_FLOAT16: vec_to_float16, + } + + type_to_dtype = { + VecSimType_FLOAT32: np.float32, + VecSimType_FLOAT64: np.float64, + VecSimType_BFLOAT16: bfloat16, + VecSimType_FLOAT16: np.float16, + VecSimType_INT8: np.int8 + } + def __init__(self, data_size=10000, dim=16, M=16, @@ -23,7 +36,8 @@ def __init__(self, data_size=10000, is_multi=False, num_per_label=1, swap_job_threshold=0, - flat_buffer_size=1024): + flat_buffer_size=1024, + create_data_func = None): self.num_vectors = data_size self.dim = dim self.M = M @@ -38,12 +52,17 @@ def __init__(self, data_size=10000, self.num_labels = int(self.num_vectors/num_per_label) self.rng = np.random.default_rng(seed=47) + self.create_data_func = self.rng.random if create_data_func is None else create_data_func data_shape = (self.num_labels, num_per_label, self.dim) if is_multi else (self.num_labels, self.dim) - data = self.rng.random(data_shape) - if self.data_type != VecSimType_FLOAT64: - self.data = self.array_conversion_func[self.data_type](data) - print("data type = ", self.data.dtype) + + + self.data = self.create_data_func(data_shape) + if self.data_type in self.array_conversion_func.keys(): + self.data = self.array_conversion_func[self.data_type](self.data) + print("data type = ", self.data.dtype) + assert self.data.dtype == self.type_to_dtype[self.data_type] + self.hnsw_params = create_hnsw_params(dim = self.dim, num_elements = self.num_vectors, metric = self.metric, @@ -102,18 +121,23 @@ def init_and_populate_hnsw_index(self): return hnsw_index def generate_queries(self, num_queries): - queries = self.rng.random((num_queries, self.dim)) - if self.data_type != VecSimType_FLOAT64: + queries = self.create_data_func((num_queries, self.dim)) + if 
self.data_type in self.array_conversion_func.keys(): queries = self.array_conversion_func[self.data_type](queries) return queries def get_vectors_memory_size(self): - memory_size = {VecSimType_FLOAT32:4, VecSimType_FLOAT64:8, VecSimType_BFLOAT16:2, VecSimType_FLOAT16:2} + memory_size = { + VecSimType_FLOAT32: 4, + VecSimType_FLOAT64: 8, + VecSimType_BFLOAT16: 2, + VecSimType_FLOAT16: 2, + VecSimType_INT8: 1 + } return bytes_to_mega(self.num_vectors * self.dim * memory_size[self.data_type]) - -def create_tiered_index(is_multi: bool, num_per_label=1, data_type=VecSimType_FLOAT32): - indices_ctx = IndexCtx(data_size=50000, is_multi=is_multi, num_per_label=num_per_label, data_type=data_type) +def create_tiered_index(is_multi: bool, num_per_label=1, data_type=VecSimType_FLOAT32, create_data_func=None): + indices_ctx = IndexCtx(data_size=50000, is_multi=is_multi, num_per_label=num_per_label, data_type=data_type, create_data_func=create_data_func) num_elements = indices_ctx.num_labels index = indices_ctx.tiered_index @@ -152,10 +176,10 @@ def create_tiered_index(is_multi: bool, num_per_label=1, data_type=VecSimType_FL print(f"with {threads_num} threads, insertion runtime is {round_(execution_time_ratio)} times better \n") -def search_insert(is_multi: bool, num_per_label=1, data_type=VecSimType_FLOAT32): +def search_insert(is_multi: bool, num_per_label=1, data_type=VecSimType_FLOAT32, create_data_func=None): data_size = 100000 indices_ctx = IndexCtx(data_size=data_size, is_multi=is_multi, num_per_label=num_per_label, - flat_buffer_size=data_size, M=64, data_type=data_type) + flat_buffer_size=data_size, M=64, data_type=data_type, create_data_func=create_data_func) index = indices_ctx.tiered_index num_labels = indices_ctx.num_labels @@ -226,13 +250,17 @@ def test_create_multi(): create_tiered_index(is_multi=True, num_per_label=5) def test_create_bf16(): - print("Test create multi label tiered hnsw index") + print("Test create BFLOAT16 tiered hnsw index") create_tiered_index(is_multi=False, data_type=VecSimType_BFLOAT16) def test_create_fp16(): - print("Test create multi label tiered hnsw index") + print("Test create FLOAT16 tiered hnsw index") create_tiered_index(is_multi=False, data_type=VecSimType_FLOAT16) +def test_create_int8(): + print("Test create INT8 tiered hnsw index") + create_tiered_index(is_multi=False, data_type=VecSimType_INT8, create_data_func=create_int8_vectors) + def test_search_insert(): print(f"\nStart insert & search test") search_insert(is_multi=False) @@ -245,6 +273,10 @@ def test_search_insert_fp16(): print(f"\nStart insert & search test") search_insert(is_multi=False, data_type=VecSimType_FLOAT16) +def test_search_insert_int8(): + print(f"\nStart insert & search test") + search_insert(is_multi=False, data_type=VecSimType_INT8, create_data_func=create_int8_vectors) + def test_search_insert_multi_index(): print(f"\nStart insert & search test for multi index") diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index b16bddac6..5a4cc5108 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -30,15 +30,17 @@ endif() include(${root}/cmake/x86_64InstructionFlags.cmake) -add_executable(test_hnsw ../utils/mock_thread_pool.cpp test_hnsw.cpp test_hnsw_multi.cpp test_hnsw_tiered.cpp test_utils.cpp) -add_executable(test_hnsw_parallel test_hnsw_parallel.cpp test_utils.cpp) -add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp test_utils.cpp) -add_executable(test_allocator test_allocator.cpp test_utils.cpp) 
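+# NOTE: test_utils.cpp is renamed to unit_test_utils.cpp (apparently to distinguish it
+# from the shared utils/tests_utils.h helpers), and the mock thread pool is now compiled
+# into every test binary that uses the shared utils.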
+add_executable(test_hnsw ../utils/mock_thread_pool.cpp test_hnsw.cpp test_hnsw_multi.cpp test_hnsw_tiered.cpp unit_test_utils.cpp) +add_executable(test_hnsw_parallel test_hnsw_parallel.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp) +add_executable(test_bruteforce test_bruteforce.cpp test_bruteforce_multi.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp) +add_executable(test_allocator test_allocator.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp) add_executable(test_spaces test_spaces.cpp) add_executable(test_types test_types.cpp) -add_executable(test_common ../utils/mock_thread_pool.cpp test_utils.cpp test_common.cpp) -add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp test_utils.cpp) -add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp test_utils.cpp) +add_executable(test_common ../utils/mock_thread_pool.cpp test_common.cpp unit_test_utils.cpp) +add_executable(test_components test_components.cpp ../utils/mock_thread_pool.cpp unit_test_utils.cpp) +add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_utils.cpp) +add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp unit_test_utils.cpp) +add_executable(test_int8 ../utils/mock_thread_pool.cpp test_int8.cpp unit_test_utils.cpp) target_link_libraries(test_hnsw PUBLIC gtest_main VectorSimilarity) target_link_libraries(test_hnsw_parallel PUBLIC gtest_main VectorSimilarity) @@ -46,9 +48,11 @@ target_link_libraries(test_bruteforce PUBLIC gtest_main VectorSimilarity) target_link_libraries(test_allocator PUBLIC gtest_main VectorSimilarity) target_link_libraries(test_spaces PUBLIC gtest_main VectorSimilarity) target_link_libraries(test_common PUBLIC gtest_main VectorSimilarity) +target_link_libraries(test_components PUBLIC gtest_main VectorSimilarity) target_link_libraries(test_types PUBLIC gtest_main VectorSimilarity) target_link_libraries(test_bf16 PUBLIC gtest_main VectorSimilarity) target_link_libraries(test_fp16 PUBLIC gtest_main VectorSimilarity) +target_link_libraries(test_int8 PUBLIC gtest_main VectorSimilarity) include(GoogleTest) @@ -58,6 +62,8 @@ gtest_discover_tests(test_bruteforce) gtest_discover_tests(test_allocator) gtest_discover_tests(test_spaces) gtest_discover_tests(test_common) +gtest_discover_tests(test_components) gtest_discover_tests(test_types) gtest_discover_tests(test_bf16 TEST_PREFIX BF16UNIT_) gtest_discover_tests(test_fp16 TEST_PREFIX FP16UNIT_) +gtest_discover_tests(test_int8 TEST_PREFIX INT8UNIT_) diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 42689b6eb..81642d657 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -10,7 +10,7 @@ #include "VecSim/memory/vecsim_base.h" #include "VecSim/algorithms/brute_force/brute_force_single.h" #include "VecSim/algorithms/hnsw/hnsw_single.h" -#include "test_utils.h" +#include "unit_test_utils.h" #include "VecSim/utils/serializer.h" #include "VecSim/index_factories/hnsw_factory.h" @@ -83,7 +83,7 @@ TEST_F(AllocatorTest, test_nested_object) { template class IndexAllocatorTest : public ::testing::Test {}; -// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h +// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h TYPED_TEST_SUITE(IndexAllocatorTest, DataTypeSet); diff --git a/tests/unit/test_bf16.cpp b/tests/unit/test_bf16.cpp index 921c80c35..95e12c98b 100644 --- a/tests/unit/test_bf16.cpp +++ b/tests/unit/test_bf16.cpp @@ -2,7 +2,7 @@ #include "VecSim/vec_sim.h" #include 
"VecSim/algorithms/hnsw/hnsw_single.h" #include "VecSim/index_factories/hnsw_factory.h" -#include "test_utils.h" +#include "unit_test_utils.h" #include "VecSim/utils/serializer.h" #include "mock_thread_pool.h" #include "VecSim/query_result_definitions.h" diff --git a/tests/unit/test_bruteforce.cpp b/tests/unit/test_bruteforce.cpp index c56415e3d..b3d5b1192 100644 --- a/tests/unit/test_bruteforce.cpp +++ b/tests/unit/test_bruteforce.cpp @@ -6,7 +6,7 @@ #include "gtest/gtest.h" #include "VecSim/vec_sim.h" -#include "test_utils.h" +#include "unit_test_utils.h" #include "VecSim/algorithms/brute_force/brute_force.h" #include "VecSim/algorithms/brute_force/brute_force_single.h" #include "cpu_features_macros.h" @@ -32,7 +32,7 @@ class BruteForceTest : public ::testing::Test { } }; -// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h +// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h TYPED_TEST_SUITE(BruteForceTest, DataTypeSet); diff --git a/tests/unit/test_bruteforce_multi.cpp b/tests/unit/test_bruteforce_multi.cpp index ef9cfc636..55aadedd4 100644 --- a/tests/unit/test_bruteforce_multi.cpp +++ b/tests/unit/test_bruteforce_multi.cpp @@ -6,7 +6,7 @@ #include "gtest/gtest.h" #include "VecSim/vec_sim.h" -#include "test_utils.h" +#include "unit_test_utils.h" #include "VecSim/algorithms/brute_force/brute_force_multi.h" #include @@ -27,7 +27,7 @@ class BruteForceMultiTest : public ::testing::Test { } }; -// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h +// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h TYPED_TEST_SUITE(BruteForceMultiTest, DataTypeSet); diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp index 58df46fba..e0ccd8d4c 100644 --- a/tests/unit/test_common.cpp +++ b/tests/unit/test_common.cpp @@ -10,11 +10,12 @@ #include "VecSim/query_result_definitions.h" #include "VecSim/utils/updatable_heap.h" #include "VecSim/utils/vec_utils.h" -#include "test_utils.h" +#include "unit_test_utils.h" #include "VecSim/containers/vecsim_results_container.h" #include "VecSim/algorithms/hnsw/hnsw.h" #include "VecSim/index_factories/hnsw_factory.h" #include "mock_thread_pool.h" +#include "tests_utils.h" #include "VecSim/index_factories/tiered_factory.h" #include "VecSim/spaces/spaces.h" #include "VecSim/types/bfloat16.h" @@ -32,7 +33,7 @@ using float16 = vecsim_types::float16; template class CommonIndexTest : public ::testing::Test {}; -// DataTypeSet are defined in test_utils.h +// DataTypeSet are defined in unit_test_utils.h TYPED_TEST_SUITE(CommonIndexTest, DataTypeSet); @@ -625,579 +626,178 @@ TEST(CommonAPITest, NormalizeFloat16) { ASSERT_NEAR(1.0, norm, 0.001); } -class IndexCalculatorTest : public ::testing::Test {}; - -namespace dummyCalcultor { - -using DummyType = int; -using dummy_dist_func_t = DummyType (*)(int); +TEST(CommonAPITest, NormalizeInt8) { + size_t dim = 20; + int8_t v[dim + sizeof(float)]; -int dummyDistFunc(int value) { return value; } + test_utils::populate_int8_vec(v, dim); -template -class DistanceCalculatorDummy : public DistanceCalculatorInterface { -public: - DistanceCalculatorDummy(std::shared_ptr allocator, dummy_dist_func_t dist_func) - : DistanceCalculatorInterface(allocator, dist_func) {} + VecSim_Normalize(v, dim, VecSimType_INT8); - virtual DistType calcDistance(const void *v1, const void *v2, size_t dim) const { - return this->dist_func(7); + float res_norm = *(reinterpret_cast(v + dim)); + // Check that the normalized vector norm is 1. 
+ float norm = 0; + for (size_t i = 0; i < dim; ++i) { + float val = v[i] / res_norm; + norm += val * val; } -}; - -} // namespace dummyCalcultor - -TEST(IndexCalculatorTest, TestIndexCalculator) { - - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - - // Test computer with a distance function signature different from dim(v1, v2, dim()). - using namespace dummyCalcultor; - auto distance_calculator = DistanceCalculatorDummy(allocator, dummyDistFunc); - ASSERT_EQ(distance_calculator.calcDistance(nullptr, nullptr, 0), 7); + ASSERT_FLOAT_EQ(norm, 1.0); } -class PreprocessorsTest : public ::testing::Test {}; - -namespace dummyPreprocessors { - -using DummyType = int; - -enum pp_mode { STORAGE_ONLY, QUERY_ONLY, BOTH, EMPTY }; - -// Dummy storage preprocessor -template -class DummyStoragePreprocessor : public PreprocessorInterface { -public: - DummyStoragePreprocessor(std::shared_ptr allocator, int value_to_add_storage, - int value_to_add_query = 0) - : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), - value_to_add_query(value_to_add_query) { - if (!value_to_add_query) - value_to_add_query = value_to_add_storage; - } - - void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { - - this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count); - } - - void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { - // If the blob was not allocated yet, allocate it. - if (blob == nullptr) { - blob = this->allocator->allocate(processed_bytes_count); - memcpy(blob, original_blob, processed_bytes_count); - } - static_cast(blob)[0] += value_to_add_storage; - } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, - unsigned char alignment) const override {} - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, - unsigned char alignment) const override { - /* do nothing*/ - } - -private: - int value_to_add_storage; - int value_to_add_query; -}; - -// Dummy query preprocessor -template -class DummyQueryPreprocessor : public PreprocessorInterface { -public: - DummyQueryPreprocessor(std::shared_ptr allocator, int value_to_add_storage, - int _value_to_add_query = 0) - : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), - value_to_add_query(_value_to_add_query) { - if (!_value_to_add_query) - value_to_add_query = value_to_add_storage; - } - - void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { - this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment); - } - - void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { - /* do nothing*/ - } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, - unsigned char alignment) const override { - static_cast(blob)[0] += value_to_add_query; - } - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, - unsigned char alignment) const override { - // If the blob was not allocated yet, allocate it. 
- if (blob == nullptr) { - blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(blob, original_blob, processed_bytes_count); - } - static_cast(blob)[0] += value_to_add_query; - } - -private: - int value_to_add_storage; - int value_to_add_query; -}; - -// Dummy mixed preprocessor (precesses the blobs differently) -template -class DummyMixedPreprocessor : public PreprocessorInterface { -public: - DummyMixedPreprocessor(std::shared_ptr allocator, int value_to_add_storage, - int value_to_add_query) - : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), - value_to_add_query(value_to_add_query) {} - void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { - - // One blob was already allocated by a previous preprocessor(s) that process both blobs the - // same. The blobs are pointing to the same memory, we need to allocate another memory slot - // to split them. - if ((storage_blob == query_blob) && (query_blob != nullptr)) { - storage_blob = this->allocator->allocate(processed_bytes_count); - memcpy(storage_blob, query_blob, processed_bytes_count); - } +class CommonTypeMetricTests : public testing::TestWithParam> { +protected: + template + void test_datasize(); - // Either both are nullptr or they are pointing to different memory slots. Both cases are - // handled by the designated functions. - this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count); - this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment); - } + template + void test_initial_size_estimation(); - void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { - // If the blob was not allocated yet, allocate it. - if (blob == nullptr) { - blob = this->allocator->allocate(processed_bytes_count); - memcpy(blob, original_blob, processed_bytes_count); - } - static_cast(blob)[0] += value_to_add_storage; - } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, - unsigned char alignment) const override {} - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, - unsigned char alignment) const override { - // If the blob was not allocated yet, allocate it. - if (blob == nullptr) { - blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(blob, original_blob, processed_bytes_count); - } - static_cast(blob)[0] += value_to_add_query; - } + virtual void TearDown() { VecSimIndex_Free(index); } -private: - int value_to_add_storage; - int value_to_add_query; + VecSimIndex *index; }; -} // namespace dummyPreprocessors - -TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) { - using namespace dummyPreprocessors; - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - - unsigned char alignment = 5; - auto preprocessor = PreprocessorsContainerAbstract(allocator, alignment); - const int original_blob[4] = {1, 1, 1, 1}; - size_t processed_bytes_count = sizeof(original_blob); - { - auto aligned_query = preprocessor.preprocessQuery(original_blob, processed_bytes_count); - unsigned char address_alignment = (uintptr_t)(aligned_query.get()) % alignment; - ASSERT_EQ(address_alignment, 0); - } - - // The index computer is responsible for releasing the distance calculator. 
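+// test_datasize (below) expects the stored blob size to be dim * VecSimType_sizeof(type),
+// plus one extra float for INT8 + Cosine indexes, which append the precomputed norm to
+// each vector (see NormalizeInt8 above).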
+template +void CommonTypeMetricTests::test_datasize() { + size_t dim = 4; + VecSimType type = std::get<0>(GetParam()); + VecSimMetric metric = std::get<1>(GetParam()); + algo_params params = {.dim = dim, .metric = metric}; + this->index = test_utils::CreateNewIndex(params, type); + size_t actual = test_utils::CalcVectorDataSize(index, type); + size_t expected = dim * VecSimType_sizeof(type); + if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) { + expected += sizeof(float); + } + ASSERT_EQ(actual, expected); } -template -void MultiPPContainerEmpty() { - using namespace dummyPreprocessors; - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - constexpr size_t dim = 4; - const int original_blob[dim] = {1, 2, 3, 4}; - const int original_blob_cpy[dim] = {1, 2, 3, 4}; - - constexpr size_t n_preprocessors = 3; - - auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); - - { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); - // Original blob should not be changed - CompareVectors(original_blob, original_blob_cpy, dim); - - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); +TEST_P(CommonTypeMetricTests, TestDataSizeBF) { this->test_datasize(); } +TEST_P(CommonTypeMetricTests, TestDataSizeHNSW) { this->test_datasize(); } - // Storage blob should not be reallocated or changed - ASSERT_EQ(storage_blob, (const int *)original_blob); - CompareVectors(original_blob, (const int *)storage_blob, dim); +template +void CommonTypeMetricTests::test_initial_size_estimation() { + size_t dim = 4; + VecSimType type = std::get<0>(GetParam()); + VecSimMetric metric = std::get<1>(GetParam()); + algo_params params = {.dim = dim, .metric = metric}; + this->index = test_utils::CreateNewIndex(params, type); - // query blob *values* should not be changed - CompareVectors(original_blob, (const int *)query_blob, dim); + size_t estimation = EstimateInitialSize(params); + size_t actual = index->getAllocationSize(); - // If alignment is set the query blob address should be aligned to the specified alignment. - if constexpr (alignment) { - unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; - ASSERT_EQ(address_alignment, 0); - } - } + ASSERT_EQ(estimation, actual); } -TEST(PreprocessorsTest, MultiPPContainerEmptyNoAlignment) { - using namespace dummyPreprocessors; - MultiPPContainerEmpty<0>(); +TEST_P(CommonTypeMetricTests, TestInitialSizeEstimationBF) { + this->test_initial_size_estimation(); } - -TEST(PreprocessorsTest, MultiPPContainerEmptyAlignment) { - using namespace dummyPreprocessors; - MultiPPContainerEmpty<5>(); +TEST_P(CommonTypeMetricTests, TestInitialSizeEstimationHNSW) { + this->test_initial_size_estimation(); } -template -void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) { - using namespace dummyPreprocessors; - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +class CommonTypeMetricTieredTests : public CommonTypeMetricTests { +protected: + virtual void TearDown() override {} - constexpr size_t n_preprocessors = 2; - unsigned char alignment = 0; - int initial_value = 1; - int value_to_add = 7; - const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; - size_t processed_bytes_count = sizeof(original_blob); - - // Test computer with multiple preprocessors of the same type. 
- auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); - - auto verify_preprocess = [&](int expected_processed_value) { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); - // Original blob should not be changed - ASSERT_EQ(original_blob[0], initial_value); - - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - if (MODE == STORAGE_ONLY) { - // New storage blob should be allocated - ASSERT_NE(storage_blob, original_blob); - // query blob should be unprocessed - ASSERT_EQ(query_blob, original_blob); - ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value); - } else if (MODE == QUERY_ONLY) { - // New query blob should be allocated - ASSERT_NE(query_blob, original_blob); - // Storage blob should be unprocessed - ASSERT_EQ(storage_blob, original_blob); - ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value); + tieredIndexMock mock_thread_pool; +}; + +TEST_P(CommonTypeMetricTieredTests, TestDataSizeTieredHNSW) { + size_t dim = 4; + VecSimType type = std::get<0>(GetParam()); + VecSimMetric metric = std::get<1>(GetParam()); + + HNSWParams hnsw_params = {.type = type, .dim = 4, .metric = metric}; + VecSimIndex *index = test_utils::CreateNewTieredHNSWIndex(hnsw_params, this->mock_thread_pool); + + auto verify_data_size = [&](const auto &tiered_index) { + auto hnsw_index = tiered_index->getHNSWIndex(); + auto bf_index = tiered_index->getFlatBufferIndex(); + size_t expected = dim * VecSimType_sizeof(type); + if (type == VecSimType_INT8 && metric == VecSimMetric_Cosine) { + expected += sizeof(float); } + size_t actual_hnsw = hnsw_index->getDataSize(); + ASSERT_EQ(actual_hnsw, expected); + size_t actual_bf = bf_index->getDataSize(); + ASSERT_EQ(actual_bf, expected); }; - /* ==== Add the first preprocessor ==== */ - auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add); - // add preprocessor returns next free spot in its preprocessors array. - ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1); - verify_preprocess(initial_value + value_to_add); - - /* ==== Add the second preprocessor ==== */ - auto preprocessor1 = new (allocator) PreprocessorType(allocator, value_to_add); - // add preprocessor returns 0 when adding the last preprocessor. - ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 0); - ASSERT_NO_FATAL_FAILURE(verify_preprocess(initial_value + 2 * value_to_add)); -} - -TEST(PreprocessorsTest, MultiPreprocessorsContainerStorageNoAlignment) { - using namespace dummyPreprocessors; - MultiPreprocessorsContainerNoAlignment>( - pp_mode::STORAGE_ONLY); -} - -TEST(PreprocessorsTest, MultiPreprocessorsContainerQueryNoAlignment) { - using namespace dummyPreprocessors; - MultiPreprocessorsContainerNoAlignment>(pp_mode::QUERY_ONLY); -} - -template -void multiPPContainerMixedPreprocessorNoAlignment() { - using namespace dummyPreprocessors; - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - - constexpr size_t n_preprocessors = 3; - unsigned char alignment = 0; - int initial_value = 1; - int value_to_add_storage = 7; - int value_to_add_query = 2; - const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; - size_t processed_bytes_count = sizeof(original_blob); - - // Test multiple preprocessors of the same type. 
- auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); - - /* ==== Add one preprocessor of each type ==== */ - auto preprocessor0 = - new (allocator) FirstPreprocessorType(allocator, value_to_add_storage, value_to_add_query); - ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1); - auto preprocessor1 = - new (allocator) SecondPreprocessorType(allocator, value_to_add_storage, value_to_add_query); - ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 2); - - // scope this section so the blobs are released before the allocator. - { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); - // Original blob should not be changed - ASSERT_EQ(original_blob[0], initial_value); - - // Both blobs should be allocated - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - - // Ensure the computer process returns a new allocation of the expected processed blob with - // the new value. - ASSERT_NE(storage_blob, original_blob); - ASSERT_NE(query_blob, original_blob); - ASSERT_NE(query_blob, storage_blob); - - ASSERT_EQ(((const int *)storage_blob)[0], initial_value + value_to_add_storage); - ASSERT_EQ(((const int *)query_blob)[0], initial_value + value_to_add_query); + switch (type) { + case VecSimType_FLOAT32: { + auto tiered_index = test_utils::cast_to_tiered_index(index); + verify_data_size(tiered_index); + break; } - - /* ==== Add a preprocessor that processes both storage and query ==== */ - auto preprocessor2 = new (allocator) - DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); - // add preprocessor returns 0 when adding the last preprocessor. - ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), 0); - { - ProcessedBlobs mixed_processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); - - const void *mixed_pp_storage_blob = mixed_processed_blobs.getStorageBlob(); - const void *mixed_pp_query_blob = mixed_processed_blobs.getQueryBlob(); - - // Ensure the computer process both blobs. - ASSERT_EQ(((const int *)mixed_pp_storage_blob)[0], - initial_value + 2 * value_to_add_storage); - ASSERT_EQ(((const int *)mixed_pp_query_blob)[0], initial_value + 2 * value_to_add_query); + case VecSimType_FLOAT64: { + auto tiered_index = test_utils::cast_to_tiered_index(index); + verify_data_size(tiered_index); + break; } - - // try adding another preprocessor and fail. 
- ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), -1); -} - -TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorQueryFirst) { - using namespace dummyPreprocessors; - multiPPContainerMixedPreprocessorNoAlignment, - DummyStoragePreprocessor>(); -} - -TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorStorageFirst) { - using namespace dummyPreprocessors; - multiPPContainerMixedPreprocessorNoAlignment, - DummyQueryPreprocessor>(); -} - -template -void multiPPContainerAlignment(dummyPreprocessors::pp_mode MODE) { - using namespace dummyPreprocessors; - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - - unsigned char alignment = 5; - constexpr size_t n_preprocessors = 1; - int initial_value = 1; - int value_to_add = 7; - const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; - size_t processed_bytes_count = sizeof(original_blob); - - auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); - - auto verify_preprocess = [&](int expected_processed_value) { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); - - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - if (MODE == STORAGE_ONLY) { - // New storage blob should be allocated and processed - ASSERT_NE(storage_blob, original_blob); - ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value); - // query blob *values* should be unprocessed, however, it might be allocated if the - // original blob is not aligned. - ASSERT_EQ(((const int *)query_blob)[0], original_blob[0]); - } else if (MODE == QUERY_ONLY) { - // New query blob should be allocated - ASSERT_NE(query_blob, original_blob); - // Storage blob should be unprocessed and not allocated. - ASSERT_EQ(storage_blob, original_blob); - ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value); - } - - // anyway the query blob should be aligned - unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; - ASSERT_EQ(address_alignment, 0); - }; - - auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add); - // add preprocessor returns next free spot in its preprocessors array. 
- ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 0); - verify_preprocess(initial_value + value_to_add); -} - -TEST(PreprocessorsTest, StoragePreprocessorWithAlignment) { - using namespace dummyPreprocessors; - multiPPContainerAlignment>(pp_mode::STORAGE_ONLY); -} - -TEST(PreprocessorsTest, QueryPreprocessorWithAlignment) { - using namespace dummyPreprocessors; - multiPPContainerAlignment>(pp_mode::QUERY_ONLY); -} - -TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) { - using namespace dummyPreprocessors; - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - - constexpr size_t n_preprocessors = 2; - constexpr size_t dim = 4; - unsigned char alignment = 5; - - float initial_value = 1.0f; - float normalized_value = 0.5f; - float value_to_add_storage = 7.0f; - float value_to_add_query = 2.0f; - const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; - - auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); - - // adding cosine preprocessor - auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, dim); - multiPPContainer.addPreprocessor(cosine_preprocessor); - { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - // blobs should point to the same memory slot - ASSERT_EQ(storage_blob, query_blob); - // memory should be aligned - unsigned char address_alignment = (uintptr_t)(storage_blob) % alignment; - ASSERT_EQ(address_alignment, 0); - // They need to be allocated and processed - ASSERT_NE(storage_blob, nullptr); - ASSERT_EQ(((const float *)storage_blob)[0], normalized_value); - // the original blob should not change - ASSERT_NE(storage_blob, original_blob); + case VecSimType_BFLOAT16: { + auto tiered_index = test_utils::cast_to_tiered_index(index); + verify_data_size(tiered_index); + break; } - // adding mixed preprocessor - auto mixed_preprocessor = new (allocator) - DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); - multiPPContainer.addPreprocessor(mixed_preprocessor); - { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - // blobs should point to a different memory slot - ASSERT_NE(storage_blob, query_blob); - ASSERT_NE(storage_blob, nullptr); - ASSERT_NE(query_blob, nullptr); - - // query blob should be aligned - unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; - ASSERT_EQ(address_alignment, 0); - - // They need to be processed by both processors. - ASSERT_EQ(((const float *)storage_blob)[0], normalized_value + value_to_add_storage); - ASSERT_EQ(((const float *)query_blob)[0], normalized_value + value_to_add_query); - - // the original blob should not change - ASSERT_NE(storage_blob, original_blob); - ASSERT_NE(query_blob, original_blob); + case VecSimType_FLOAT16: { + auto tiered_index = test_utils::cast_to_tiered_index(index); + verify_data_size(tiered_index); + break; } - // The preprocessors should be released by the preprocessors container. 
-} - -TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { - using namespace dummyPreprocessors; - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - - constexpr size_t n_preprocessors = 2; - constexpr size_t dim = 4; - unsigned char alignment = 5; - - float initial_value = 1.0f; - float normalized_value = 0.5f; - float value_to_add_storage = 7.0f; - float value_to_add_query = 2.0f; - const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; - - // Creating multi preprocessors container - auto mixed_preprocessor = new (allocator) - DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); - auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); - multiPPContainer.addPreprocessor(mixed_preprocessor); - - { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - // blobs should point to a different memory slot - ASSERT_NE(storage_blob, query_blob); - ASSERT_NE(storage_blob, nullptr); - ASSERT_NE(query_blob, nullptr); - - // query blob should be aligned - unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; - ASSERT_EQ(address_alignment, 0); - - // They need to be processed by both processors. - ASSERT_EQ(((const float *)storage_blob)[0], initial_value + value_to_add_storage); - ASSERT_EQ(((const float *)query_blob)[0], initial_value + value_to_add_query); - - // the original blob should not change - ASSERT_NE(storage_blob, original_blob); - ASSERT_NE(query_blob, original_blob); + case VecSimType_INT8: { + auto tiered_index = test_utils::cast_to_tiered_index(index); + verify_data_size(tiered_index); + break; } - - // adding cosine preprocessor - auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, dim); - multiPPContainer.addPreprocessor(cosine_preprocessor); - { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - // blobs should point to a different memory slot - ASSERT_NE(storage_blob, query_blob); - // query memory should be aligned - unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; - ASSERT_EQ(address_alignment, 0); - // They need to be allocated and processed - ASSERT_NE(storage_blob, nullptr); - ASSERT_NE(query_blob, nullptr); - float expected_processed_storage[dim] = {initial_value + value_to_add_storage, - initial_value, initial_value, initial_value}; - float expected_processed_query[dim] = {initial_value + value_to_add_query, initial_value, - initial_value, initial_value}; - VecSim_Normalize(expected_processed_storage, dim, VecSimType_FLOAT32); - VecSim_Normalize(expected_processed_query, dim, VecSimType_FLOAT32); - ASSERT_EQ(((const float *)storage_blob)[0], expected_processed_storage[0]); - ASSERT_EQ(((const float *)query_blob)[0], expected_processed_query[0]); - // the original blob should not change - ASSERT_NE(storage_blob, original_blob); - ASSERT_NE(query_blob, original_blob); + default: + FAIL() << "Unsupported data type"; } - // The preprocessors should be released by the preprocessors container. 
} + +TEST_P(CommonTypeMetricTieredTests, TestInitialSizeEstimationTieredHNSW) { + size_t dim = 4; + VecSimType type = std::get<0>(GetParam()); + VecSimMetric metric = std::get<1>(GetParam()); + HNSWParams hnsw_params = {.type = type, .dim = dim, .metric = metric}; + VecSimParams vecsim_hnsw_params = CreateParams(hnsw_params); + TieredIndexParams tiered_params = + test_utils::CreateTieredParams(vecsim_hnsw_params, this->mock_thread_pool); + VecSimParams params = CreateParams(tiered_params); + auto *index = VecSimIndex_New(¶ms); + mock_thread_pool.ctx->index_strong_ref.reset(index); + + size_t estimation = VecSimIndex_EstimateInitialSize(¶ms); + size_t actual = index->getAllocationSize(); + + ASSERT_EQ(estimation, actual); +} + +constexpr VecSimType vecsim_datatypes[] = {VecSimType_FLOAT32, VecSimType_FLOAT64, + VecSimType_BFLOAT16, VecSimType_FLOAT16, + VecSimType_INT8}; + +/** Run all CommonTypeMetricTests tests for each {VecSimType, VecSimMetric} combination */ +INSTANTIATE_TEST_SUITE_P(CommonTest, CommonTypeMetricTests, + testing::Combine(testing::ValuesIn(vecsim_datatypes), + testing::Values(VecSimMetric_L2, VecSimMetric_IP, + VecSimMetric_Cosine)), + [](const testing::TestParamInfo &info) { + const char *type = VecSimType_ToString(std::get<0>(info.param)); + const char *metric = VecSimMetric_ToString(std::get<1>(info.param)); + std::string test_name(type); + return test_name + "_" + metric; + }); + +/** Run all CommonTypeMetricTieredTests tests for each {VecSimType, VecSimMetric} combination */ +INSTANTIATE_TEST_SUITE_P( + CommonTieredTest, CommonTypeMetricTieredTests, + testing::Combine(testing::ValuesIn(vecsim_datatypes), + testing::Values(VecSimMetric_L2, VecSimMetric_IP, VecSimMetric_Cosine)), + [](const testing::TestParamInfo &info) { + const char *type = VecSimType_ToString(std::get<0>(info.param)); + const char *metric = VecSimMetric_ToString(std::get<1>(info.param)); + std::string test_name(type); + return test_name + "_" + metric; + }); diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp new file mode 100644 index 000000000..af49b12a8 --- /dev/null +++ b/tests/unit/test_components.cpp @@ -0,0 +1,587 @@ +/* + *Copyright Redis Ltd. 2021 - present + *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or + *the Server Side Public License v1 (SSPLv1). + */ + +#include "gtest/gtest.h" +#include "VecSim/vec_sim.h" +#include "VecSim/spaces/computer/preprocessor_container.h" +#include "VecSim/spaces/computer/calculator.h" +#include "unit_test_utils.h" + +class IndexCalculatorTest : public ::testing::Test {}; +namespace dummyCalcultor { + +using DummyType = int; +using dummy_dist_func_t = DummyType (*)(int); + +int dummyDistFunc(int value) { return value; } + +template +class DistanceCalculatorDummy : public DistanceCalculatorInterface { +public: + DistanceCalculatorDummy(std::shared_ptr allocator, dummy_dist_func_t dist_func) + : DistanceCalculatorInterface(allocator, dist_func) {} + + virtual DistType calcDistance(const void *v1, const void *v2, size_t dim) const { + return this->dist_func(7); + } +}; + +} // namespace dummyCalcultor + +TEST(IndexCalculatorTest, TestIndexCalculator) { + + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + // Test computer with a distance function signature different from dim(v1, v2, dim()). 
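+    // (DistanceCalculatorDummy ignores its inputs and always returns dist_func(7) == 7,
+    // so the assertion below only checks that the calculator forwards to the stored
+    // function pointer, whatever its signature.)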
+    using namespace dummyCalcultor;
+    auto distance_calculator = DistanceCalculatorDummy<DummyType>(allocator, dummyDistFunc);
+
+    ASSERT_EQ(distance_calculator.calcDistance(nullptr, nullptr, 0), 7);
+}
+
+class PreprocessorsTest : public ::testing::Test {};
+
+namespace dummyPreprocessors {
+
+using DummyType = int;
+
+enum pp_mode { STORAGE_ONLY, QUERY_ONLY, BOTH, EMPTY };
+
+// Dummy storage preprocessor
+template <typename DataType>
+class DummyStoragePreprocessor : public PreprocessorInterface {
+public:
+    DummyStoragePreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
+                             int value_to_add_query = 0)
+        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
+          value_to_add_query(value_to_add_query) {
+        if (!value_to_add_query)
+            value_to_add_query = value_to_add_storage;
+    }
+
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t processed_bytes_count, unsigned char alignment) const override {
+
+        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t processed_bytes_count) const override {
+        // If the blob was not allocated yet, allocate it.
+        if (blob == nullptr) {
+            blob = this->allocator->allocate(processed_bytes_count);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_storage;
+    }
+    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+                                unsigned char alignment) const override {}
+    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+                         unsigned char alignment) const override {
+        /* do nothing */
+    }
+
+private:
+    int value_to_add_storage;
+    int value_to_add_query;
+};
+
+// Dummy query preprocessor
+template <typename DataType>
+class DummyQueryPreprocessor : public PreprocessorInterface {
+public:
+    DummyQueryPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
+                           int _value_to_add_query = 0)
+        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
+          value_to_add_query(_value_to_add_query) {
+        if (!_value_to_add_query)
+            value_to_add_query = value_to_add_storage;
+    }
+
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t processed_bytes_count, unsigned char alignment) const override {
+        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t processed_bytes_count) const override {
+        /* do nothing */
+    }
+    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+                                unsigned char alignment) const override {
+        static_cast<DataType *>(blob)[0] += value_to_add_query;
+    }
+    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+                         unsigned char alignment) const override {
+        // If the blob was not allocated yet, allocate it.
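+        // (Unlike storage blobs, query blobs are allocated with allocate_aligned below,
+        // matching the alignment contract that query preprocessing must honor.)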
+        if (blob == nullptr) {
+            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_query;
+    }
+
+private:
+    int value_to_add_storage;
+    int value_to_add_query;
+};
+
+// Dummy mixed preprocessor (processes the storage and query blobs differently)
+template <typename DataType>
+class DummyMixedPreprocessor : public PreprocessorInterface {
+public:
+    DummyMixedPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
+                           int value_to_add_query)
+        : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
+          value_to_add_query(value_to_add_query) {}
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t processed_bytes_count, unsigned char alignment) const override {
+
+        // One blob may have already been allocated by a previous preprocessor that processes
+        // both blobs the same way. If the blobs point to the same memory, we need to allocate
+        // another memory slot to split them.
+        if ((storage_blob == query_blob) && (query_blob != nullptr)) {
+            storage_blob = this->allocator->allocate(processed_bytes_count);
+            memcpy(storage_blob, query_blob, processed_bytes_count);
+        }
+
+        // Either both are nullptr or they point to different memory slots. Both cases are
+        // handled by the designated functions.
+        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
+        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t processed_bytes_count) const override {
+        // If the blob was not allocated yet, allocate it.
+        if (blob == nullptr) {
+            blob = this->allocator->allocate(processed_bytes_count);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_storage;
+    }
+    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+                                unsigned char alignment) const override {}
+    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+                         unsigned char alignment) const override {
+        // If the blob was not allocated yet, allocate it.
+        if (blob == nullptr) {
+            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+            memcpy(blob, original_blob, processed_bytes_count);
+        }
+        static_cast<DataType *>(blob)[0] += value_to_add_query;
+    }
+
+private:
+    int value_to_add_storage;
+    int value_to_add_query;
+};
+} // namespace dummyPreprocessors
+
+TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    unsigned char alignment = 5;
+    auto preprocessor = PreprocessorsContainerAbstract(allocator, alignment);
+    const int original_blob[4] = {1, 1, 1, 1};
+    size_t processed_bytes_count = sizeof(original_blob);
+
+    {
+        auto aligned_query = preprocessor.preprocessQuery(original_blob, processed_bytes_count);
+        unsigned char address_alignment = (uintptr_t)(aligned_query.get()) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+    }
+
+    // The aligned query blob is released when it goes out of scope.
+} + +template +void MultiPPContainerEmpty() { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + constexpr size_t dim = 4; + const int original_blob[dim] = {1, 2, 3, 4}; + const int original_blob_cpy[dim] = {1, 2, 3, 4}; + + constexpr size_t n_preprocessors = 3; + + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + // Original blob should not be changed + CompareVectors(original_blob, original_blob_cpy, dim); + + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + + // Storage blob should not be reallocated or changed + ASSERT_EQ(storage_blob, (const int *)original_blob); + CompareVectors(original_blob, (const int *)storage_blob, dim); + + // query blob *values* should not be changed + CompareVectors(original_blob, (const int *)query_blob, dim); + + // If alignment is set the query blob address should be aligned to the specified alignment. + if constexpr (alignment) { + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + } + } +} + +TEST(PreprocessorsTest, MultiPPContainerEmptyNoAlignment) { + using namespace dummyPreprocessors; + MultiPPContainerEmpty<0>(); +} + +TEST(PreprocessorsTest, MultiPPContainerEmptyAlignment) { + using namespace dummyPreprocessors; + MultiPPContainerEmpty<5>(); +} + +template +void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t n_preprocessors = 2; + unsigned char alignment = 0; + int initial_value = 1; + int value_to_add = 7; + const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; + size_t processed_bytes_count = sizeof(original_blob); + + // Test computer with multiple preprocessors of the same type. + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + + auto verify_preprocess = [&](int expected_processed_value) { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, processed_bytes_count); + // Original blob should not be changed + ASSERT_EQ(original_blob[0], initial_value); + + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + if (MODE == STORAGE_ONLY) { + // New storage blob should be allocated + ASSERT_NE(storage_blob, original_blob); + // query blob should be unprocessed + ASSERT_EQ(query_blob, original_blob); + ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value); + } else if (MODE == QUERY_ONLY) { + // New query blob should be allocated + ASSERT_NE(query_blob, original_blob); + // Storage blob should be unprocessed + ASSERT_EQ(storage_blob, original_blob); + ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value); + } + }; + + /* ==== Add the first preprocessor ==== */ + auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add); + // add preprocessor returns next free spot in its preprocessors array. 
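+    // (Reviewer note) Assumed contract of addPreprocessor, as exercised by the asserts in
+    // this file: it returns the index of the next free slot in the preprocessors array,
+    // 0 when the array has just become full, and -1 when there is no room left:
+    //   addPreprocessor(p0) -> 1   (one of the two slots is taken)
+    //   addPreprocessor(p1) -> 0   (array is now full)
+    //   addPreprocessor(p2) -> -1  (rejected)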
+ ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1); + verify_preprocess(initial_value + value_to_add); + + /* ==== Add the second preprocessor ==== */ + auto preprocessor1 = new (allocator) PreprocessorType(allocator, value_to_add); + // add preprocessor returns 0 when adding the last preprocessor. + ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 0); + ASSERT_NO_FATAL_FAILURE(verify_preprocess(initial_value + 2 * value_to_add)); +} + +TEST(PreprocessorsTest, MultiPreprocessorsContainerStorageNoAlignment) { + using namespace dummyPreprocessors; + MultiPreprocessorsContainerNoAlignment>( + pp_mode::STORAGE_ONLY); +} + +TEST(PreprocessorsTest, MultiPreprocessorsContainerQueryNoAlignment) { + using namespace dummyPreprocessors; + MultiPreprocessorsContainerNoAlignment>(pp_mode::QUERY_ONLY); +} + +template +void multiPPContainerMixedPreprocessorNoAlignment() { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t n_preprocessors = 3; + unsigned char alignment = 0; + int initial_value = 1; + int value_to_add_storage = 7; + int value_to_add_query = 2; + const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; + size_t processed_bytes_count = sizeof(original_blob); + + // Test multiple preprocessors of the same type. + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + + /* ==== Add one preprocessor of each type ==== */ + auto preprocessor0 = + new (allocator) FirstPreprocessorType(allocator, value_to_add_storage, value_to_add_query); + ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 1); + auto preprocessor1 = + new (allocator) SecondPreprocessorType(allocator, value_to_add_storage, value_to_add_query); + ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor1), 2); + + // scope this section so the blobs are released before the allocator. + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, processed_bytes_count); + // Original blob should not be changed + ASSERT_EQ(original_blob[0], initial_value); + + // Both blobs should be allocated + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + + // Ensure the computer process returns a new allocation of the expected processed blob with + // the new value. + ASSERT_NE(storage_blob, original_blob); + ASSERT_NE(query_blob, original_blob); + ASSERT_NE(query_blob, storage_blob); + + ASSERT_EQ(((const int *)storage_blob)[0], initial_value + value_to_add_storage); + ASSERT_EQ(((const int *)query_blob)[0], initial_value + value_to_add_query); + } + + /* ==== Add a preprocessor that processes both storage and query ==== */ + auto preprocessor2 = new (allocator) + DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); + // add preprocessor returns 0 when adding the last preprocessor. + ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), 0); + { + ProcessedBlobs mixed_processed_blobs = + multiPPContainer.preprocess(original_blob, processed_bytes_count); + + const void *mixed_pp_storage_blob = mixed_processed_blobs.getStorageBlob(); + const void *mixed_pp_query_blob = mixed_processed_blobs.getQueryBlob(); + + // Ensure the computer process both blobs. 
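+        // (Reviewer note) Worked expectation: each blob is touched once by its dedicated
+        // preprocessor and once by the mixed one, so
+        //   storage: 1 (initial) + 7 + 7 = 15, query: 1 (initial) + 2 + 2 = 5.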
+ ASSERT_EQ(((const int *)mixed_pp_storage_blob)[0], + initial_value + 2 * value_to_add_storage); + ASSERT_EQ(((const int *)mixed_pp_query_blob)[0], initial_value + 2 * value_to_add_query); + } + + // try adding another preprocessor and fail. + ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), -1); +} + +TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorQueryFirst) { + using namespace dummyPreprocessors; + multiPPContainerMixedPreprocessorNoAlignment, + DummyStoragePreprocessor>(); +} + +TEST(PreprocessorsTest, multiPPContainerMixedPreprocessorStorageFirst) { + using namespace dummyPreprocessors; + multiPPContainerMixedPreprocessorNoAlignment, + DummyQueryPreprocessor>(); +} + +template +void multiPPContainerAlignment(dummyPreprocessors::pp_mode MODE) { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + unsigned char alignment = 5; + constexpr size_t n_preprocessors = 1; + int initial_value = 1; + int value_to_add = 7; + const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; + size_t processed_bytes_count = sizeof(original_blob); + + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + + auto verify_preprocess = [&](int expected_processed_value) { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, processed_bytes_count); + + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + if (MODE == STORAGE_ONLY) { + // New storage blob should be allocated and processed + ASSERT_NE(storage_blob, original_blob); + ASSERT_EQ(((const int *)storage_blob)[0], expected_processed_value); + // query blob *values* should be unprocessed, however, it might be allocated if the + // original blob is not aligned. + ASSERT_EQ(((const int *)query_blob)[0], original_blob[0]); + } else if (MODE == QUERY_ONLY) { + // New query blob should be allocated + ASSERT_NE(query_blob, original_blob); + // Storage blob should be unprocessed and not allocated. + ASSERT_EQ(storage_blob, original_blob); + ASSERT_EQ(((const int *)query_blob)[0], expected_processed_value); + } + + // anyway the query blob should be aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + }; + + auto preprocessor0 = new (allocator) PreprocessorType(allocator, value_to_add); + // add preprocessor returns next free spot in its preprocessors array. 
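+    // (Reviewer note) n_preprocessors == 1 here, so the very first add fills the array;
+    // per the slot-returning contract exercised above, addPreprocessor should return 0.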
+ ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor0), 0); + verify_preprocess(initial_value + value_to_add); +} + +TEST(PreprocessorsTest, StoragePreprocessorWithAlignment) { + using namespace dummyPreprocessors; + multiPPContainerAlignment>(pp_mode::STORAGE_ONLY); +} + +TEST(PreprocessorsTest, QueryPreprocessorWithAlignment) { + using namespace dummyPreprocessors; + multiPPContainerAlignment>(pp_mode::QUERY_ONLY); +} + +TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t n_preprocessors = 2; + constexpr size_t dim = 4; + unsigned char alignment = 5; + + float initial_value = 1.0f; + float normalized_value = 0.5f; + float value_to_add_storage = 7.0f; + float value_to_add_query = 2.0f; + const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; + + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + + // adding cosine preprocessor + auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, dim); + multiPPContainer.addPreprocessor(cosine_preprocessor); + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_EQ(storage_blob, query_blob); + // memory should be aligned + unsigned char address_alignment = (uintptr_t)(storage_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + // They need to be allocated and processed + ASSERT_NE(storage_blob, nullptr); + ASSERT_EQ(((const float *)storage_blob)[0], normalized_value); + // the original blob should not change + ASSERT_NE(storage_blob, original_blob); + } + // adding mixed preprocessor + auto mixed_preprocessor = new (allocator) + DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); + multiPPContainer.addPreprocessor(mixed_preprocessor); + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to a different memory slot + ASSERT_NE(storage_blob, query_blob); + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(query_blob, nullptr); + + // query blob should be aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + + // They need to be processed by both processors. + ASSERT_EQ(((const float *)storage_blob)[0], normalized_value + value_to_add_storage); + ASSERT_EQ(((const float *)query_blob)[0], normalized_value + value_to_add_query); + + // the original blob should not change + ASSERT_NE(storage_blob, original_blob); + ASSERT_NE(query_blob, original_blob); + } + // The preprocessors should be released by the preprocessors container. 
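+    // (Reviewer note) The ordering matters: the cosine preprocessor writes one shared,
+    // aligned blob for storage and query, and DummyMixedPreprocessor::preprocess then
+    // detects the shared pointer and splits it before applying its per-blob deltas.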
+} + +TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t n_preprocessors = 2; + constexpr size_t dim = 4; + unsigned char alignment = 5; + + float initial_value = 1.0f; + float normalized_value = 0.5f; + float value_to_add_storage = 7.0f; + float value_to_add_query = 2.0f; + const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; + + // Creating multi preprocessors container + auto mixed_preprocessor = new (allocator) + DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(mixed_preprocessor); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to a different memory slot + ASSERT_NE(storage_blob, query_blob); + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(query_blob, nullptr); + + // query blob should be aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + + // They need to be processed by both processors. + ASSERT_EQ(((const float *)storage_blob)[0], initial_value + value_to_add_storage); + ASSERT_EQ(((const float *)query_blob)[0], initial_value + value_to_add_query); + + // the original blob should not change + ASSERT_NE(storage_blob, original_blob); + ASSERT_NE(query_blob, original_blob); + } + + // adding cosine preprocessor + auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, dim); + multiPPContainer.addPreprocessor(cosine_preprocessor); + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to a different memory slot + ASSERT_NE(storage_blob, query_blob); + // query memory should be aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + // They need to be allocated and processed + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(query_blob, nullptr); + float expected_processed_storage[dim] = {initial_value + value_to_add_storage, + initial_value, initial_value, initial_value}; + float expected_processed_query[dim] = {initial_value + value_to_add_query, initial_value, + initial_value, initial_value}; + VecSim_Normalize(expected_processed_storage, dim, VecSimType_FLOAT32); + VecSim_Normalize(expected_processed_query, dim, VecSimType_FLOAT32); + ASSERT_EQ(((const float *)storage_blob)[0], expected_processed_storage[0]); + ASSERT_EQ(((const float *)query_blob)[0], expected_processed_query[0]); + // the original blob should not change + ASSERT_NE(storage_blob, original_blob); + ASSERT_NE(query_blob, original_blob); + } + // The preprocessors should be released by the preprocessors container. 
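+    // (Reviewer note) With the reversed order, the deltas are applied to the raw values
+    // first and normalization happens last, so the expected first elements are
+    // normalize({8, 1, 1, 1})[0] for storage and normalize({3, 1, 1, 1})[0] for query.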
+}
diff --git a/tests/unit/test_fp16.cpp b/tests/unit/test_fp16.cpp
index 377ef8f32..244bb9d0c 100644
--- a/tests/unit/test_fp16.cpp
+++ b/tests/unit/test_fp16.cpp
@@ -2,7 +2,7 @@
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
 #include "VecSim/index_factories/hnsw_factory.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/utils/serializer.h"
 #include "mock_thread_pool.h"
 #include "VecSim/query_result_definitions.h"
diff --git a/tests/unit/test_hnsw.cpp b/tests/unit/test_hnsw.cpp
index 733f21432..20f009f48 100644
--- a/tests/unit/test_hnsw.cpp
+++ b/tests/unit/test_hnsw.cpp
@@ -9,7 +9,7 @@
 #include "VecSim/vec_sim_debug.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
 #include "VecSim/index_factories/hnsw_factory.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/utils/serializer.h"
 #include "VecSim/query_result_definitions.h"
 #include
@@ -36,7 +36,7 @@ class HNSWTest : public ::testing::Test {
     }
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(HNSWTest, DataTypeSet);
diff --git a/tests/unit/test_hnsw_multi.cpp b/tests/unit/test_hnsw_multi.cpp
index ba87f1759..026f96e62 100644
--- a/tests/unit/test_hnsw_multi.cpp
+++ b/tests/unit/test_hnsw_multi.cpp
@@ -6,7 +6,7 @@
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/algorithms/hnsw/hnsw_multi.h"
 #include
 #include
@@ -31,7 +31,7 @@ class HNSWMultiTest : public ::testing::Test {
     }
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(HNSWMultiTest, DataTypeSet);
diff --git a/tests/unit/test_hnsw_parallel.cpp b/tests/unit/test_hnsw_parallel.cpp
index a2d4827ca..0354a6af1 100644
--- a/tests/unit/test_hnsw_parallel.cpp
+++ b/tests/unit/test_hnsw_parallel.cpp
@@ -7,7 +7,7 @@
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_single.h"
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "VecSim/query_result_definitions.h"
 #include "VecSim/vec_sim_debug.h"
 #include
@@ -124,7 +124,7 @@ class HNSWTestParallel : public ::testing::Test {
     void parallelInsertSearch(bool is_multi);
 };
 
-// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in test_utils.h
+// DataTypeSet, TEST_DATA_T and TEST_DIST_T are defined in unit_test_utils.h
 
 TYPED_TEST_SUITE(HNSWTestParallel, DataTypeSet);
diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp
index 198a8293a..4f59c9021 100644
--- a/tests/unit/test_hnsw_tiered.cpp
+++ b/tests/unit/test_hnsw_tiered.cpp
@@ -6,7 +6,7 @@
 #include
 #include
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "mock_thread_pool.h"
 #include
@@ -163,7 +163,8 @@ TYPED_TEST(HNSWTieredIndexTest, testIndexesAttributes) {
         dynamic_cast *>(bf_preprocessors)
             ->getPreprocessors();
     const std::type_info &bf_pp_expected_type = typeid(CosinePreprocessor);
-    const std::type_info &bf_pp_actual_type = typeid(*pp_arr[0]);
+    PreprocessorInterface *bf_pp = pp_arr[0];
+    const std::type_info &bf_pp_actual_type = typeid(*bf_pp);
     ASSERT_EQ(bf_pp_actual_type, bf_pp_expected_type);
 
     // hnsw - simple
diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
new file mode 100644
index 000000000..e298232ba
--- /dev/null
+++ b/tests/unit/test_int8.cpp
@@ -0,0 +1,995 @@
"gtest/gtest.h" +#include "VecSim/vec_sim.h" +#include "VecSim/algorithms/hnsw/hnsw_single.h" +#include "tests_utils.h" +#include "unit_test_utils.h" +#include "mock_thread_pool.h" +#include "VecSim/vec_sim_debug.h" +#include "VecSim/spaces/L2/L2.h" +#include "VecSim/spaces/IP/IP.h" + +class INT8Test : public ::testing::Test { +protected: + virtual void SetUp(HNSWParams ¶ms) { + FAIL() << "INT8Test::SetUp(HNSWParams) this method should be overriden"; + } + + virtual void SetUp(BFParams ¶ms) { + FAIL() << "INT8Test::SetUp(BFParams) this method should be overriden"; + } + + virtual void SetUp(TieredIndexParams &tiered_params) { + FAIL() << "INT8Test::SetUp(TieredIndexParams) this method should be overriden"; + } + + virtual void TearDown() { VecSimIndex_Free(index); } + + virtual const void *GetDataByInternalId(idType id) = 0; + + template + algo_t *CastIndex() { + return dynamic_cast(index); + } + + template + algo_t *CastIndex(VecSimIndex *vecsim_index) { + return dynamic_cast(vecsim_index); + } + + virtual HNSWIndex *CastToHNSW() { return CastIndex>(); } + + void PopulateRandomVector(int8_t *out_vec) { test_utils::populate_int8_vec(out_vec, dim); } + int PopulateRandomAndAddVector(size_t id, int8_t *out_vec) { + PopulateRandomVector(out_vec); + return VecSimIndex_AddVector(index, out_vec, id); + } + + virtual int GenerateAndAddVector(size_t id, int8_t value = 1) { + // use unit_test_utils.h + return ::GenerateAndAddVector(index, dim, id, value); + } + + void GenerateVector(int8_t *out_vec, int8_t value) { + // use unit_test_utils.h + return ::GenerateVector(out_vec, this->dim, value); + } + + virtual int GenerateRandomAndAddVector(size_t id) { + int8_t v[dim]; + PopulateRandomVector(v); + return VecSimIndex_AddVector(index, v, id); + } + + size_t GetValidVectorsCount() { + VecSimIndexInfo info = VecSimIndex_Info(index); + return info.commonInfo.indexLabelCount; + } + + template + void create_index_test(params_t index_params); + template + void element_size_test(params_t index_params); + template + void search_by_id_test(params_t index_params); + template + void search_by_score_test(params_t index_params); + template + void metrics_test(params_t index_params); + template + void search_empty_index_test(params_t index_params); + template + void test_override(params_t index_params); + template + void test_range_query(params_t index_params); + template + void test_batch_iterator_basic(params_t index_params); + template + VecSimIndexInfo test_info(params_t index_params); + template + void test_info_iterator(VecSimMetric metric); + template + void get_element_neighbors(params_t index_params); + + VecSimIndex *index; + size_t dim; +}; + +class INT8HNSWTest : public INT8Test { +protected: + virtual void SetUp(HNSWParams ¶ms) override { + params.type = VecSimType_INT8; + VecSimParams vecsim_params = CreateParams(params); + index = VecSimIndex_New(&vecsim_params); + dim = params.dim; + } + + virtual const void *GetDataByInternalId(idType id) override { + return CastIndex>()->getDataByInternalId(id); + } + + virtual HNSWIndex *CastToHNSW() override { + return CastIndex>(index); + } + + HNSWIndex *CastToHNSW(VecSimIndex *new_index) { + return CastIndex>(new_index); + } + + void test_info(bool is_multi); + void test_serialization(bool is_multi); +}; + +class INT8BruteForceTest : public INT8Test { +protected: + virtual void SetUp(BFParams ¶ms) override { + params.type = VecSimType_INT8; + VecSimParams vecsim_params = CreateParams(params); + index = VecSimIndex_New(&vecsim_params); + dim = params.dim; + 
+    }
+
+    virtual const void *GetDataByInternalId(idType id) override {
+        return CastIndex<BruteForceIndex<int8_t, float>>()->getDataByInternalId(id);
+    }
+
+    virtual HNSWIndex<int8_t, float> *CastToHNSW() override {
+        ADD_FAILURE() << "INT8BruteForceTest::CastToHNSW() this method should not be called";
+        return nullptr;
+    }
+
+    void test_info(bool is_multi);
+};
+
+class INT8TieredTest : public INT8Test {
+protected:
+    TieredIndexParams generate_tiered_params(HNSWParams &hnsw_params,
+                                             size_t swap_job_threshold = 1,
+                                             size_t flat_buffer_limit = SIZE_MAX) {
+        hnsw_params.type = VecSimType_INT8;
+        vecsim_hnsw_params = CreateParams(hnsw_params);
+        TieredIndexParams tiered_params = {
+            .jobQueue = &mock_thread_pool.jobQ,
+            .jobQueueCtx = mock_thread_pool.ctx,
+            .submitCb = tieredIndexMock::submit_callback,
+            .flatBufferLimit = flat_buffer_limit,
+            .primaryIndexParams = &vecsim_hnsw_params,
+            .specificParams = {TieredHNSWParams{.swapJobThreshold = swap_job_threshold}}};
+        return tiered_params;
+    }
+
+    virtual void SetUp(TieredIndexParams &tiered_params) override {
+        VecSimParams params = CreateParams(tiered_params);
+        index = VecSimIndex_New(&params);
+        dim = tiered_params.primaryIndexParams->algoParams.hnswParams.dim;
+
+        // Set the created tiered index in the index external context.
+        mock_thread_pool.ctx->index_strong_ref.reset(index);
+    }
+
+    virtual void SetUp(HNSWParams &hnsw_params) override {
+        TieredIndexParams tiered_params = generate_tiered_params(hnsw_params);
+        SetUp(tiered_params);
+    }
+
+    virtual void TearDown() override {}
+
+    virtual const void *GetDataByInternalId(idType id) override {
+        return CastIndex<BruteForceIndex<int8_t, float>>(CastToBruteForce())
+            ->getDataByInternalId(id);
+    }
+
+    virtual HNSWIndex<int8_t, float> *CastToHNSW() override {
+        auto tiered_index = dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index);
+        return tiered_index->getHNSWIndex();
+    }
+
+    virtual HNSWIndex_Single<int8_t, float> *CastToHNSWSingle() {
+        return CastIndex<HNSWIndex_Single<int8_t, float>>(CastToHNSW());
+    }
+
+    VecSimIndexAbstract<int8_t, float> *CastToBruteForce() {
+        auto tiered_index = dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index);
+        return tiered_index->getFlatBufferIndex();
+    }
+
+    int GenerateRandomAndAddVector(size_t id) override {
+        int8_t v[dim];
+        PopulateRandomVector(v);
+        int ret = VecSimIndex_AddVector(index, v, id);
+        mock_thread_pool.thread_iteration();
+        return ret;
+    }
+
+    int GenerateAndAddVector(size_t id, int8_t value) override {
+        // use unit_test_utils.h
+        int ret = INT8Test::GenerateAndAddVector(id, value);
+        mock_thread_pool.thread_iteration();
+        return ret;
+    }
+
+    void test_info(bool is_multi);
+    void test_info_iterator(VecSimMetric metric);
+
+    VecSimParams vecsim_hnsw_params;
+    tieredIndexMock mock_thread_pool;
+};
+
+/* ---------------------------- Create index tests ---------------------------- */
+
+template <typename params_t>
+void INT8Test::create_index_test(params_t index_params) {
+    SetUp(index_params);
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
+
+    int8_t vector[dim];
+    this->PopulateRandomVector(vector);
+    VecSimIndex_AddVector(index, vector, 0);
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
+    ASSERT_EQ(index->getDistanceFrom_Unsafe(0, vector), 0);
+
+    ASSERT_NO_FATAL_FAILURE(
+        CompareVectors(static_cast<const int8_t *>(this->GetDataByInternalId(0)), vector, dim));
+}
+
+TEST_F(INT8HNSWTest, createIndex) {
+    HNSWParams params = {.dim = 40, .M = 16, .efConstruction = 200};
+    EXPECT_NO_FATAL_FAILURE(create_index_test(params));
+    ASSERT_EQ(index->basicInfo().type, VecSimType_INT8);
+    ASSERT_EQ(index->basicInfo().algo, VecSimAlgo_HNSWLIB);
+}
+
+TEST_F(INT8BruteForceTest, createIndex) {
+    BFParams params = {.dim = 40};
+    EXPECT_NO_FATAL_FAILURE(create_index_test(params));
ASSERT_EQ(index->basicInfo().type, VecSimType_INT8); + ASSERT_EQ(index->basicInfo().algo, VecSimAlgo_BF); +} + +TEST_F(INT8TieredTest, createIndex) { + HNSWParams params = {.dim = 40, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(create_index_test(params)); + ASSERT_EQ(index->basicInfo().type, VecSimType_INT8); + ASSERT_EQ(index->basicInfo().isTiered, true); +} + +/* ---------------------------- Size Estimation tests ---------------------------- */ + +template +void INT8Test::element_size_test(params_t index_params) { + SetUp(index_params); + + // Estimate the memory delta of adding a single vector that requires a full new block. + size_t estimation = EstimateElementSize(index_params) * DEFAULT_BLOCK_SIZE; + size_t before = index->getAllocationSize(); + ASSERT_EQ(this->GenerateRandomAndAddVector(0), 1); + size_t actual = index->getAllocationSize() - before; + + // We check that the actual size is within 1% of the estimation. + ASSERT_GE(estimation, actual * 0.99); + ASSERT_LE(estimation, actual * 1.01); +} + +TEST_F(INT8HNSWTest, elementSizeEstimation) { + size_t M = 64; + + HNSWParams params = {.dim = 4, .M = M}; + EXPECT_NO_FATAL_FAILURE(element_size_test(params)); +} + +TEST_F(INT8BruteForceTest, elementSizeEstimation) { + BFParams params = {.dim = 4}; + EXPECT_NO_FATAL_FAILURE(element_size_test(params)); +} + +TEST_F(INT8TieredTest, elementSizeEstimation) { + size_t M = 64; + HNSWParams hnsw_params = {.dim = 4, .M = M}; + VecSimParams vecsim_hnsw_params = CreateParams(hnsw_params); + TieredIndexParams tiered_params = + test_utils::CreateTieredParams(vecsim_hnsw_params, this->mock_thread_pool); + EXPECT_NO_FATAL_FAILURE(element_size_test(tiered_params)); +} + +/* ---------------------------- Functionality tests ---------------------------- */ + +template +void INT8Test::search_by_id_test(params_t index_params) { + SetUp(index_params); + + size_t k = 11; + int8_t n = 100; + + for (int8_t i = 0; i < n; i++) { + this->GenerateAndAddVector(i, i); // {i, i, i, i} + } + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + + int8_t query[dim]; + this->GenerateVector(query, 50); // {50, 50, 50, 50} + + // Vectors values are equal to the id, so the 11 closest vectors are 45, 46...50 + // (closest), 51...55 + static size_t expected_res_order[] = {45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55}; + auto verify_res = [&](size_t id, double score, size_t index) { + ASSERT_EQ(id, expected_res_order[index]); // results are sorted by ID + ASSERT_EQ(score, 4 * (50 - id) * (50 - id)); // L2 distance + }; + + runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID); +} + +TEST_F(INT8HNSWTest, searchByID) { + HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(search_by_id_test(params)); +} + +TEST_F(INT8BruteForceTest, searchByID) { + BFParams params = {.dim = 4}; + EXPECT_NO_FATAL_FAILURE(search_by_id_test(params)); +} + +TEST_F(INT8TieredTest, searchByID) { + HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(search_by_id_test(params)); +} + +template +void INT8Test::search_by_score_test(params_t index_params) { + SetUp(index_params); + + size_t k = 11; + size_t n = 100; + + for (size_t i = 0; i < n; i++) { + this->GenerateAndAddVector(i, i); // {i, i, i, i} + } + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + + int8_t query[dim]; + this->GenerateVector(query, 50); // {50, 50, 50, 50} + + // Vectors values are equal to the id, so the 11 closest vectors are + // 45, 46...50 (closest), 51...55 + static size_t 
expected_res_order[] = {50, 49, 51, 48, 52, 47, 53, 46, 54, 45, 55}; + auto verify_res = [&](size_t id, double score, size_t index) { + ASSERT_EQ(id, expected_res_order[index]); + ASSERT_EQ(score, 4 * (50 - id) * (50 - id)); // L2 distance + }; + + // Search by score + runTopKSearchTest(index, query, k, verify_res); +} + +TEST_F(INT8HNSWTest, searchByScore) { + HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(search_by_score_test(params)); +} + +TEST_F(INT8BruteForceTest, searchByScore) { + BFParams params = {.dim = 4}; + EXPECT_NO_FATAL_FAILURE(search_by_score_test(params)); +} + +TEST_F(INT8TieredTest, searchByScore) { + HNSWParams params = {.dim = 4, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(search_by_score_test(params)); +} + +template +void INT8Test::metrics_test(params_t index_params) { + SetUp(index_params); + size_t n = 10; + VecSimMetric metric = index_params.metric; + double expected_score = 0; + + auto verify_res = [&](size_t id, double score, size_t index) { + ASSERT_EQ(score, expected_score) << "failed at vector id:" << id; + }; + + for (size_t i = 0; i < n; i++) { + int8_t vector[dim]; + this->PopulateRandomAndAddVector(i, vector); + + if (metric == VecSimMetric_Cosine) { + // compare with the norm stored in the index vector + const int8_t *index_vector = static_cast(this->GetDataByInternalId(i)); + float index_vector_norm = *(reinterpret_cast(index_vector + dim)); + float vector_norm = spaces::IntegralType_ComputeNorm(vector, dim); + ASSERT_EQ(index_vector_norm, vector_norm) << "wrong vector norm for vector id:" << i; + } else if (metric == VecSimMetric_IP) { + expected_score = INT8_InnerProduct(vector, vector, dim); + } + + // query index with k = 1 expect to get the vector + runTopKSearchTest(index, vector, 1, verify_res); + ASSERT_EQ(VecSimIndex_IndexSize(index), i + 1); + } +} + +TEST_F(INT8HNSWTest, CosineTest) { + HNSWParams params = {.dim = 40, .metric = VecSimMetric_Cosine, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(metrics_test(params)); +} +TEST_F(INT8HNSWTest, IPTest) { + HNSWParams params = {.dim = 40, .metric = VecSimMetric_IP, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE((metrics_test)(params)); +} +TEST_F(INT8HNSWTest, L2Test) { + HNSWParams params = {.dim = 40, .metric = VecSimMetric_L2, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(metrics_test(params)); +} + +TEST_F(INT8BruteForceTest, CosineTest) { + BFParams params = {.dim = 40, .metric = VecSimMetric_Cosine}; + EXPECT_NO_FATAL_FAILURE(metrics_test(params)); +} +TEST_F(INT8BruteForceTest, IPTest) { + BFParams params = {.dim = 40, .metric = VecSimMetric_IP}; + EXPECT_NO_FATAL_FAILURE((metrics_test)(params)); +} +TEST_F(INT8BruteForceTest, L2Test) { + BFParams params = {.dim = 40, .metric = VecSimMetric_L2}; + EXPECT_NO_FATAL_FAILURE(metrics_test(params)); +} + +TEST_F(INT8TieredTest, CosineTest) { + HNSWParams params = {.dim = 40, .metric = VecSimMetric_Cosine, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(metrics_test(params)); +} +TEST_F(INT8TieredTest, IPTest) { + HNSWParams params = {.dim = 40, .metric = VecSimMetric_IP, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE((metrics_test)(params)); +} +TEST_F(INT8TieredTest, L2Test) { + HNSWParams params = {.dim = 40, .metric = VecSimMetric_L2, .M = 16, .efConstruction = 200}; + EXPECT_NO_FATAL_FAILURE(metrics_test(params)); +} + +template +void INT8Test::search_empty_index_test(params_t params) { + size_t n = 100; + size_t k = 11; + + 
SetUp(params); + ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + + int8_t query[dim]; + this->GenerateVector(query, 50); // {50, 50, 50, 50} + + // We do not expect any results. + VecSimQueryReply *res = VecSimIndex_TopKQuery(index, query, k, NULL, BY_SCORE); + ASSERT_EQ(VecSimQueryReply_Len(res), 0); + VecSimQueryReply_Iterator *it = VecSimQueryReply_GetIterator(res); + ASSERT_EQ(VecSimQueryReply_IteratorNext(it), nullptr); + VecSimQueryReply_IteratorFree(it); + VecSimQueryReply_Free(res); + + res = VecSimIndex_RangeQuery(index, query, 1.0, NULL, BY_SCORE); + ASSERT_EQ(VecSimQueryReply_Len(res), 0); + VecSimQueryReply_Free(res); + + // Add some vectors and remove them all from index, so it will be empty again. + for (size_t i = 0; i < n; i++) { + this->GenerateAndAddVector(i); + } + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + for (size_t i = 0; i < n; i++) { + VecSimIndex_DeleteVector(index, i); + } + // vectors marked as deleted will be included in VecSimIndex_IndexSize + ASSERT_EQ(GetValidVectorsCount(), 0); + + // Again - we do not expect any results. + res = VecSimIndex_TopKQuery(index, query, k, NULL, BY_SCORE); + ASSERT_EQ(VecSimQueryReply_Len(res), 0); + it = VecSimQueryReply_GetIterator(res); + ASSERT_EQ(VecSimQueryReply_IteratorNext(it), nullptr); + VecSimQueryReply_IteratorFree(it); + VecSimQueryReply_Free(res); + + res = VecSimIndex_RangeQuery(index, query, 1.0, NULL, BY_SCORE); + ASSERT_EQ(VecSimQueryReply_Len(res), 0); + VecSimQueryReply_Free(res); +} + +TEST_F(INT8HNSWTest, SearchEmptyIndex) { + HNSWParams params = {.dim = 4, .initialCapacity = 0}; + EXPECT_NO_FATAL_FAILURE(search_empty_index_test(params)); +} + +TEST_F(INT8BruteForceTest, SearchEmptyIndex) { + BFParams params = {.dim = 4, .initialCapacity = 0}; + EXPECT_NO_FATAL_FAILURE(search_empty_index_test(params)); +} + +TEST_F(INT8TieredTest, SearchEmptyIndex) { + HNSWParams params = {.dim = 4, .initialCapacity = 0}; + EXPECT_NO_FATAL_FAILURE(search_empty_index_test(params)); +} + +template +void INT8Test::test_override(params_t params) { + size_t n = 50; + size_t new_n = 120; + SetUp(params); + + // Insert n vectors. + for (size_t i = 0; i < n; i++) { + ASSERT_EQ(GenerateAndAddVector(i, i), 1); + } + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + + // Override n vectors, the first 100 will be overwritten (deleted first). + for (size_t i = 0; i < n; i++) { + ASSERT_EQ(this->GenerateAndAddVector(i, i), 0); + } + + // Add up to new_n vectors. + for (size_t i = n; i < new_n; i++) { + ASSERT_EQ(this->GenerateAndAddVector(i, i), 1); + } + + int8_t query[dim]; + this->GenerateVector(query, new_n); + + // Vectors values equals their id, so we expect the larger the id the closest it will be to the + // query. 
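+    // (Reviewer note) With dim == 4 and the default L2 metric, the expected score for a
+    // result id is 4 * (new_n - id)^2; e.g. the top result, id == new_n - 1, scores 4.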
+ auto verify_res = [&](size_t id, double score, size_t index) { + ASSERT_EQ(id, new_n - 1 - index) << "id: " << id << " score: " << score; + float diff = new_n - id; + float exp_score = 4 * diff * diff; + ASSERT_EQ(score, exp_score) << "id: " << id << " score: " << score; + }; + runTopKSearchTest(index, query, 300, verify_res); +} + +TEST_F(INT8HNSWTest, Override) { + HNSWParams params = { + .dim = 4, .initialCapacity = 100, .M = 8, .efConstruction = 20, .efRuntime = 250}; + EXPECT_NO_FATAL_FAILURE(test_override(params)); +} + +TEST_F(INT8BruteForceTest, Override) { + BFParams params = {.dim = 4, .initialCapacity = 100}; + EXPECT_NO_FATAL_FAILURE(test_override(params)); +} + +TEST_F(INT8TieredTest, Override) { + HNSWParams params = { + .dim = 4, .initialCapacity = 100, .M = 8, .efConstruction = 20, .efRuntime = 250}; + EXPECT_NO_FATAL_FAILURE(test_override(params)); +} + +template +void INT8Test::test_range_query(params_t params) { + size_t n = 100; + SetUp(params); + + int8_t pivot_value = 1; + int8_t pivot_vec[dim]; + this->GenerateVector(pivot_vec, pivot_value); + + int8_t radius = 20; + std::mt19937 gen(42); + std::uniform_int_distribution dis(pivot_value - radius, pivot_value + radius); + + // insert 20 vectors near a pivot vector. + size_t n_close = 20; + for (size_t i = 0; i < n_close; i++) { + int8_t random_number = static_cast(dis(gen)); + this->GenerateAndAddVector(i, random_number); + } + + int8_t max_vec[dim]; + GenerateVector(max_vec, pivot_value + radius); + float max_dist = INT8_L2Sqr(pivot_vec, max_vec, dim); + + // Add more vectors far from the pivot vector + for (size_t i = n_close; i < n; i++) { + int8_t random_number = static_cast(dis(gen)); + GenerateAndAddVector(i, 50 + random_number); + } + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + + auto verify_res_by_score = [&](size_t id, double score, size_t index) { + ASSERT_LE(id, n_close - 1) << "score: " << score; + ASSERT_LE(score, max_dist); + }; + size_t expected_num_results = n_close; + + runRangeQueryTest(index, pivot_vec, max_dist, verify_res_by_score, expected_num_results, + BY_SCORE); +} + +TEST_F(INT8HNSWTest, rangeQuery) { + HNSWParams params = {.dim = 4}; + EXPECT_NO_FATAL_FAILURE(test_range_query(params)); +} + +TEST_F(INT8BruteForceTest, rangeQuery) { + BFParams params = {.dim = 4}; + EXPECT_NO_FATAL_FAILURE(test_range_query(params)); +} + +TEST_F(INT8TieredTest, rangeQuery) { + HNSWParams params = {.dim = 4}; + EXPECT_NO_FATAL_FAILURE(test_range_query(params)); +} + +/* ---------------------------- Batch iterator tests ---------------------------- */ + +template +void INT8Test::test_batch_iterator_basic(params_t params) { + SetUp(params); + size_t n = 100; + + // For every i, add the vector (i,i,i,i) under the label i. + for (size_t i = 0; i < n; i++) { + ASSERT_EQ(this->GenerateAndAddVector(i, i), 1); + } + + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + + // Query for (n,n,n,n) vector (recall that n-1 is the largest id in te index). + int8_t query[dim]; + GenerateVector(query, n); + + VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query, nullptr); + size_t iteration_num = 0; + + // Get the 5 vectors whose ids are the maximal among those that hasn't been returned yet + // in every iteration. The results order should be sorted by their score (distance from the + // query vector), which means sorted from the largest id to the lowest. 
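+    // (Reviewer note) Worked example for n == 100, n_res == 5: iteration 0 is expected to
+    // return ids {99, 98, 97, 96, 95}, iteration 1 {94..90}, and so on, for a total of
+    // n / n_res == 20 iterations.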
+ size_t n_res = 5; + while (VecSimBatchIterator_HasNext(batchIterator)) { + std::vector expected_ids(n_res); + for (size_t i = 0; i < n_res; i++) { + expected_ids[i] = (n - iteration_num * n_res - i - 1); + } + auto verify_res = [&](size_t id, double score, size_t index) { + ASSERT_EQ(expected_ids[index], id) + << "iteration_num: " << iteration_num << " index: " << index << " score: " << score; + }; + runBatchIteratorSearchTest(batchIterator, n_res, verify_res); + iteration_num++; + } + ASSERT_EQ(iteration_num, n / n_res); + VecSimBatchIterator_Free(batchIterator); +} + +TEST_F(INT8HNSWTest, BatchIteratorBasic) { + HNSWParams params = {.dim = 4, .M = 8, .efConstruction = 20, .efRuntime = 100}; + EXPECT_NO_FATAL_FAILURE(test_batch_iterator_basic(params)); +} + +TEST_F(INT8BruteForceTest, BatchIteratorBasic) { + BFParams params = {.dim = 4}; + EXPECT_NO_FATAL_FAILURE(test_batch_iterator_basic(params)); +} + +TEST_F(INT8TieredTest, BatchIteratorBasic) { + HNSWParams params = {.dim = 4, .M = 8, .efConstruction = 20, .efRuntime = 100}; + EXPECT_NO_FATAL_FAILURE(test_batch_iterator_basic(params)); +} + +/* ---------------------------- Info tests ---------------------------- */ + +template +VecSimIndexInfo INT8Test::test_info(params_t params) { + SetUp(params); + VecSimIndexInfo info = VecSimIndex_Info(index); + EXPECT_EQ(info.commonInfo.basicInfo.dim, params.dim); + EXPECT_EQ(info.commonInfo.basicInfo.isMulti, params.multi); + EXPECT_EQ(info.commonInfo.basicInfo.type, VecSimType_INT8); + EXPECT_EQ(info.commonInfo.basicInfo.blockSize, DEFAULT_BLOCK_SIZE); + EXPECT_EQ(info.commonInfo.indexSize, 0); + EXPECT_EQ(info.commonInfo.indexLabelCount, 0); + EXPECT_EQ(info.commonInfo.memory, index->getAllocationSize()); + EXPECT_EQ(info.commonInfo.basicInfo.metric, VecSimMetric_L2); + + // Validate that basic info returns the right restricted info as well. 
+ VecSimIndexBasicInfo s_info = VecSimIndex_BasicInfo(index); + EXPECT_EQ(info.commonInfo.basicInfo.algo, s_info.algo); + EXPECT_EQ(info.commonInfo.basicInfo.dim, s_info.dim); + EXPECT_EQ(info.commonInfo.basicInfo.blockSize, s_info.blockSize); + EXPECT_EQ(info.commonInfo.basicInfo.type, s_info.type); + EXPECT_EQ(info.commonInfo.basicInfo.isMulti, s_info.isMulti); + EXPECT_EQ(info.commonInfo.basicInfo.type, s_info.type); + EXPECT_EQ(info.commonInfo.basicInfo.isTiered, s_info.isTiered); + + return info; +} + +void INT8HNSWTest::test_info(bool is_multi) { + HNSWParams params = {.dim = 128, .multi = is_multi}; + VecSimIndexInfo info = INT8Test::test_info(params); + ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB); + + ASSERT_EQ(info.hnswInfo.M, HNSW_DEFAULT_M); + ASSERT_EQ(info.hnswInfo.efConstruction, HNSW_DEFAULT_EF_C); + ASSERT_EQ(info.hnswInfo.efRuntime, HNSW_DEFAULT_EF_RT); + ASSERT_DOUBLE_EQ(info.hnswInfo.epsilon, HNSW_DEFAULT_EPSILON); +} +TEST_F(INT8HNSWTest, testInfoSingle) { test_info(false); } +TEST_F(INT8HNSWTest, testInfoMulti) { test_info(true); } + +void INT8BruteForceTest::test_info(bool is_multi) { + BFParams params = {.dim = 128, .multi = is_multi}; + VecSimIndexInfo info = INT8Test::test_info(params); + ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_BF); +} + +TEST_F(INT8BruteForceTest, testInfoSingle) { test_info(false); } +TEST_F(INT8BruteForceTest, testInfoMulti) { test_info(true); } + +void INT8TieredTest::test_info(bool is_multi) { + size_t bufferLimit = SIZE_MAX; + HNSWParams hnsw_params = {.dim = 128, .multi = is_multi}; + + VecSimIndexInfo info = INT8Test::test_info(hnsw_params); + ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB); + VecSimIndexInfo frontendIndexInfo = CastToBruteForce()->info(); + VecSimIndexInfo backendIndexInfo = CastToHNSW()->info(); + + compareCommonInfo(info.tieredInfo.frontendCommonInfo, frontendIndexInfo.commonInfo); + compareFlatInfo(info.tieredInfo.bfInfo, frontendIndexInfo.bfInfo); + compareCommonInfo(info.tieredInfo.backendCommonInfo, backendIndexInfo.commonInfo); + compareHNSWInfo(info.tieredInfo.backendInfo.hnswInfo, backendIndexInfo.hnswInfo); + + EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory + + backendIndexInfo.commonInfo.memory + + frontendIndexInfo.commonInfo.memory); + EXPECT_EQ(info.tieredInfo.backgroundIndexing, false); + EXPECT_EQ(info.tieredInfo.bufferLimit, bufferLimit); + EXPECT_EQ(info.tieredInfo.specificTieredBackendInfo.hnswTieredInfo.pendingSwapJobsThreshold, 1); + + INT8Test::GenerateAndAddVector(1, 1); + info = index->info(); + + EXPECT_EQ(info.commonInfo.indexSize, 1); + EXPECT_EQ(info.commonInfo.indexLabelCount, 1); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 0); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 0); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 1); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 1); + EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory + + info.tieredInfo.backendCommonInfo.memory + + info.tieredInfo.frontendCommonInfo.memory); + EXPECT_EQ(info.tieredInfo.backgroundIndexing, true); + + mock_thread_pool.thread_iteration(); + info = index->info(); + + EXPECT_EQ(info.commonInfo.indexSize, 1); + EXPECT_EQ(info.commonInfo.indexLabelCount, 1); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 1); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 1); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0); + 
EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 0); + EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory + + info.tieredInfo.backendCommonInfo.memory + + info.tieredInfo.frontendCommonInfo.memory); + EXPECT_EQ(info.tieredInfo.backgroundIndexing, false); + + if (is_multi) { + INT8Test::GenerateAndAddVector(1, 1); + info = index->info(); + + EXPECT_EQ(info.commonInfo.indexSize, 2); + EXPECT_EQ(info.commonInfo.indexLabelCount, 1); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 1); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 1); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 1); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 1); + EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory + + info.tieredInfo.backendCommonInfo.memory + + info.tieredInfo.frontendCommonInfo.memory); + EXPECT_EQ(info.tieredInfo.backgroundIndexing, true); + } + + VecSimIndex_DeleteVector(index, 1); + info = index->info(); + + EXPECT_EQ(info.commonInfo.indexSize, 0); + EXPECT_EQ(info.commonInfo.indexLabelCount, 0); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 0); + EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 0); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0); + EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 0); + EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory + + info.tieredInfo.backendCommonInfo.memory + + info.tieredInfo.frontendCommonInfo.memory); + EXPECT_EQ(info.tieredInfo.backgroundIndexing, false); +} + +TEST_F(INT8TieredTest, testInfoSingle) { test_info(false); } +TEST_F(INT8TieredTest, testInfoMulti) { test_info(true); } + +template +void INT8Test::test_info_iterator(VecSimMetric metric) { + params_t params = {.dim = 128, .metric = metric}; + SetUp(params); + VecSimIndexInfo info = VecSimIndex_Info(index); + VecSimInfoIterator *infoIter = VecSimIndex_InfoIterator(index); + VecSimAlgo algo = info.commonInfo.basicInfo.algo; + if (algo == VecSimAlgo_HNSWLIB) { + compareHNSWIndexInfoToIterator(info, infoIter); + } else if (algo == VecSimAlgo_BF) { + compareFlatIndexInfoToIterator(info, infoIter); + } + VecSimInfoIterator_Free(infoIter); +} + +TEST_F(INT8BruteForceTest, InfoIteratorCosine) { + test_info_iterator(VecSimMetric_Cosine); +} +TEST_F(INT8BruteForceTest, InfoIteratorIP) { test_info_iterator(VecSimMetric_IP); } +TEST_F(INT8BruteForceTest, InfoIteratorL2) { test_info_iterator(VecSimMetric_L2); } +TEST_F(INT8HNSWTest, InfoIteratorCosine) { test_info_iterator(VecSimMetric_Cosine); } +TEST_F(INT8HNSWTest, InfoIteratorIP) { test_info_iterator(VecSimMetric_IP); } +TEST_F(INT8HNSWTest, InfoIteratorL2) { test_info_iterator(VecSimMetric_L2); } + +void INT8TieredTest::test_info_iterator(VecSimMetric metric) { + size_t n = 100; + size_t d = 128; + HNSWParams params = {.dim = d, .metric = metric, .initialCapacity = n}; + SetUp(params); + VecSimIndexInfo info = VecSimIndex_Info(index); + VecSimInfoIterator *infoIter = VecSimIndex_InfoIterator(index); + VecSimIndexInfo frontendIndexInfo = CastToBruteForce()->info(); + VecSimIndexInfo backendIndexInfo = CastToHNSW()->info(); + VecSimInfoIterator_Free(infoIter); +} + +TEST_F(INT8TieredTest, InfoIteratorCosine) { test_info_iterator(VecSimMetric_Cosine); } +TEST_F(INT8TieredTest, InfoIteratorIP) { test_info_iterator(VecSimMetric_IP); } +TEST_F(INT8TieredTest, InfoIteratorL2) { test_info_iterator(VecSimMetric_L2); } + +/* ---------------------------- HNSW specific tests 
---------------------------- */ + +void INT8HNSWTest::test_serialization(bool is_multi) { + size_t dim = 4; + size_t n = 1001; + size_t n_labels[] = {n, 100}; + size_t M = 8; + size_t ef = 10; + double epsilon = 0.004; + size_t blockSize = 20; + std::string multiToString[] = {"single", "multi_100labels_"}; + + HNSWParams params{.type = VecSimType_INT8, + .dim = dim, + .metric = VecSimMetric_Cosine, + .multi = is_multi, + .initialCapacity = n, + .blockSize = blockSize, + .M = M, + .efConstruction = ef, + .efRuntime = ef, + .epsilon = epsilon}; + SetUp(params); + + auto *hnsw_index = this->CastToHNSW(); + + int8_t data[n * dim]; + + for (size_t i = 0; i < n * dim; i += dim) { + test_utils::populate_int8_vec(data + i, dim, i); + } + + for (size_t j = 0; j < n; ++j) { + VecSimIndex_AddVector(index, data + dim * j, j % n_labels[is_multi]); + } + + auto file_name = std::string(getenv("ROOT")) + "/tests/unit/1k-d4-L2-M8-ef_c10_" + + VecSimType_ToString(VecSimType_INT8) + "_" + multiToString[is_multi] + + ".hnsw_current_version"; + + // Save the index with the default version (V3). + hnsw_index->saveIndex(file_name); + + // Fetch info after saving, as memory size change during saving. + VecSimIndexInfo info = VecSimIndex_Info(index); + ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB); + ASSERT_EQ(info.hnswInfo.M, M); + ASSERT_EQ(info.hnswInfo.efConstruction, ef); + ASSERT_EQ(info.hnswInfo.efRuntime, ef); + ASSERT_EQ(info.commonInfo.indexSize, n); + ASSERT_EQ(info.commonInfo.basicInfo.metric, VecSimMetric_Cosine); + ASSERT_EQ(info.commonInfo.basicInfo.type, VecSimType_INT8); + ASSERT_EQ(info.commonInfo.basicInfo.dim, dim); + ASSERT_EQ(info.commonInfo.indexLabelCount, n_labels[is_multi]); + + // Load the index from the file. + VecSimIndex *serialized_index = HNSWFactory::NewIndex(file_name); + auto *serialized_hnsw_index = this->CastToHNSW(serialized_index); + + // Verify that the index was loaded as expected. + ASSERT_TRUE(serialized_hnsw_index->checkIntegrity().valid_state); + + VecSimIndexInfo info2 = VecSimIndex_Info(serialized_index); + ASSERT_EQ(info2.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB); + ASSERT_EQ(info2.hnswInfo.M, M); + ASSERT_EQ(info2.commonInfo.basicInfo.isMulti, is_multi); + ASSERT_EQ(info2.commonInfo.basicInfo.blockSize, blockSize); + ASSERT_EQ(info2.hnswInfo.efConstruction, ef); + ASSERT_EQ(info2.hnswInfo.efRuntime, ef); + ASSERT_EQ(info2.commonInfo.indexSize, n); + ASSERT_EQ(info2.commonInfo.basicInfo.metric, VecSimMetric_Cosine); + ASSERT_EQ(info2.commonInfo.basicInfo.type, VecSimType_INT8); + ASSERT_EQ(info2.commonInfo.basicInfo.dim, dim); + ASSERT_EQ(info2.commonInfo.indexLabelCount, n_labels[is_multi]); + ASSERT_EQ(info2.hnswInfo.epsilon, epsilon); + + // Check the functionality of the loaded index. + + int8_t new_vec[dim]; + this->PopulateRandomVector(new_vec); + VecSimIndex_AddVector(serialized_index, new_vec, n); + auto verify_res = [&](size_t id, double score, size_t index) { + ASSERT_EQ(id, n) << "score: " << score; + ASSERT_NEAR(score, 0.0, 1e-7); + }; + runTopKSearchTest(serialized_index, new_vec, 1, verify_res); + VecSimIndex_DeleteVector(serialized_index, 1); + + size_t n_per_label = n / n_labels[is_multi]; + ASSERT_TRUE(serialized_hnsw_index->checkIntegrity().valid_state); + ASSERT_EQ(VecSimIndex_IndexSize(serialized_index), n + 1 - n_per_label); + + // Clean up. 
+ remove(file_name.c_str()); + VecSimIndex_Free(serialized_index); +} + +TEST_F(INT8HNSWTest, SerializationCurrentVersion) { test_serialization(false); } + +TEST_F(INT8HNSWTest, SerializationCurrentVersionMulti) { test_serialization(true); } + +template +void INT8Test::get_element_neighbors(params_t params) { + size_t n = 0; + + SetUp(params); + auto *hnsw_index = CastToHNSW(); + + // Add vectors until we have at least 2 vectors at level 1. + size_t vectors_in_higher_levels = 0; + while (vectors_in_higher_levels < 2) { + GenerateAndAddVector(n, n); + if (hnsw_index->getGraphDataByInternalId(n)->toplevel > 0) { + vectors_in_higher_levels++; + } + n++; + } + ASSERT_GE(n, 1) << "n: " << n; + + // Go over all vectors and validate that the getElementNeighbors debug command returns the + // neighbors properly. + for (size_t id = 0; id < n; id++) { + ElementLevelData &cur = hnsw_index->getElementLevelData(id, 0); + int **neighbors_output; + VecSimDebug_GetElementNeighborsInHNSWGraph(index, id, &neighbors_output); + auto graph_data = hnsw_index->getGraphDataByInternalId(id); + for (size_t l = 0; l <= graph_data->toplevel; l++) { + auto &level_data = hnsw_index->getElementLevelData(graph_data, l); + auto &neighbours = neighbors_output[l]; + ASSERT_EQ(neighbours[0], level_data.numLinks); + for (size_t j = 1; j <= neighbours[0]; j++) { + ASSERT_EQ(neighbours[j], level_data.links[j - 1]); + } + } + VecSimDebug_ReleaseElementNeighborsInHNSWGraph(neighbors_output); + } +} + +TEST_F(INT8HNSWTest, getElementNeighbors) { + HNSWParams params = {.dim = 4, .M = 20}; + get_element_neighbors(params); +} + +TEST_F(INT8TieredTest, getElementNeighbors) { + HNSWParams params = {.dim = 4, .M = 20}; + get_element_neighbors(params); +} diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp index 7cf7de92b..e554c88ef 100644 --- a/tests/unit/test_spaces.cpp +++ b/tests/unit/test_spaces.cpp @@ -23,9 +23,11 @@ #include "VecSim/spaces/functions/AVX512BW_VBMI2.h" #include "VecSim/spaces/functions/AVX512BF16_VL.h" #include "VecSim/spaces/functions/AVX512FP16_VL.h" +#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h" #include "VecSim/spaces/functions/AVX2.h" #include "VecSim/spaces/functions/SSE3.h" #include "VecSim/spaces/functions/F16C.h" +#include "tests_utils.h" using bfloat16 = vecsim_types::bfloat16; using float16 = vecsim_types::float16; @@ -102,6 +104,21 @@ TEST_F(SpacesTest, fp16_l2_no_optimization_func_test) { ASSERT_EQ(dist, FP32_L2Sqr((const void *)sanity_a, (const void *)sanity_b, dim)); } +TEST_F(SpacesTest, int8_l2_no_optimization_func_test) { + size_t dim = 5; + + int8_t a[dim], b[dim]; + for (size_t i = 0; i < dim; i++) { + a[i] = (i + 1); + b[i] = (i + 2); + } + + float dist = INT8_L2Sqr((const void *)a, (const void *)b, dim); + ASSERT_EQ(dist, 5.0); +} + +/* ======================== IP NO OPT ======================== */ + TEST_F(SpacesTest, float_ip_no_optimization_func_test) { size_t dim = 5; @@ -211,6 +228,36 @@ TEST_F(SpacesTest, fp16_ip_no_optimization_func_test) { ASSERT_EQ(dist, FP32_InnerProduct((const void *)sanity_a, (const void *)sanity_b, dim)); } +TEST_F(SpacesTest, int8_ip_no_optimization_func_test) { + size_t dim = 4; + int8_t a[] = {1, 0, 0, 0}; + int8_t b[] = {1, 0, 0, 0}; + + float dist = INT8_InnerProduct((const void *)a, (const void *)b, dim); + ASSERT_EQ(dist, 0.0); +} + +/* ======================== Cosine NO OPT ======================== */ + +TEST_F(SpacesTest, int8_Cosine_no_optimization_func_test) { + size_t dim = 4; + // create a vector with extra space for the norm 
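+    // (Reviewer note) Assumed blob layout for INT8 cosine: [dim int8 values | float norm],
+    // i.e. 4 value bytes followed by a 4-byte norm here. For two identical vectors the
+    // cosine distance should be ~0, which is what the ASSERT_NEAR below checks.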
+/* ======================== Cosine NO OPT ======================== */
+
+TEST_F(SpacesTest, int8_Cosine_no_optimization_func_test) {
+    size_t dim = 4;
+    // Create vectors with extra space for the norm.
+    int8_t v1[dim + sizeof(float)];
+    int8_t v2[dim + sizeof(float)];
+
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 123);
+
+    // Write the norm at the end of the vector.
+    *(float *)(v1 + dim) = test_utils::integral_compute_norm(v1, dim);
+    *(float *)(v2 + dim) = test_utils::integral_compute_norm(v2, dim);
+
+    float dist = INT8_Cosine((const void *)v1, (const void *)v2, dim);
+    ASSERT_NEAR(dist, 0.0, 0.000001);
+}
+
+/* ======================== Test Getters ======================== */
+
 TEST_F(SpacesTest, GetDistFuncInvalidMetricFP32) {
     EXPECT_THROW(
         (spaces::GetDistFunc<float, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
         std::invalid_argument);
@@ -231,6 +278,11 @@ TEST_F(SpacesTest, GetDistFuncInvalidMetricFP16) {
         (spaces::GetDistFunc<float16, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
         std::invalid_argument);
 }
+TEST_F(SpacesTest, GetDistFuncInvalidMetricINT8) {
+    EXPECT_THROW(
+        (spaces::GetDistFunc<int8_t, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
+        std::invalid_argument);
+}
 
 using namespace spaces;
 
@@ -241,27 +293,38 @@ TEST_F(SpacesTest, smallDimChooser) {
         ASSERT_EQ(L2_FP64_GetDistFunc(dim), FP64_L2Sqr);
         ASSERT_EQ(L2_BF16_GetDistFunc(dim), BF16_L2Sqr_LittleEndian);
         ASSERT_EQ(L2_FP16_GetDistFunc(dim), FP16_L2Sqr);
+        ASSERT_EQ(L2_INT8_GetDistFunc(dim), INT8_L2Sqr);
         ASSERT_EQ(IP_FP32_GetDistFunc(dim), FP32_InnerProduct);
         ASSERT_EQ(IP_FP64_GetDistFunc(dim), FP64_InnerProduct);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
+        ASSERT_EQ(IP_INT8_GetDistFunc(dim), INT8_InnerProduct);
+        ASSERT_EQ(Cosine_INT8_GetDistFunc(dim), INT8_Cosine);
     }
     for (size_t dim = 8; dim < 16; dim++) {
         ASSERT_EQ(L2_FP32_GetDistFunc(dim), FP32_L2Sqr);
         ASSERT_EQ(L2_BF16_GetDistFunc(dim), BF16_L2Sqr_LittleEndian);
         ASSERT_EQ(L2_FP16_GetDistFunc(dim), FP16_L2Sqr);
+        ASSERT_EQ(L2_INT8_GetDistFunc(dim), INT8_L2Sqr);
         ASSERT_EQ(IP_FP32_GetDistFunc(dim), FP32_InnerProduct);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
+        ASSERT_EQ(IP_INT8_GetDistFunc(dim), INT8_InnerProduct);
+        ASSERT_EQ(Cosine_INT8_GetDistFunc(dim), INT8_Cosine);
     }
     for (size_t dim = 16; dim < 32; dim++) {
         ASSERT_EQ(L2_BF16_GetDistFunc(dim), BF16_L2Sqr_LittleEndian);
         ASSERT_EQ(L2_FP16_GetDistFunc(dim), FP16_L2Sqr);
+        ASSERT_EQ(L2_INT8_GetDistFunc(dim), INT8_L2Sqr);
         ASSERT_EQ(IP_BF16_GetDistFunc(dim), BF16_InnerProduct_LittleEndian);
         ASSERT_EQ(IP_FP16_GetDistFunc(dim), FP16_InnerProduct);
+        ASSERT_EQ(IP_INT8_GetDistFunc(dim), INT8_InnerProduct);
+        ASSERT_EQ(Cosine_INT8_GetDistFunc(dim), INT8_Cosine);
     }
 }
 
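[Reviewer note, not part of the patch] The cosine tests store each vector as dim int8 elements followed immediately by its precomputed L2 norm as a float, which is why the buffers are sized dim + sizeof(float). A sketch of a distance routine consuming that layout, assuming the usual 1 - dot / (|a||b|) form; the patch's INT8_Cosine may differ in accumulation details:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    static float SketchINT8_Cosine(const void *pa, const void *pb, size_t dim) {
        const int8_t *a = (const int8_t *)pa, *b = (const int8_t *)pb;
        int32_t dot = 0;
        for (size_t i = 0; i < dim; i++)
            dot += a[i] * b[i];
        float norm_a, norm_b; // stored right past the int8 payload
        std::memcpy(&norm_a, a + dim, sizeof(float));
        std::memcpy(&norm_b, b + dim, sizeof(float));
        return 1.0f - (float)dot / (norm_a * norm_b);
    }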
+/* ======================== Test SIMD Functions ======================== */
+
 // In this following tests we assume that compiler supports all X86 optimizations, so if we have
 // some hardware flag enabled, we check that the corresponding optimization function was chosen.
 #ifdef CPU_FEATURES_ARCH_X86_64
@@ -849,4 +912,118 @@ INSTANTIATE_TEST_SUITE_P(, FP16SpacesOptimizationTestAdvanced,
 
 #endif
 
+class INT8SpacesOptimizationTest : public testing::TestWithParam<size_t> {};
+
+TEST_P(INT8SpacesOptimizationTest, INT8L2SqrTest) {
+    auto optimization = cpu_features::GetX86Info().features;
+    size_t dim = GetParam();
+    int8_t v1[dim];
+    int8_t v2[dim];
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 1234);
+
+    auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
+        size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
+        return (dim % elements_in_reg == 0) ? elements_in_reg * sizeof(int8_t) : 0;
+    };
+
+    dist_func_t<float> arch_opt_func;
+    float baseline = INT8_L2Sqr(v1, v2, dim);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
+        optimization.avx512vnni) {
+        unsigned char alignment = 0;
+        arch_opt_func = L2_INT8_GetDistFunc(dim, &alignment, &optimization);
+        ASSERT_EQ(arch_opt_func, Choose_INT8_L2_implementation_AVX512F_BW_VL_VNNI(dim))
+            << "Unexpected distance function chosen for dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
+        // Unset optimization flags, so we'll choose the next optimization.
+        optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
+            optimization.avx512vnni = 0;
+    }
+#endif
+    unsigned char alignment = 0;
+    arch_opt_func = L2_INT8_GetDistFunc(dim, &alignment, &optimization);
+    ASSERT_EQ(arch_opt_func, INT8_L2Sqr) << "Unexpected distance function chosen for dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "No optimization with dim " << dim;
+    ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
+}
+
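[Reviewer note, not part of the patch] The test above exercises the chooser twice: once with the machine's real feature set (expecting the AVX512 VNNI kernel) and once after clearing those feature bits (expecting the scalar fallback). A self-contained sketch of that chooser shape; every name below is illustrative, not the library's API:

    #include <cstddef>

    struct FeaturesSketch {
        bool avx512f, avx512bw, avx512vl, avx512vnni;
    };

    using DistFuncSketch = float (*)(const void *, const void *, size_t);

    static DistFuncSketch ChooseL2Sketch(size_t dim, unsigned char *alignment,
                                         const FeaturesSketch &f,
                                         DistFuncSketch avx512_kernel,
                                         DistFuncSketch scalar_kernel) {
        if (f.avx512f && f.avx512bw && f.avx512vl && f.avx512vnni) {
            // 256-bit loads consume 32 int8 elements at a time; report an
            // alignment hint only when dim is a whole number of chunks.
            if (alignment && dim % 32 == 0)
                *alignment = 32;
            return avx512_kernel;
        }
        return scalar_kernel; // scalar fallback imposes no alignment requirement
    }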
+TEST_P(INT8SpacesOptimizationTest, INT8InnerProductTest) {
+    auto optimization = cpu_features::GetX86Info().features;
+    size_t dim = GetParam();
+    int8_t v1[dim];
+    int8_t v2[dim];
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 1234);
+
+    auto expected_alignment = [](size_t reg_bit_size, size_t dim) {
+        size_t elements_in_reg = reg_bit_size / sizeof(int8_t) / 8;
+        return (dim % elements_in_reg == 0) ? elements_in_reg * sizeof(int8_t) : 0;
+    };
+
+    dist_func_t<float> arch_opt_func;
+    float baseline = INT8_InnerProduct(v1, v2, dim);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
+        optimization.avx512vnni) {
+        unsigned char alignment = 0;
+        arch_opt_func = IP_INT8_GetDistFunc(dim, &alignment, &optimization);
+        ASSERT_EQ(arch_opt_func, Choose_INT8_IP_implementation_AVX512F_BW_VL_VNNI(dim))
+            << "Unexpected distance function chosen for dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
+        ASSERT_EQ(alignment, expected_alignment(256, dim)) << "AVX512 with dim " << dim;
+        // Unset optimization flags, so we'll choose the next optimization.
+        optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
+            optimization.avx512vnni = 0;
+    }
+#endif
+    unsigned char alignment = 0;
+    arch_opt_func = IP_INT8_GetDistFunc(dim, &alignment, &optimization);
+    ASSERT_EQ(arch_opt_func, INT8_InnerProduct)
+        << "Unexpected distance function chosen for dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "No optimization with dim " << dim;
+    ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
+}
+
+TEST_P(INT8SpacesOptimizationTest, INT8CosineTest) {
+    auto optimization = cpu_features::GetX86Info().features;
+    size_t dim = GetParam();
+    int8_t v1[dim + sizeof(float)];
+    int8_t v2[dim + sizeof(float)];
+    test_utils::populate_int8_vec(v1, dim, 123);
+    test_utils::populate_int8_vec(v2, dim, 1234);
+
+    // Write the norm at the end of the vector.
+    *(float *)(v1 + dim) = test_utils::integral_compute_norm(v1, dim);
+    *(float *)(v2 + dim) = test_utils::integral_compute_norm(v2, dim);
+
+    dist_func_t<float> arch_opt_func;
+    float baseline = INT8_Cosine(v1, v2, dim);
+#ifdef OPT_AVX512_F_BW_VL_VNNI
+    if (optimization.avx512f && optimization.avx512bw && optimization.avx512vl &&
+        optimization.avx512vnni) {
+        unsigned char alignment = 0;
+        arch_opt_func = Cosine_INT8_GetDistFunc(dim, &alignment, &optimization);
+        ASSERT_EQ(arch_opt_func, Choose_INT8_Cosine_implementation_AVX512F_BW_VL_VNNI(dim))
+            << "Unexpected distance function chosen for dim " << dim;
+        ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
+        // We don't align int8 vectors with cosine distance.
+        ASSERT_EQ(alignment, 0) << "AVX512 with dim " << dim;
+        // Unset optimization flags, so we'll choose the next optimization.
+        optimization.avx512f = optimization.avx512bw = optimization.avx512vl =
+            optimization.avx512vnni = 0;
+    }
+#endif
+    unsigned char alignment = 0;
+    arch_opt_func = Cosine_INT8_GetDistFunc(dim, &alignment, &optimization);
+    ASSERT_EQ(arch_opt_func, INT8_Cosine) << "Unexpected distance function chosen for dim " << dim;
+    ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "No optimization with dim " << dim;
+    ASSERT_EQ(alignment, 0) << "No optimization with dim " << dim;
+}
+
+INSTANTIATE_TEST_SUITE_P(INT8OptFuncs, INT8SpacesOptimizationTest,
+                         testing::Range(32UL, 32 * 2UL + 1));
+
 #endif // CPU_FEATURES_ARCH_X86_64
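[Reviewer note, not part of the patch] Range(32UL, 32 * 2UL + 1) instantiates dims 32 through 64, so each kernel sees one and two full 32-element chunks plus every residual length in between. The expected_alignment lambda then reduces to simple arithmetic:

    // For int8 (1 byte per element), a 256-bit register holds
    // 256 / (sizeof(int8_t) * 8) = 32 elements, so:
    //   expected_alignment(256, dim) == (dim % 32 == 0) ? 32 : 0   // in bytes
    // e.g. dim = 32 or 64 -> 32; dim = 33..63 -> 0.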
diff --git a/tests/unit/test_utils.cpp b/tests/unit/unit_test_utils.cpp
similarity index 86%
rename from tests/unit/test_utils.cpp
rename to tests/unit/unit_test_utils.cpp
index 7b99eba22..41d8dbcb5 100644
--- a/tests/unit/test_utils.cpp
+++ b/tests/unit/unit_test_utils.cpp
@@ -4,7 +4,7 @@
  *the Server Side Public License v1 (SSPLv1).
  */
 
-#include "test_utils.h"
+#include "unit_test_utils.h"
 #include "gtest/gtest.h"
 #include "VecSim/utils/vec_utils.h"
 #include "VecSim/memory/vecsim_malloc.h"
@@ -46,6 +46,7 @@ VecSimQueryParams CreateQueryParams(const HNSWRuntimeParams &RuntimeParams) {
 
 static bool is_async_index(VecSimIndex *index) {
     return dynamic_cast<TieredHNSWIndex<float, float> *>(index) != nullptr ||
+           dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index) != nullptr ||
           dynamic_cast<TieredHNSWIndex<double, double> *>(index) != nullptr;
 }
 
@@ -376,3 +377,69 @@ size_t getLabelsLookupNodeSize() {
     size_t memory_after = allocator->getAllocationSize();
     return memory_after - memory_before;
 }
+namespace test_utils {
+size_t CalcVectorDataSize(VecSimIndex *index, VecSimType data_type) {
+    switch (data_type) {
+    case VecSimType_FLOAT32: {
+        VecSimIndexAbstract<float, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<float, float> *>(index);
+        assert(abs_index &&
+               "dynamic_cast failed: can't convert index to VecSimIndexAbstract");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_FLOAT64: {
+        VecSimIndexAbstract<double, double> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<double, double> *>(index);
+        assert(abs_index &&
+               "dynamic_cast failed: can't convert index to VecSimIndexAbstract");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_BFLOAT16: {
+        VecSimIndexAbstract<vecsim_types::bfloat16, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<vecsim_types::bfloat16, float> *>(index);
+        assert(abs_index && "dynamic_cast failed: can't convert index to "
+                            "VecSimIndexAbstract");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_FLOAT16: {
+        VecSimIndexAbstract<vecsim_types::float16, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<vecsim_types::float16, float> *>(index);
+        assert(abs_index && "dynamic_cast failed: can't convert index to "
+                            "VecSimIndexAbstract");
+        return abs_index->getDataSize();
+    }
+    case VecSimType_INT8: {
+        VecSimIndexAbstract<int8_t, float> *abs_index =
+            dynamic_cast<VecSimIndexAbstract<int8_t, float> *>(index);
+        assert(abs_index &&
+               "dynamic_cast failed: can't convert index to VecSimIndexAbstract");
+        return abs_index->getDataSize();
+    }
+    default:
+        return 0;
+    }
+}
+
+TieredIndexParams CreateTieredParams(VecSimParams &primary_params,
+                                     tieredIndexMock &mock_thread_pool) {
+    TieredIndexParams tiered_params = {.jobQueue = &mock_thread_pool.jobQ,
+                                       .jobQueueCtx = mock_thread_pool.ctx,
+                                       .submitCb = tieredIndexMock::submit_callback,
+                                       .flatBufferLimit = SIZE_MAX,
+                                       .primaryIndexParams = &primary_params,
+                                       .specificParams = {TieredHNSWParams{.swapJobThreshold = 0}}};
+
+    return tiered_params;
+}
+
+VecSimIndex *CreateNewTieredHNSWIndex(const HNSWParams &hnsw_params,
+                                      tieredIndexMock &mock_thread_pool) {
+    VecSimParams primary_params = CreateParams(hnsw_params);
+    auto tiered_params = CreateTieredParams(primary_params, mock_thread_pool);
+    VecSimParams params = CreateParams(tiered_params);
+    VecSimIndex *index = VecSimIndex_New(&params);
+    mock_thread_pool.ctx->index_strong_ref.reset(index);
+
+    return index;
+}
+} // namespace test_utils
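[Reviewer note, not part of the patch] CalcVectorDataSize downcasts to the typed VecSimIndexAbstract only to read getDataSize(), so tests can assert the stored blob size per type and metric. For an INT8 cosine index, the size implied by the norm-appended layout would be the payload plus a trailing float; a hypothetical self-contained check (the helper name is illustrative, not the library's API):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Hypothetical expectation: an INT8 cosine blob is the payload plus a
    // trailing float norm (the layout used in test_spaces.cpp above).
    static size_t ExpectedInt8CosineDataSize(size_t dim) {
        return dim * sizeof(int8_t) + sizeof(float);
    }

    int main() {
        assert(ExpectedInt8CosineDataSize(4) == 8); // 4-byte payload + 4-byte norm
        return 0;
    }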
diff --git a/tests/unit/test_utils.h b/tests/unit/unit_test_utils.h
similarity index 91%
rename from tests/unit/test_utils.h
rename to tests/unit/unit_test_utils.h
index 54478cfd7..cafde7552 100644
--- a/tests/unit/test_utils.h
+++ b/tests/unit/unit_test_utils.h
@@ -13,6 +13,7 @@
 #include "VecSim/vec_sim.h"
 #include "VecSim/algorithms/hnsw/hnsw_tiered.h"
+#include "mock_thread_pool.h"
 #include "gtest/gtest.h"
 
 // IndexType is used to define indices unit tests
@@ -99,6 +100,11 @@ inline VecSimIndex *CreateNewIndex(IndexParams &index_params, VecSimType type,
     return VecSimIndex_New(&params);
 }
 
+TieredIndexParams CreateTieredParams(VecSimParams &primary_params,
+                                     tieredIndexMock &mock_thread_pool);
+VecSimIndex *CreateNewTieredHNSWIndex(const HNSWParams &hnsw_params,
+                                      tieredIndexMock &mock_thread_pool);
+
 extern VecsimQueryType query_types[4];
 } // namespace test_utils
@@ -162,6 +168,16 @@ inline double GetInfVal(VecSimType type) {
         throw std::invalid_argument("This type is not supported");
     }
 }
+// TODO: Move all test_utils to this namespace
+namespace test_utils {
+size_t CalcVectorDataSize(VecSimIndex *index, VecSimType data_type);
+
+template <typename data_t, typename dist_t>
+TieredHNSWIndex<data_t, dist_t> *cast_to_tiered_index(VecSimIndex *index) {
+    return dynamic_cast<TieredHNSWIndex<data_t, dist_t> *>(index);
+}
+
+} // namespace test_utils
 
 // Test a specific exception type is thrown and prints the right message.
 #define ASSERT_EXCEPTION_MESSAGE(VALUE, EXCEPTION_TYPE, MESSAGE)                                   \
diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h
new file mode 100644
index 000000000..0bf8bca53
--- /dev/null
+++ b/tests/utils/tests_utils.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <cstdint>
+#include <random>
+#include "VecSim/spaces/normalize/compute_norm.h"
+
+namespace test_utils {
+
+// Assuming v is a memory allocation of size dim * sizeof(int8_t)
+static void populate_int8_vec(int8_t *v, size_t dim, int seed = 1234) {
+
+    std::mt19937 gen(seed); // Mersenne Twister engine initialized with the fixed seed
+
+    // uniform_int_distribution doesn't support int8_t,
+    // so define the distribution over a wider integer type and cast down.
+    std::uniform_int_distribution<int16_t> dis(-128, 127);
+
+    for (size_t i = 0; i < dim; i++) {
+        v[i] = static_cast<int8_t>(dis(gen));
+    }
+}
+
+template <typename datatype>
+float integral_compute_norm(const datatype *vec, size_t dim) {
+    return spaces::IntegralType_ComputeNorm(vec, dim);
+}
+
+} // namespace test_utils
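[Reviewer note, not part of the patch] Typical use of the new tests_utils.h helpers when building a cosine-ready int8 blob, mirroring the tests above (the include path is assumed relative to tests/utils):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include "tests_utils.h"

    int main() {
        const size_t dim = 32;
        int8_t blob[dim + sizeof(float)];
        test_utils::populate_int8_vec(blob, dim, 123); // deterministic payload
        // Append the norm after the payload, as the cosine tests do.
        float norm = test_utils::integral_compute_norm(blob, dim);
        std::memcpy(blob + dim, &norm, sizeof(float));
        return 0;
    }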