RedisAI · meiravgri · Dec 24, 2024 · Dec 23, 2024
diff --git a/.github/workflows/flow-temp.yml b/.github/workflows/flow-temp.yml
@@ -11,11 +11,11 @@ on:
   push:
     branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
 jobs:
-  jammy:
-    uses: ./.github/workflows/task-unit-test.yml
-    with:
-      container: ubuntu:jammy
-      run-valgrind: true
+  # jammy:
+  #   uses: ./.github/workflows/task-unit-test.yml
+  #   with:
+  #     container: ubuntu:jammy
+  #     run-valgrind: true
   # alpine3:
   #   uses: ./.github/workflows/task-unit-test.yml
   #   with:
@@ -32,11 +32,11 @@ jobs:
   #   with:
   #     container: ubuntu:focal
   #     run-valgrind: false
-  # bullseye:
-  #   uses: ./.github/workflows/task-unit-test.yml
-  #   with:
-  #     container: debian:bullseye
-  #     run-valgrind: false
+  bullseye:
+    uses: ./.github/workflows/task-unit-test.yml
+    with:
+      container: debian:bullseye
+      run-valgrind: false
   # amazonlinux2:
   #   uses: ./.github/workflows/task-unit-test.yml
   #   with:

diff --git a/cmake/x86_64InstructionFlags.cmake b/cmake/x86_64InstructionFlags.cmake
@@ -13,6 +13,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 	CHECK_CXX_COMPILER_FLAG(-mavx512vbmi2 CXX_AVX512VBMI2)
 	CHECK_CXX_COMPILER_FLAG(-mavx512fp16 CXX_AVX512FP16)
 	CHECK_CXX_COMPILER_FLAG(-mavx512f CXX_AVX512F)
+	CHECK_CXX_COMPILER_FLAG(-mavx512vnni CXX_AVX512VNNI)
 	CHECK_CXX_COMPILER_FLAG(-mavx2 CXX_AVX2)
 	CHECK_CXX_COMPILER_FLAG(-mavx CXX_AVX)
 	CHECK_CXX_COMPILER_FLAG(-mf16c CXX_F16C)
@@ -48,6 +49,10 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 		add_compile_definitions(OPT_AVX512_BW_VBMI2)
 	endif()
 
+	if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI)
+		add_compile_definitions(OPT_AVX512_F_BW_VL_VNNI)
+	endif()
+
 	if(CXX_F16C AND CXX_FMA AND CXX_AVX)
 		add_compile_definitions(OPT_F16C)
 	endif()

diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
@@ -57,6 +57,8 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_switchDeleteModes_Test)
 
 friend class BF16TieredTest;
 friend class FP16TieredTest;
+friend class INT8TieredTest;
+friend class CommonTypeMetricTieredTests_TestDataSizeTieredHNSW_Test;
 
 INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)
 INDEX_TEST_FRIEND_CLASS(BM_VecSimCommon)
diff --git a/src/VecSim/index_factories/brute_force_factory.cpp b/src/VecSim/index_factories/brute_force_factory.cpp
@@ -33,10 +33,12 @@
 static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
 
     const BFParams *bfParams = &params->algoParams.bfParams;
+    size_t dataSize = VecSimParams_GetDataSize(bfParams->type, bfParams->dim, bfParams->metric);
     AbstractIndexInitParams abstractInitParams = {.allocator =
                                                       VecSimAllocator::newVecsimAllocator(),
                                                   .dim = bfParams->dim,
                                                   .vecType = bfParams->type,
+                                                  .dataSize = dataSize,
                                                   .metric = bfParams->metric,
                                                   .blockSize = bfParams->blockSize,
                                                   .multi = bfParams->multi,
@@ -52,32 +54,30 @@
 
 VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &abstractInitParams,
                       bool is_normalized) {
-    // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
-    // and query blobs.
-    VecSimMetric metric;
-    if (is_normalized && bfparams->metric == VecSimMetric_Cosine) {
-        metric = VecSimMetric_IP;
-    } else {
-        metric = bfparams->metric;
-    }
+
     if (bfparams->type == VecSimType_FLOAT32) {
         IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float>(bfparams, abstractInitParams, indexComponents);
     } else if (bfparams->type == VecSimType_FLOAT64) {
         IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<double>(bfparams, abstractInitParams, indexComponents);
     } else if (bfparams->type == VecSimType_BFLOAT16) {
         IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(bfparams, abstractInitParams,
                                                              indexComponents);
     } else if (bfparams->type == VecSimType_FLOAT16) {
         IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
-            abstractInitParams.allocator, metric, bfparams->dim);
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(bfparams, abstractInitParams,
                                                             indexComponents);
+    } else if (bfparams->type == VecSimType_INT8) {
+        IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
+            abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
+        return NewIndex_ChooseMultiOrSingle<int8_t, float>(bfparams, abstractInitParams,
+                                                           indexComponents);
     }
 
     // If we got here something is wrong.
@@ -117,6 +117,11 @@
     } else if (params->type == VecSimType_FLOAT16) {
         est += EstimateComponentsMemory<float16, float>(params->metric, is_normalized);
         est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
+    } else if (params->type == VecSimType_INT8) {
+        est += EstimateComponentsMemory<int8_t, float>(params->metric, is_normalized);
+        est += EstimateInitialSize_ChooseMultiOrSingle<int8_t, float>(params->multi);
+    } else {
+        throw std::invalid_argument("Invalid params->type");
     }
 
     est += sizeof(DataBlocksContainer) + allocations_overhead;

diff --git a/src/VecSim/index_factories/components/components_factory.h b/src/VecSim/index_factories/components/components_factory.h
@@ -14,14 +14,24 @@
 
 template <typename DataType, typename DistType>
 IndexComponents<DataType, DistType>
-CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric metric, size_t dim) {
+CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric metric, size_t dim,
+                      bool is_normalized) {
     unsigned char alignment = 0;
     spaces::dist_func_t<DistType> distFunc =
         spaces::GetDistFunc<DataType, DistType>(metric, dim, &alignment);
     // Currently we have only one distance calculator implementation
     auto indexCalculator = new (allocator) DistanceCalculatorCommon<DistType>(allocator, distFunc);
 
-    PreprocessorsContainerParams ppParams = {.metric = metric, .dim = dim, .alignment = alignment};
+    // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
+    // and query blobs.
+    VecSimMetric pp_metric;
+    if (is_normalized && metric == VecSimMetric_Cosine) {
+        pp_metric = VecSimMetric_IP;
+    } else {
+        pp_metric = metric;
+    }
+    PreprocessorsContainerParams ppParams = {
+        .metric = pp_metric, .dim = dim, .alignment = alignment};
     auto preprocessors = CreatePreprocessorsContainer<DataType>(allocator, ppParams);
 
     return {indexCalculator, preprocessors};

diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp
@@ -33,10 +33,14 @@
 
 static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
     const HNSWParams *hnswParams = &params->algoParams.hnswParams;
+
+    size_t dataSize =
+        VecSimParams_GetDataSize(hnswParams->type, hnswParams->dim, hnswParams->metric);
     AbstractIndexInitParams abstractInitParams = {.allocator =
                                                       VecSimAllocator::newVecsimAllocator(),
                                                   .dim = hnswParams->dim,
                                                   .vecType = hnswParams->type,
+                                                  .dataSize = dataSize,
                                                   .metric = hnswParams->metric,
                                                   .blockSize = hnswParams->blockSize,
                                                   .multi = hnswParams->multi,
@@ -48,36 +52,32 @@
     const HNSWParams *hnswParams = &params->algoParams.hnswParams;
     AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params);
 
-    // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
-    // and query blobs.
-    VecSimMetric metric;
-    if (is_normalized && hnswParams->metric == VecSimMetric_Cosine) {
-        metric = VecSimMetric_IP;
-    } else {
-        metric = hnswParams->metric;
-    }
-
     if (hnswParams->type == VecSimType_FLOAT32) {
         IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float>(hnswParams, abstractInitParams, indexComponents);
 
     } else if (hnswParams->type == VecSimType_FLOAT64) {
         IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<double>(hnswParams, abstractInitParams,
                                                     indexComponents);
 
     } else if (hnswParams->type == VecSimType_BFLOAT16) {
         IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(hnswParams, abstractInitParams,
                                                              indexComponents);
     } else if (hnswParams->type == VecSimType_FLOAT16) {
         IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
-            abstractInitParams.allocator, metric, hnswParams->dim);
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(hnswParams, abstractInitParams,
                                                             indexComponents);
+    } else if (hnswParams->type == VecSimType_INT8) {
+        IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
+            abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
+        return NewIndex_ChooseMultiOrSingle<int8_t, float>(hnswParams, abstractInitParams,
+                                                           indexComponents);
     }
 
     // If we got here something is wrong.
@@ -114,6 +114,11 @@
     } else if (params->type == VecSimType_FLOAT16) {
         est += EstimateComponentsMemory<float16, float>(params->metric, is_normalized);
         est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
+    } else if (params->type == VecSimType_INT8) {
+        est += EstimateComponentsMemory<int8_t, float>(params->metric, is_normalized);
+        est += EstimateInitialSize_ChooseMultiOrSingle<int8_t, float>(params->multi);
+    } else {
+        throw std::invalid_argument("Invalid params->type");
     }
     est += sizeof(DataBlocksContainer) + allocations_overhead;
 
@@ -205,34 +210,32 @@
     VecSimParams vecsimParams = {.algo = VecSimAlgo_HNSWLIB,
                                  .algoParams = {.hnswParams = HNSWParams{params}}};
 
-    VecSimMetric metric;
-    if (is_normalized && params.metric == VecSimMetric_Cosine) {
-        metric = VecSimMetric_IP;
-    } else {
-        metric = params.metric;
-    }
-
     AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(&vecsimParams);
     if (params.type == VecSimType_FLOAT32) {
         IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float>(input, &params, abstractInitParams,
                                                    indexComponents, version);
     } else if (params.type == VecSimType_FLOAT64) {
         IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<double>(input, &params, abstractInitParams,
                                                     indexComponents, version);
     } else if (params.type == VecSimType_BFLOAT16) {
         IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(input, &params, abstractInitParams,
                                                              indexComponents, version);
     } else if (params.type == VecSimType_FLOAT16) {
         IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
-            abstractInitParams.allocator, metric, abstractInitParams.dim);
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
         return NewIndex_ChooseMultiOrSingle<float16, float>(input, &params, abstractInitParams,
                                                             indexComponents, version);
+    } else if (params.type == VecSimType_INT8) {
+        IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
+            abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
+        return NewIndex_ChooseMultiOrSingle<int8_t, float>(input, &params, abstractInitParams,
+                                                           indexComponents, version);
     } else {
         auto bad_name = VecSimType_ToString(params.type);
         if (bad_name == nullptr) {

diff --git a/src/VecSim/index_factories/tiered_factory.cpp b/src/VecSim/index_factories/tiered_factory.cpp
@@ -42,9 +42,12 @@
     BFParams bf_params = NewBFParams(params);
 
     std::shared_ptr<VecSimAllocator> flat_allocator = VecSimAllocator::newVecsimAllocator();
+    size_t dataSize = VecSimParams_GetDataSize(bf_params.type, bf_params.dim, bf_params.metric);
+
     AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator,
                                                   .dim = bf_params.dim,
                                                   .vecType = bf_params.type,
+                                                  .dataSize = dataSize,
                                                   .metric = bf_params.metric,
                                                   .blockSize = bf_params.blockSize,
                                                   .multi = bf_params.multi,
@@ -80,6 +83,10 @@
         est += sizeof(TieredHNSWIndex<bfloat16, float>);
     } else if (hnsw_params.type == VecSimType_FLOAT16) {
         est += sizeof(TieredHNSWIndex<float16, float>);
+    } else if (hnsw_params.type == VecSimType_INT8) {
+        est += sizeof(TieredHNSWIndex<int8_t, float>);
+    } else {
+        throw std::invalid_argument("Invalid hnsw_params.type");
     }
 
     return est;
@@ -96,6 +103,8 @@
         return TieredHNSWFactory::NewIndex<bfloat16, float>(params);
     } else if (type == VecSimType_FLOAT16) {
         return TieredHNSWFactory::NewIndex<float16, float>(params);
+    } else if (type == VecSimType_INT8) {
+        return TieredHNSWFactory::NewIndex<int8_t, float>(params);
     }
     return nullptr; // Invalid type.
 }

diff --git a/src/VecSim/spaces/CMakeLists.txt b/src/VecSim/spaces/CMakeLists.txt
@@ -44,6 +44,12 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 		list(APPEND OPTIMIZATIONS functions/AVX512F.cpp)
 	endif()
 
+	if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI)
+		message("Building with AVX512F, AVX512BW, AVX512VL and AVX512VNNI")
+		set_source_files_properties(functions/AVX512F_BW_VL_VNNI.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512bw -mavx512vl -mavx512vnni")
+		list(APPEND OPTIMIZATIONS functions/AVX512F_BW_VL_VNNI.cpp)
+	endif()
+
 	if(CXX_AVX2)
 		message("Building with AVX2")
 		set_source_files_properties(functions/AVX2.cpp PROPERTIES COMPILE_FLAGS -mavx2)

diff --git a/src/VecSim/spaces/IP/IP.cpp b/src/VecSim/spaces/IP/IP.cpp
@@ -66,3 +66,37 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
     }
     return 1.0f - res;
 }
+
+// Raw dot product of two int8 vectors of `dimension` elements each, accumulated in an int.
+// Each int8 * int8 product is promoted to int by the usual integral promotions before it is
+// added to the sum.
+static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    // Named casts that keep the const qualifier: the inputs are only read.
+    const auto *pVect1 = static_cast<const int8_t *>(pVect1v);
+    const auto *pVect2 = static_cast<const int8_t *>(pVect2v);
+
+    int res = 0;
+    for (size_t i = 0; i < dimension; i++) {
+        res += pVect1[i] * pVect2[i];
+    }
+    return res;
+}
+
+// Inner-product distance between two int8 vectors: 1 - <v1, v2>.
+// The subtraction is carried out in int; the result is converted to float on return.
+float INT8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    return 1 - INT8_InnerProductImp(pVect1v, pVect2v, dimension);
+}
+
+// Cosine distance between two int8 vectors: 1 - <v1, v2> / (norm_v1 * norm_v2).
+// Each input is read as `dimension` int8 elements followed directly by a float norm.
+// NOTE(review): the norm is loaded through a float pointer at byte offset `dimension`, which
+// presumes that address is suitably aligned for float - confirm against how blobs are built.
+float INT8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    // We expect the vectors' norm to be stored at the end of the vector.
+    float norm_v1 =
+        *reinterpret_cast<const float *>(static_cast<const int8_t *>(pVect1v) + dimension);
+    float norm_v2 =
+        *reinterpret_cast<const float *>(static_cast<const int8_t *>(pVect2v) + dimension);
+    return 1.0f - float(INT8_InnerProductImp(pVect1v, pVect2v, dimension)) / (norm_v1 * norm_v2);
+}
diff --git a/src/VecSim/spaces/IP/IP.h b/src/VecSim/spaces/IP/IP.h
@@ -16,3 +16,6 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
 
 float BF16_InnerProduct_LittleEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
 float BF16_InnerProduct_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
+// Distance functions for int8 vectors: 1 - <v1, v2>, and 1 - <v1, v2> / (norm_v1 * norm_v2).
+float INT8_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension);
+float INT8_Cosine(const void *pVect1, const void *pVect2, size_t dimension);