From 6858240583ed9fc65ee674eb0e0b02a52b138e49 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 6 May 2024 14:19:41 +0000
Subject: [PATCH 01/15] remove bw_vl arch (not needed, avx512 is enough)

---
 src/VecSim/spaces/CMakeLists.txt              | 11 +------
 ...IP_AVX512BW_VL_FP16.h => IP_AVX512_FP16.h} |  3 +-
 src/VecSim/spaces/IP_space.cpp                |  7 ++---
 ...L2_AVX512BW_VL_FP16.h => L2_AVX512_FP16.h} |  2 +-
 src/VecSim/spaces/L2_space.cpp                |  7 ++---
 src/VecSim/spaces/functions/AVX512.cpp        | 15 ++++++++++
 src/VecSim/spaces/functions/AVX512.h          |  3 ++
 src/VecSim/spaces/functions/AVX512BW_VL.cpp   | 30 -------------------
 src/VecSim/spaces/functions/AVX512BW_VL.h     | 16 ----------
 tests/benchmark/CMakeLists.txt                |  5 ----
 tests/benchmark/spaces_benchmarks/bm_spaces.h |  1 -
 .../spaces_benchmarks/bm_spaces_fp16.cpp      |  8 ++---
 tests/unit/test_spaces.cpp                    | 13 ++++----
 13 files changed, 37 insertions(+), 84 deletions(-)
 rename src/VecSim/spaces/IP/{IP_AVX512BW_VL_FP16.h => IP_AVX512_FP16.h} (93%)
 rename src/VecSim/spaces/L2/{L2_AVX512BW_VL_FP16.h => L2_AVX512_FP16.h} (95%)
 delete mode 100644 src/VecSim/spaces/functions/AVX512BW_VL.cpp
 delete mode 100644 src/VecSim/spaces/functions/AVX512BW_VL.h
diff --git a/src/VecSim/spaces/CMakeLists.txt b/src/VecSim/spaces/CMakeLists.txt
index 2050cf7ae..2ff1618aa 100644
--- a/src/VecSim/spaces/CMakeLists.txt
+++ b/src/VecSim/spaces/CMakeLists.txt
@@ -19,7 +19,6 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 	# from gcc14+ -mavx512bw is implicitly enabled when -mavx512vbmi2 is requested
 	CHECK_CXX_COMPILER_FLAG(-mavx512bw CXX_AVX512BW)
 	CHECK_CXX_COMPILER_FLAG(-mavx512vbmi2 CXX_AVX512VBMI2)
-	CHECK_CXX_COMPILER_FLAG(-mavx512vl CXX_AVX512VL)
 	CHECK_CXX_COMPILER_FLAG(-mavx512f CXX_AVX512F)
 	CHECK_CXX_COMPILER_FLAG(-mavx2 CXX_AVX2)
 	CHECK_CXX_COMPILER_FLAG(-mavx CXX_AVX)
@@ -37,17 +36,9 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 		add_compile_definitions(OPT_AVX512_BW_VBMI2)
 	endif()
 
-	if(CXX_AVX512BW AND CXX_AVX512VL)
-		message("Building with AVX512BW and AVX512VL")
-		set_source_files_properties(functions/AVX512BW_VL.cpp PROPERTIES
-				COMPILE_FLAGS "-mavx512f -mf16c -mavx512bw -mavx512vl")
-		list(APPEND OPTIMIZATIONS functions/AVX512BW_VL.cpp)
-		add_compile_definitions(OPT_AVX512_BW_VL)
-	endif()
-
 	if(CXX_AVX512F)
 		message("Building with AVX512")
-		set_source_files_properties(functions/AVX512.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mf16c")
+		set_source_files_properties(functions/AVX512.cpp PROPERTIES COMPILE_FLAGS "-mavx512f")
 		list(APPEND OPTIMIZATIONS functions/AVX512.cpp)
 		add_compile_definitions(OPT_AVX512F)
 	endif()
diff --git a/src/VecSim/spaces/IP/IP_AVX512BW_VL_FP16.h b/src/VecSim/spaces/IP/IP_AVX512_FP16.h
similarity index 93%
rename from src/VecSim/spaces/IP/IP_AVX512BW_VL_FP16.h
rename to src/VecSim/spaces/IP/IP_AVX512_FP16.h
index 32cdca84e..c08343783 100644
--- a/src/VecSim/spaces/IP/IP_AVX512BW_VL_FP16.h
+++ b/src/VecSim/spaces/IP/IP_AVX512_FP16.h
@@ -23,8 +23,7 @@ static void InnerProductStep(float16 *&pVect1, float16 *&pVect2, __m512 &sum) {
 }
 
 template <unsigned short residual> // 0..31
-float FP16_InnerProductSIMD32_AVX512BW_VL(const void *pVect1v, const void *pVect2v,
-                                          size_t dimension) {
+float FP16_InnerProductSIMD32_AVX512(const void *pVect1v, const void *pVect2v, size_t dimension) {
     auto *pVect1 = (uint16_t *)pVect1v;
     auto *pVect2 = (uint16_t *)pVect2v;
 
diff --git a/src/VecSim/spaces/IP_space.cpp b/src/VecSim/spaces/IP_space.cpp
index 1bc822a6c..38b121131 100644
--- a/src/VecSim/spaces/IP_space.cpp
+++ b/src/VecSim/spaces/IP_space.cpp
@@ -10,7 +10,6 @@
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
 #include "VecSim/spaces/functions/AVX512.h"
-#include "VecSim/spaces/functions/AVX512BW_VL.h"
 #include "VecSim/spaces/functions/F16C.h"
 #include "VecSim/spaces/functions/AVX.h"
 #include "VecSim/spaces/functions/SSE.h"
@@ -161,11 +160,11 @@ dist_func_t<float> IP_FP16_GetDistFunc(size_t dim, const void *arch_opt, unsigne
     auto features = (arch_opt == nullptr)
                         ? cpu_features::GetX86Info().features
                         : *static_cast<const cpu_features::X86Features *>(arch_opt);
-    if (features.avx512bw && features.avx512vl) {
-#ifdef OPT_AVX512_BW_VL
+    if (features.avx512f) {
+#ifdef OPT_AVX512F
         if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
             *alignment = 32 * sizeof(float16); // handles 32 floats
-        return Choose_FP16_IP_implementation_AVX512BW_VL(dim);
+        return Choose_FP16_IP_implementation_AVX512(dim);
 #endif
     }
     if (features.f16c && features.fma3 && features.avx) {
diff --git a/src/VecSim/spaces/L2/L2_AVX512BW_VL_FP16.h b/src/VecSim/spaces/L2/L2_AVX512_FP16.h
similarity index 95%
rename from src/VecSim/spaces/L2/L2_AVX512BW_VL_FP16.h
rename to src/VecSim/spaces/L2/L2_AVX512_FP16.h
index 8b24c6a83..6b73774a8 100644
--- a/src/VecSim/spaces/L2/L2_AVX512BW_VL_FP16.h
+++ b/src/VecSim/spaces/L2/L2_AVX512_FP16.h
@@ -24,7 +24,7 @@ static void L2SqrStep(float16 *&pVect1, float16 *&pVect2, __m512 &sum) {
 }
 
 template <unsigned short residual> // 0..31
-float FP16_L2SqrSIMD32_AVX512BW_VL(const void *pVect1v, const void *pVect2v, size_t dimension) {
+float FP16_L2SqrSIMD32_AVX512(const void *pVect1v, const void *pVect2v, size_t dimension) {
     auto *pVect1 = (float16 *)pVect1v;
     auto *pVect2 = (float16 *)pVect2v;
 
diff --git a/src/VecSim/spaces/L2_space.cpp b/src/VecSim/spaces/L2_space.cpp
index 36eaacfb1..ee61f9e7c 100644
--- a/src/VecSim/spaces/L2_space.cpp
+++ b/src/VecSim/spaces/L2_space.cpp
@@ -10,7 +10,6 @@
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
 #include "VecSim/spaces/functions/F16C.h"
-#include "VecSim/spaces/functions/AVX512BW_VL.h"
 #include "VecSim/spaces/functions/AVX512.h"
 #include "VecSim/spaces/functions/AVX.h"
 #include "VecSim/spaces/functions/SSE.h"
@@ -162,11 +161,11 @@ dist_func_t<float> L2_FP16_GetDistFunc(size_t dim, const void *arch_opt, unsigne
     auto features = (arch_opt == nullptr)
                         ? cpu_features::GetX86Info().features
                         : *static_cast<const cpu_features::X86Features *>(arch_opt);
-    if (features.avx512bw && features.avx512vl) {
-#ifdef OPT_AVX512_BW_VL
+    if (features.avx512f) {
+#ifdef OPT_AVX512F
         if (dim % 32 == 0) // no point in aligning if we have an offsetting residual
             *alignment = 32 * sizeof(float16); // handles 32 floats
-        return Choose_FP16_L2_implementation_AVX512BW_VL(dim);
+        return Choose_FP16_L2_implementation_AVX512(dim);
 #endif
     }
     if (features.f16c && features.fma3 && features.avx) {
diff --git a/src/VecSim/spaces/functions/AVX512.cpp b/src/VecSim/spaces/functions/AVX512.cpp
index 774b5f4cd..1b92d50b6 100644
--- a/src/VecSim/spaces/functions/AVX512.cpp
+++ b/src/VecSim/spaces/functions/AVX512.cpp
@@ -12,6 +12,9 @@
 #include "VecSim/spaces/IP/IP_AVX512_FP32.h"
 #include "VecSim/spaces/IP/IP_AVX512_FP64.h"
 
+#include "VecSim/spaces/IP/IP_AVX512_FP16.h"
+#include "VecSim/spaces/L2/L2_AVX512_FP16.h"
+
 namespace spaces {
 
 #include "implementation_chooser.h"
@@ -40,6 +43,18 @@ dist_func_t<double> Choose_FP64_L2_implementation_AVX512(size_t dim) {
     return ret_dist_func;
 }
 
+dist_func_t<float> Choose_FP16_IP_implementation_AVX512(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, FP16_InnerProductSIMD32_AVX512);
+    return ret_dist_func;
+}
+
+dist_func_t<float> Choose_FP16_L2_implementation_AVX512(size_t dim) {
+    dist_func_t<float> ret_dist_func;
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, FP16_L2SqrSIMD32_AVX512);
+    return ret_dist_func;
+}
+
 #include "implementation_chooser_cleanup.h"
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512.h b/src/VecSim/spaces/functions/AVX512.h
index 4ee7c6fe9..b37e8bda2 100644
--- a/src/VecSim/spaces/functions/AVX512.h
+++ b/src/VecSim/spaces/functions/AVX512.h
@@ -16,4 +16,7 @@ dist_func_t<double> Choose_FP64_IP_implementation_AVX512(size_t dim);
 dist_func_t<float> Choose_FP32_L2_implementation_AVX512(size_t dim);
 dist_func_t<double> Choose_FP64_L2_implementation_AVX512(size_t dim);
 
+dist_func_t<float> Choose_FP16_IP_implementation_AVX512(size_t dim);
+dist_func_t<float> Choose_FP16_L2_implementation_AVX512(size_t dim);
+
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512BW_VL.cpp b/src/VecSim/spaces/functions/AVX512BW_VL.cpp
deleted file mode 100644
index aed4e8508..000000000
--- a/src/VecSim/spaces/functions/AVX512BW_VL.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#include "AVX512BW_VL.h"
-
-#include "VecSim/spaces/IP/IP_AVX512BW_VL_FP16.h"
-#include "VecSim/spaces/L2/L2_AVX512BW_VL_FP16.h"
-
-namespace spaces {
-
-#include "implementation_chooser.h"
-
-dist_func_t<float> Choose_FP16_IP_implementation_AVX512BW_VL(size_t dim) {
-    dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, FP16_InnerProductSIMD32_AVX512BW_VL);
-    return ret_dist_func;
-}
-
-dist_func_t<float> Choose_FP16_L2_implementation_AVX512BW_VL(size_t dim) {
-    dist_func_t<float> ret_dist_func;
-    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 32, FP16_L2SqrSIMD32_AVX512BW_VL);
-    return ret_dist_func;
-}
-
-#include "implementation_chooser_cleanup.h"
-
-} // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512BW_VL.h b/src/VecSim/spaces/functions/AVX512BW_VL.h
deleted file mode 100644
index da139b699..000000000
--- a/src/VecSim/spaces/functions/AVX512BW_VL.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *Copyright Redis Ltd. 2021 - present
- *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
- *the Server Side Public License v1 (SSPLv1).
- */
-
-#pragma once
-
-#include "VecSim/spaces/spaces.h"
-
-namespace spaces {
-
-dist_func_t<float> Choose_FP16_IP_implementation_AVX512BW_VL(size_t dim);
-dist_func_t<float> Choose_FP16_L2_implementation_AVX512BW_VL(size_t dim);
-
-} // namespace spaces
diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt
index 5d5c54bed..5eea47afc 100644
--- a/tests/benchmark/CMakeLists.txt
+++ b/tests/benchmark/CMakeLists.txt
@@ -36,7 +36,6 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 	# Check that the compiler supports instructions flag.
 	# This will add the relevant flag both the the space selector and the optimization.
 	CHECK_CXX_COMPILER_FLAG(-mavx512bw CXX_AVX512BW)
-	CHECK_CXX_COMPILER_FLAG(-mavx512vl CXX_AVX512VL)
 	CHECK_CXX_COMPILER_FLAG(-mavx512vbmi2 CXX_AVX512VBMI2)
 	CHECK_CXX_COMPILER_FLAG(-mavx512f CXX_AVX512F)
 	CHECK_CXX_COMPILER_FLAG(-mavx2 CXX_AVX2)
@@ -54,10 +53,6 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
 		add_compile_definitions(OPT_AVX512_BW_VBMI2)
 	endif()
 
-	if(CXX_AVX512BW AND CXX_AVX512VL)
-		add_compile_definitions(OPT_AVX512_BW_VL)
-	endif()
-
 	if(CXX_F16C AND CXX_FMA AND CXX_AVX)
 		add_compile_definitions(OPT_F16C)
 	endif()
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces.h b/tests/benchmark/spaces_benchmarks/bm_spaces.h
index 80e64c197..c08cc6f9b 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces.h
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces.h
@@ -15,7 +15,6 @@
 #include "VecSim/spaces/functions/AVX512.h"
 #include "VecSim/spaces/functions/AVX.h"
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
-#include "VecSim/spaces/functions/AVX512BW_VL.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/F16C.h"
 #include "VecSim/spaces/functions/SSE3.h"
diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp
index ebb49cdfb..ffbf2c049 100644
--- a/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp
+++ b/tests/benchmark/spaces_benchmarks/bm_spaces_fp16.cpp
@@ -12,10 +12,10 @@
 cpu_features::X86Features opt = cpu_features::GetX86Info().features;
 
 // AVX512_BW_VL functions
-#ifdef OPT_AVX512_BW_VL
-bool avx512_bw_vl_supported = opt.avx512bw && opt.avx512vl;
-INITIALIZE_BENCHMARKS_SET(FP16, AVX512BW_VL, 32, avx512_bw_vl_supported);
-#endif // OPT_AVX512_BW_VL
+#ifdef OPT_AVX512F
+bool avx512_supported = opt.avx512f;
+INITIALIZE_BENCHMARKS_SET(FP16, AVX512, 32, avx512_supported);
+#endif // OPT_AVX512F
 
 // AVX functions
 #ifdef OPT_F16C
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 25ce4a195..9f30e9833 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -21,7 +21,6 @@
 #include "VecSim/spaces/functions/AVX512BW_VBMI2.h"
 #include "VecSim/spaces/functions/AVX2.h"
 #include "VecSim/spaces/functions/SSE3.h"
-#include "VecSim/spaces/functions/AVX512BW_VL.h"
 #include "VecSim/spaces/functions/F16C.h"
 
 using bfloat16 = vecsim_types::bfloat16;
@@ -469,12 +468,12 @@ TEST_P(FP16SpacesOptimizationTest, FP16InnerProductTest) {
     float baseline = FP16_InnerProduct(v1, v2, dim);
     ASSERT_EQ(baseline, FP32_InnerProduct(v1_fp32, v2_fp32, dim)) << "Baseline check " << dim;
 
-    if (optimization.avx512bw && optimization.avx512vl) {
+    if (optimization.avx512f) {
         arch_opt_func = IP_FP16_GetDistFunc(dim, &optimization);
-        ASSERT_EQ(arch_opt_func, Choose_FP16_IP_implementation_AVX512BW_VL(dim))
+        ASSERT_EQ(arch_opt_func, Choose_FP16_IP_implementation_AVX512(dim))
             << "Unexpected distance function chosen for dim " << dim;
         ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
-        optimization.avx512bw = optimization.avx512vl = 0;
+        optimization.avx512f = 0;
     }
     if (optimization.f16c && optimization.fma3 && optimization.avx) {
         arch_opt_func = IP_FP16_GetDistFunc(dim, &optimization);
@@ -505,12 +504,12 @@ TEST_P(FP16SpacesOptimizationTest, FP16L2SqrTest) {
     float baseline = FP16_L2Sqr(v1, v2, dim);
     ASSERT_EQ(baseline, FP32_L2Sqr(v1_fp32, v2_fp32, dim)) << "Baseline check " << dim;
 
-    if (optimization.avx512bw && optimization.avx512vl) {
+    if (optimization.avx512f) {
         arch_opt_func = L2_FP16_GetDistFunc(dim, &optimization);
-        ASSERT_EQ(arch_opt_func, Choose_FP16_L2_implementation_AVX512BW_VL(dim))
+        ASSERT_EQ(arch_opt_func, Choose_FP16_L2_implementation_AVX512(dim))
             << "Unexpected distance function chosen for dim " << dim;
         ASSERT_EQ(baseline, arch_opt_func(v1, v2, dim)) << "AVX512 with dim " << dim;
-        optimization.avx512bw = optimization.avx512vl = 0;
+        optimization.avx512f = 0;
     }
     if (optimization.f16c && optimization.fma3 && optimization.avx) {
         arch_opt_func = L2_FP16_GetDistFunc(dim, &optimization);

From ef2c719a2cce0bb0b5ed58c08b78c812966f7cab Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 6 May 2024 14:53:14 +0000
Subject: [PATCH 02/15] remove xenial from merge queue flow

---
 .github/workflows/event-merge-to-queue.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/event-merge-to-queue.yml b/.github/workflows/event-merge-to-queue.yml
index 8ea5967e8..37b4b20b1 100644
--- a/.github/workflows/event-merge-to-queue.yml
+++ b/.github/workflows/event-merge-to-queue.yml
@@ -28,12 +28,6 @@ jobs:
     uses: ./.github/workflows/task-unit-test.yml
     with:
       container: ubuntu:bionic
-  xenial:
-    needs: [check-if-docs-only]
-    if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }}
-    uses: ./.github/workflows/task-unit-test.yml
-    with:
-      container: ubuntu:xenial
   bullseye:
     needs: [check-if-docs-only]
     if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }}

From 26088bf734124ef91ae72fa0b891933a09b6c4fc Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 7 May 2024 14:07:06 +0000
Subject: [PATCH 03/15] introduce fp16 type!

change bfloat16 and float16 from integer aliases to distinct struct types to avoid ambiguity between them
---
 .../hnsw/hnsw_single_tests_friends.h          |    2 +
 .../hnsw/hnsw_tiered_tests_friends.h          |    1 +
 .../index_factories/brute_force_factory.cpp   |    6 +
 src/VecSim/index_factories/hnsw_factory.cpp   |    9 +
 src/VecSim/index_factories/tiered_factory.cpp |    6 +
 src/VecSim/spaces/IP/IP_AVX512_FP16.h         |    6 +-
 src/VecSim/spaces/IP/IP_F16C_FP16.h           |    2 +-
 src/VecSim/spaces/L2/L2_F16C_FP16.h           |    4 +-
 src/VecSim/spaces/normalize/normalize_naive.h |   22 +
 src/VecSim/spaces/spaces.cpp                  |   20 +
 src/VecSim/types/bfloat16.h                   |    8 +-
 src/VecSim/types/float16.h                    |   10 +-
 src/VecSim/utils/vec_utils.cpp                |    7 +
 src/VecSim/utils/vec_utils.h                  |    2 +
 src/VecSim/vec_sim.cpp                        |    2 +
 src/VecSim/vec_sim_common.h                   |    1 +
 src/VecSim/vec_sim_debug.cpp                  |    6 +
 tests/unit/CMakeLists.txt                     |    5 +-
 tests/unit/test_bf16.cpp                      |    9 +-
 tests/unit/test_common.cpp                    |   27 +
 tests/unit/test_fp16.cpp                      | 1099 +++++++++++++++++
 tests/unit/test_spaces.cpp                    |   62 +-
 22 files changed, 1300 insertions(+), 16 deletions(-)
 create mode 100644 tests/unit/test_fp16.cpp

diff --git a/src/VecSim/algorithms/hnsw/hnsw_single_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_single_tests_friends.h
index 7fc12e6b6..08ab8ed8d 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_single_tests_friends.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_single_tests_friends.h
@@ -15,3 +15,5 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_testSizeEstimation_Test)
 INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTest_swapJobBasic_Test)
 friend class BF16HNSWTest_testSizeEstimation_Test;
 friend class BF16TieredTest_testSizeEstimation_Test;
+friend class FP16HNSWTest_testSizeEstimation_Test;
+friend class FP16TieredTest_testSizeEstimation_Test;
diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
index a828a96b5..ff3ea2eb7 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
@@ -50,6 +50,7 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_preferAdHocOptimization_Test)
 INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_runGCAPI_Test)
 
 friend class BF16TieredTest;
+friend class FP16TieredTest;
 
 INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)
 INDEX_TEST_FRIEND_CLASS(BM_VecSimCommon)
diff --git a/src/VecSim/index_factories/brute_force_factory.cpp b/src/VecSim/index_factories/brute_force_factory.cpp
index a8218b594..c189ed45d 100644
--- a/src/VecSim/index_factories/brute_force_factory.cpp
+++ b/src/VecSim/index_factories/brute_force_factory.cpp
@@ -9,8 +9,10 @@
 #include "VecSim/algorithms/brute_force/brute_force_single.h"
 #include "VecSim/algorithms/brute_force/brute_force_multi.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 
 using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
 
 namespace BruteForceFactory {
 template <typename DataType, typename DistType = DataType>
@@ -54,6 +56,8 @@ VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &a
         return NewIndex_ChooseMultiOrSingle<double>(bfparams, abstractInitParams);
     } else if (bfparams->type == VecSimType_BFLOAT16) {
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(bfparams, abstractInitParams);
+    } else if (bfparams->type == VecSimType_FLOAT16) {
+        return NewIndex_ChooseMultiOrSingle<float16, float>(bfparams, abstractInitParams);
     }
 
     // If we got here something is wrong.
@@ -87,6 +91,8 @@ size_t EstimateInitialSize(const BFParams *params) {
         est += EstimateInitialSize_ChooseMultiOrSingle<double>(params->multi);
     } else if (params->type == VecSimType_BFLOAT16) {
         est += EstimateInitialSize_ChooseMultiOrSingle<bfloat16, float>(params->multi);
+    } else if (params->type == VecSimType_FLOAT16) {
+        est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
     }
     // Parameters related part.
 
diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp
index b1f9ddce5..8fe757dc8 100644
--- a/src/VecSim/index_factories/hnsw_factory.cpp
+++ b/src/VecSim/index_factories/hnsw_factory.cpp
@@ -9,8 +9,10 @@
 #include "VecSim/index_factories/hnsw_factory.h"
 #include "VecSim/algorithms/hnsw/hnsw.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 
 using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
 
 namespace HNSWFactory {
 
@@ -49,6 +51,8 @@ VecSimIndex *NewIndex(const VecSimParams *params) {
         return NewIndex_ChooseMultiOrSingle<double>(hnswParams, abstractInitParams);
     } else if (hnswParams->type == VecSimType_BFLOAT16) {
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(hnswParams, abstractInitParams);
+    } else if (hnswParams->type == VecSimType_FLOAT16) {
+        return NewIndex_ChooseMultiOrSingle<float16, float>(hnswParams, abstractInitParams);
     }
 
     // If we got here something is wrong.
@@ -82,6 +86,8 @@ size_t EstimateInitialSize(const HNSWParams *params) {
         est += EstimateInitialSize_ChooseMultiOrSingle<double>(params->multi);
     } else if (params->type == VecSimType_BFLOAT16) {
         est += EstimateInitialSize_ChooseMultiOrSingle<bfloat16, float>(params->multi);
+    } else if (params->type == VecSimType_FLOAT16) {
+        est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
     }
 
     // Account for the visited nodes pool (assume that it holds one pointer to a handler).
@@ -194,6 +200,9 @@ VecSimIndex *NewIndex(const std::string &location) {
     } else if (params.type == VecSimType_BFLOAT16) {
         return NewIndex_ChooseMultiOrSingle<bfloat16, float>(input, &params, abstractInitParams,
                                                              version);
+    } else if (params.type == VecSimType_FLOAT16) {
+        return NewIndex_ChooseMultiOrSingle<float16, float>(input, &params, abstractInitParams,
+                                                            version);
     } else {
         auto bad_name = VecSimType_ToString(params.type);
         if (bad_name == nullptr) {
diff --git a/src/VecSim/index_factories/tiered_factory.cpp b/src/VecSim/index_factories/tiered_factory.cpp
index d11dabb20..1d8c9e692 100644
--- a/src/VecSim/index_factories/tiered_factory.cpp
+++ b/src/VecSim/index_factories/tiered_factory.cpp
@@ -10,8 +10,10 @@
 
 #include "VecSim/algorithms/hnsw/hnsw_tiered.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 
 using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
 
 namespace TieredFactory {
 
@@ -66,6 +68,8 @@ inline size_t EstimateInitialSize(const TieredIndexParams *params, BFParams &bf_
         est += sizeof(TieredHNSWIndex<double, double>);
     } else if (hnsw_params.type == VecSimType_BFLOAT16) {
         est += sizeof(TieredHNSWIndex<bfloat16, float>);
+    } else if (hnsw_params.type == VecSimType_FLOAT16) {
+        est += sizeof(TieredHNSWIndex<float16, float>);
     }
     bf_params_output.type = hnsw_params.type;
     bf_params_output.multi = hnsw_params.multi;
@@ -82,6 +86,8 @@ VecSimIndex *NewIndex(const TieredIndexParams *params) {
         return TieredHNSWFactory::NewIndex<double>(params);
     } else if (type == VecSimType_BFLOAT16) {
         return TieredHNSWFactory::NewIndex<bfloat16, float>(params);
+    } else if (type == VecSimType_FLOAT16) {
+        return TieredHNSWFactory::NewIndex<float16, float>(params);
     }
     return nullptr; // Invalid type.
 }
diff --git a/src/VecSim/spaces/IP/IP_AVX512_FP16.h b/src/VecSim/spaces/IP/IP_AVX512_FP16.h
index c08343783..ae6f7e2dc 100644
--- a/src/VecSim/spaces/IP/IP_AVX512_FP16.h
+++ b/src/VecSim/spaces/IP/IP_AVX512_FP16.h
@@ -24,10 +24,10 @@ static void InnerProductStep(float16 *&pVect1, float16 *&pVect2, __m512 &sum) {
 
 template <unsigned short residual> // 0..31
 float FP16_InnerProductSIMD32_AVX512(const void *pVect1v, const void *pVect2v, size_t dimension) {
-    auto *pVect1 = (uint16_t *)pVect1v;
-    auto *pVect2 = (uint16_t *)pVect2v;
+    auto *pVect1 = (float16 *)pVect1v;
+    auto *pVect2 = (float16 *)pVect2v;
 
-    const uint16_t *pEnd1 = pVect1 + dimension;
+    const float16 *pEnd1 = pVect1 + dimension;
 
     auto sum = _mm512_setzero_ps();
 
diff --git a/src/VecSim/spaces/IP/IP_F16C_FP16.h b/src/VecSim/spaces/IP/IP_F16C_FP16.h
index 7ccb86b3d..0b86b1ea7 100644
--- a/src/VecSim/spaces/IP/IP_F16C_FP16.h
+++ b/src/VecSim/spaces/IP/IP_F16C_FP16.h
@@ -27,7 +27,7 @@ float FP16_InnerProductSIMD32_F16C(const void *pVect1v, const void *pVect2v, siz
     auto *pVect1 = (float16 *)pVect1v;
     auto *pVect2 = (float16 *)pVect2v;
 
-    const uint16_t *pEnd1 = pVect1 + dimension;
+    const float16 *pEnd1 = pVect1 + dimension;
 
     auto sum = _mm256_setzero_ps();
 
diff --git a/src/VecSim/spaces/L2/L2_F16C_FP16.h b/src/VecSim/spaces/L2/L2_F16C_FP16.h
index 93bc0666a..e53d0a22d 100644
--- a/src/VecSim/spaces/L2/L2_F16C_FP16.h
+++ b/src/VecSim/spaces/L2/L2_F16C_FP16.h
@@ -11,7 +11,7 @@
 
 using float16 = vecsim_types::float16;
 
-static void L2SqrStep(uint16_t *&pVect1, uint16_t *&pVect2, __m256 &sum) {
+static void L2SqrStep(float16 *&pVect1, float16 *&pVect2, __m256 &sum) {
     // Convert 8 half-floats into floats and store them in 256 bits register.
     auto v1 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i_u const *)(pVect1)));
     auto v2 = _mm256_cvtph_ps(_mm_loadu_si128((__m128i_u const *)(pVect2)));
@@ -28,7 +28,7 @@ float FP16_L2SqrSIMD32_F16C(const void *pVect1v, const void *pVect2v, size_t dim
     auto *pVect1 = (float16 *)pVect1v;
     auto *pVect2 = (float16 *)pVect2v;
 
-    const uint16_t *pEnd1 = pVect1 + dimension;
+    const float16 *pEnd1 = pVect1 + dimension;
 
     auto sum = _mm256_setzero_ps();
 
diff --git a/src/VecSim/spaces/normalize/normalize_naive.h b/src/VecSim/spaces/normalize/normalize_naive.h
index d4a893833..239db9cf3 100644
--- a/src/VecSim/spaces/normalize/normalize_naive.h
+++ b/src/VecSim/spaces/normalize/normalize_naive.h
@@ -7,9 +7,11 @@
 #pragma once
 
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 #include <cmath>
 
 using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
 
 namespace spaces {
 
@@ -50,4 +52,24 @@ static inline void bfloat16_normalizeVector(void *vec, const size_t dim) {
     }
 }
 
+static inline void float16_normalizeVector(void *vec, const size_t dim) {
+    float16 *input_vector = (float16 *)vec;
+
+    float f32_tmp[dim];
+
+    float sum = 0;
+
+    for (size_t i = 0; i < dim; i++) {
+        float val = vecsim_types::FP16_to_FP32(input_vector[i]);
+        f32_tmp[i] = val;
+        sum += val * val;
+    }
+
+    float norm = sqrt(sum);
+
+    for (size_t i = 0; i < dim; i++) {
+        input_vector[i] = vecsim_types::FP32_to_FP16(f32_tmp[i] / norm);
+    }
+}
+
 } // namespace spaces
diff --git a/src/VecSim/spaces/spaces.cpp b/src/VecSim/spaces/spaces.cpp
index 0419b7720..9e9e7275c 100644
--- a/src/VecSim/spaces/spaces.cpp
+++ b/src/VecSim/spaces/spaces.cpp
@@ -5,6 +5,7 @@
  */
 
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 #include "VecSim/spaces/space_includes.h"
 #include "VecSim/spaces/spaces.h"
 #include "VecSim/spaces/IP_space.h"
@@ -30,6 +31,19 @@ dist_func_t<float> GetDistFunc<vecsim_types::bfloat16, float>(VecSimMetric metri
     throw std::invalid_argument("Invalid metric");
 }
 
+template <>
+dist_func_t<float> GetDistFunc<vecsim_types::float16, float>(VecSimMetric metric, size_t dim,
+                                                             unsigned char *alignment) {
+    switch (metric) {
+    case VecSimMetric_Cosine:
+    case VecSimMetric_IP:
+        return IP_FP16_GetDistFunc(dim, nullptr, alignment);
+    case VecSimMetric_L2:
+        return L2_FP16_GetDistFunc(dim, nullptr, alignment);
+    }
+    throw std::invalid_argument("Invalid metric");
+}
+
 template <>
 dist_func_t<float> GetDistFunc<float, float>(VecSimMetric metric, size_t dim,
                                              unsigned char *alignment) {
@@ -74,4 +88,10 @@ normalizeVector_f<vecsim_types::bfloat16> GetNormalizeFunc<vecsim_types::bfloat1
         return bfloat16_normalizeVector<false>;
     }
 }
+
+template <>
+normalizeVector_f<vecsim_types::float16> GetNormalizeFunc<vecsim_types::float16>(void) {
+    return float16_normalizeVector;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/types/bfloat16.h b/src/VecSim/types/bfloat16.h
index cb27a6468..8bda8e885 100644
--- a/src/VecSim/types/bfloat16.h
+++ b/src/VecSim/types/bfloat16.h
@@ -11,8 +11,12 @@
 #include <cmath>
 
 namespace vecsim_types {
-
-using bfloat16 = unsigned short;
+struct bfloat16 {
+    uint16_t val;
+    constexpr bfloat16(uint16_t val = 0) : val(val) {}
+    operator uint16_t() const { return val; }
+};
+using bfloat16 = struct bfloat16;
 
 static inline bfloat16 float_to_bf16(const float ff) {
     uint32_t *p_f32 = (uint32_t *)&ff;
diff --git a/src/VecSim/types/float16.h b/src/VecSim/types/float16.h
index 29487c8b9..8e22e792a 100644
--- a/src/VecSim/types/float16.h
+++ b/src/VecSim/types/float16.h
@@ -10,8 +10,12 @@
 #include <cstring>
 #include <algorithm>
 namespace vecsim_types {
-
-using float16 = uint16_t;
+struct float16 {
+    uint16_t val;
+    constexpr float16(uint16_t val = 0) : val(val) {}
+    operator uint16_t() const { return val; }
+};
+using float16 = struct float16;
 
 inline float _interpret_as_float(uint32_t num) {
     void *num_ptr = &num;
@@ -42,7 +46,7 @@ static inline float FP16_to_FP32(float16 input) {
     return _interpret_as_float(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
 }
 
-static inline float16 FP32_to_FP16(float input) {
+static inline struct float16 FP32_to_FP16(float input) {
     // via Fabian "ryg" Giesen.
     // https://gist.github.com/2156668
     uint32_t sign_mask = 0x80000000u;
diff --git a/src/VecSim/utils/vec_utils.cpp b/src/VecSim/utils/vec_utils.cpp
index c079ce96e..99160c247 100644
--- a/src/VecSim/utils/vec_utils.cpp
+++ b/src/VecSim/utils/vec_utils.cpp
@@ -7,6 +7,7 @@
 #include "vec_utils.h"
 #include "VecSim/query_result_definitions.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 #include <cmath>
 #include <cerrno>
 #include <climits>
@@ -14,6 +15,7 @@
 #include <algorithm>
 
 using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
 
 const char *VecSimCommonStrings::ALGORITHM_STRING = "ALGORITHM";
 const char *VecSimCommonStrings::FLAT_STRING = "FLAT";
@@ -24,6 +26,7 @@ const char *VecSimCommonStrings::TYPE_STRING = "TYPE";
 const char *VecSimCommonStrings::FLOAT32_STRING = "FLOAT32";
 const char *VecSimCommonStrings::FLOAT64_STRING = "FLOAT64";
 const char *VecSimCommonStrings::BFLOAT16_STRING = "BFLOAT16";
+const char *VecSimCommonStrings::FLOAT16_STRING = "FLOAT16";
 const char *VecSimCommonStrings::INT32_STRING = "INT32";
 const char *VecSimCommonStrings::INT64_STRING = "INT64";
 
@@ -142,6 +145,8 @@ const char *VecSimType_ToString(VecSimType vecsimType) {
         return VecSimCommonStrings::FLOAT64_STRING;
     case VecSimType_BFLOAT16:
         return VecSimCommonStrings::BFLOAT16_STRING;
+    case VecSimType_FLOAT16:
+        return VecSimCommonStrings::FLOAT16_STRING;
     case VecSimType_INT32:
         return VecSimCommonStrings::INT32_STRING;
     case VecSimType_INT64:
@@ -188,6 +193,8 @@ size_t VecSimType_sizeof(VecSimType type) {
         return sizeof(double);
     case VecSimType_BFLOAT16:
         return sizeof(bfloat16);
+    case VecSimType_FLOAT16:
+        return sizeof(float16);
     case VecSimType_INT32:
         return sizeof(int32_t);
     case VecSimType_INT64:
diff --git a/src/VecSim/utils/vec_utils.h b/src/VecSim/utils/vec_utils.h
index 3e0ac946c..2b92fa49e 100644
--- a/src/VecSim/utils/vec_utils.h
+++ b/src/VecSim/utils/vec_utils.h
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #include "VecSim/vec_sim_common.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 #include <VecSim/query_results.h>
 #include <utility>
 #include <cassert>
@@ -24,6 +25,7 @@ struct VecSimCommonStrings {
     static const char *FLOAT32_STRING;
     static const char *FLOAT64_STRING;
     static const char *BFLOAT16_STRING;
+    static const char *FLOAT16_STRING;
     static const char *INT32_STRING;
     static const char *INT64_STRING;
 
diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp
index d54f70810..35e111996 100644
--- a/src/VecSim/vec_sim.cpp
+++ b/src/VecSim/vec_sim.cpp
@@ -136,6 +136,8 @@ extern "C" void VecSim_Normalize(void *blob, size_t dim, VecSimType type) {
         spaces::GetNormalizeFunc<double>()(blob, dim);
     } else if (type == VecSimType_BFLOAT16) {
         spaces::GetNormalizeFunc<vecsim_types::bfloat16>()(blob, dim);
+    } else if (type == VecSimType_FLOAT16) {
+        spaces::GetNormalizeFunc<vecsim_types::float16>()(blob, dim);
     }
 }
 
diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h
index 2f93e6161..f4a873aa3 100644
--- a/src/VecSim/vec_sim_common.h
+++ b/src/VecSim/vec_sim_common.h
@@ -34,6 +34,7 @@ typedef enum {
     VecSimType_FLOAT32,
     VecSimType_FLOAT64,
     VecSimType_BFLOAT16,
+    VecSimType_FLOAT16,
     VecSimType_INT32,
     VecSimType_INT64
 } VecSimType;
diff --git a/src/VecSim/vec_sim_debug.cpp b/src/VecSim/vec_sim_debug.cpp
index 3de2569be..98cc05c91 100644
--- a/src/VecSim/vec_sim_debug.cpp
+++ b/src/VecSim/vec_sim_debug.cpp
@@ -29,6 +29,9 @@ extern "C" int VecSimDebug_GetElementNeighborsInHNSWGraph(VecSimIndex *index, si
         } else if (info.type == VecSimType_BFLOAT16) {
             return dynamic_cast<HNSWIndex<vecsim_types::bfloat16, float> *>(index)
                 ->getHNSWElementNeighbors(label, neighborsData);
+        } else if (info.type == VecSimType_FLOAT16) {
+            return dynamic_cast<HNSWIndex<vecsim_types::float16, float> *>(index)
+                ->getHNSWElementNeighbors(label, neighborsData);
         } else {
             assert(false && "Invalid data type");
         }
@@ -42,6 +45,9 @@ extern "C" int VecSimDebug_GetElementNeighborsInHNSWGraph(VecSimIndex *index, si
         } else if (info.type == VecSimType_BFLOAT16) {
             return dynamic_cast<TieredHNSWIndex<vecsim_types::bfloat16, float> *>(index)
                 ->getHNSWElementNeighbors(label, neighborsData);
+        } else if (info.type == VecSimType_FLOAT16) {
+            return dynamic_cast<TieredHNSWIndex<vecsim_types::float16, float> *>(index)
+                ->getHNSWElementNeighbors(label, neighborsData);
         } else {
             assert(false && "Invalid data type");
         }
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index ff84306b0..e90e023b2 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -35,6 +35,7 @@ add_executable(test_allocator test_allocator.cpp test_utils.cpp)
 add_executable(test_spaces test_spaces.cpp)
 add_executable(test_common ../utils/mock_thread_pool.cpp test_utils.cpp test_common.cpp)
 add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp test_utils.cpp)
+add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp test_utils.cpp)
 
 target_link_libraries(test_hnsw PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_hnsw_parallel PUBLIC gtest_main VectorSimilarity)
@@ -43,6 +44,7 @@ target_link_libraries(test_allocator PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_spaces PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_common PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_bf16 PUBLIC gtest_main VectorSimilarity)
+target_link_libraries(test_fp16 PUBLIC gtest_main VectorSimilarity)
 
 include(GoogleTest)
 
@@ -52,4 +54,5 @@ gtest_discover_tests(test_bruteforce)
 gtest_discover_tests(test_allocator)
 gtest_discover_tests(test_spaces)
 gtest_discover_tests(test_common)
-gtest_discover_tests(test_bf16)
+gtest_discover_tests(test_bf16 TEST_PREFIX BF16UNIT_)
+gtest_discover_tests(test_fp16 TEST_PREFIX FP16UNIT_)
diff --git a/tests/unit/test_bf16.cpp b/tests/unit/test_bf16.cpp
index bc5b44ab6..e316a5d65 100644
--- a/tests/unit/test_bf16.cpp
+++ b/tests/unit/test_bf16.cpp
@@ -78,14 +78,14 @@ class BF16Test : public ::testing::Test {
     template <typename params_t>
     void test_override(params_t index_params);
     template <typename params_t>
+    void test_get_distance(params_t params, VecSimMetric metric);
+    template <typename params_t>
     void test_batch_iterator_basic(params_t index_params);
     template <typename params_t>
     VecSimIndexInfo test_info(params_t index_params);
     template <typename params_t>
     void test_info_iterator(VecSimMetric metric);
     template <typename params_t>
-    void test_get_distance(params_t params, VecSimMetric metric);
-    template <typename params_t>
     void get_element_neighbors(params_t params);
 
     VecSimIndex *index;
@@ -113,8 +113,8 @@ class BF16HNSWTest : public BF16Test {
         return CastIndex<HNSWIndex<bfloat16, float>>(index);
     }
 
-    void test_serialization(bool is_multi);
     void test_info(bool is_multi);
+    void test_serialization(bool is_multi);
 };
 
 class BF16BruteForceTest : public BF16Test {
@@ -749,6 +749,7 @@ void BF16Test::test_batch_iterator_basic(params_t params) {
     ASSERT_EQ(iteration_num, n / n_res);
     VecSimBatchIterator_Free(batchIterator);
 }
+
 TEST_F(BF16HNSWTest, BatchIteratorBasic) {
     size_t n = 250;
     HNSWParams params = {
@@ -770,6 +771,7 @@ TEST_F(BF16TieredTest, BatchIteratorBasic) {
 }
 
 /* ---------------------------- Info tests ---------------------------- */
+
 template <typename params_t>
 VecSimIndexInfo BF16Test::test_info(params_t params) {
     VecSimIndexInfo info = VecSimIndex_Info(index);
@@ -1045,6 +1047,7 @@ void BF16HNSWTest::test_serialization(bool is_multi) {
 
     // Clean up.
     remove(file_name.c_str());
+    VecSimIndex_Free(serialized_index);
 }
 
 TEST_F(BF16HNSWTest, SerializationCurrentVersion) { test_serialization(false); }
diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp
index db3262cdb..4546da7f4 100644
--- a/tests/unit/test_common.cpp
+++ b/tests/unit/test_common.cpp
@@ -18,6 +18,7 @@
 #include "VecSim/index_factories/tiered_factory.h"
 #include "VecSim/spaces/spaces.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 
 #include <cstdlib>
 #include <limits>
@@ -26,6 +27,7 @@
 #include <cstdarg>
 
 using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
 
 template <typename index_type_t>
 class CommonIndexTest : public ::testing::Test {};
@@ -573,3 +575,28 @@ TEST(CommonAPITest, NormalizeBfloat16) {
 
     ASSERT_NEAR(1.0, norm, 0.001);
 }
+
+TEST(CommonAPITest, NormalizeFloat16) {
+    size_t dim = 20;
+    float16 v[dim];
+    // Fixed seed keeps the test deterministic; components are drawn from [-5, 5).
+    std::mt19937 gen(42);
+    std::uniform_real_distribution<> dis(-5.0, 5.0);
+
+    for (size_t i = 0; i < dim; i++) {
+        float random_number = dis(gen);
+        v[i] = vecsim_types::FP32_to_FP16(random_number);
+    }
+
+    VecSim_Normalize(v, dim, VecSimType_FLOAT16);
+
+    // Check that the squared norm of the normalized vector is 1 (within FP16 rounding error).
+    float norm = 0;
+    for (size_t i = 0; i < dim; ++i) {
+        // Widen each FP16 component back to FP32 before accumulating.
+        float val = vecsim_types::FP16_to_FP32(v[i]);
+        norm += val * val;
+    }
+
+    ASSERT_NEAR(1.0, norm, 0.001);
+}
diff --git a/tests/unit/test_fp16.cpp b/tests/unit/test_fp16.cpp
new file mode 100644
index 000000000..e730c51a2
--- /dev/null
+++ b/tests/unit/test_fp16.cpp
@@ -0,0 +1,1099 @@
+#include "gtest/gtest.h"
+#include "VecSim/vec_sim.h"
+#include "VecSim/algorithms/hnsw/hnsw_single.h"
+#include "VecSim/index_factories/hnsw_factory.h"
+#include "test_utils.h"
+#include "VecSim/utils/serializer.h"
+#include "mock_thread_pool.h"
+#include "VecSim/query_result_definitions.h"
+#include "VecSim/types/float16.h"
+#include "VecSim/vec_sim_debug.h"
+#include "VecSim/spaces/L2/L2.h"
+
+#include <random>
+
+using float16 = vecsim_types::float16;
+
+class FP16Test : public ::testing::Test {
+protected:
+    virtual void SetUp(HNSWParams &params) {
+        FAIL() << "FP16Test::SetUp(HNSWParams) this method should be overridden";
+    }
+
+    virtual void SetUp(BFParams &params) {
+        FAIL() << "FP16Test::SetUp(BFParams) this method should be overridden";
+    }
+
+    virtual void SetUp(TieredIndexParams &tiered_params) {
+        FAIL() << "FP16Test::SetUp(TieredIndexParams) this method should be overridden";
+    }
+
+    virtual void TearDown() { VecSimIndex_Free(index); }
+
+    virtual const void *GetDataByInternalId(idType id) = 0;
+
+    template <typename algo_t>
+    algo_t *CastIndex() {
+        return dynamic_cast<algo_t *>(index);
+    }
+
+    template <typename algo_t>
+    algo_t *CastIndex(VecSimIndex *vecsim_index) {
+        return dynamic_cast<algo_t *>(vecsim_index);
+    }
+
+    virtual HNSWIndex<float16, float> *CastToHNSW() {
+        return CastIndex<HNSWIndex<float16, float>>();
+    }
+
+    void GenerateVector(float16 *out_vec, float initial_value = 0.25f, float step = 0.0f) {
+        for (size_t i = 0; i < dim; i++) {
+            out_vec[i] = vecsim_types::FP32_to_FP16(initial_value + step * i);
+        }
+    }
+
+    int GenerateAndAddVector(size_t id, float initial_value = 0.25f, float step = 0.0f) {
+        float16 v[dim];
+        GenerateVector(v, initial_value, step);
+        return VecSimIndex_AddVector(index, v, id);
+    }
+
+    int GenerateAndAddVector(VecSimIndex *target_index, size_t id, float initial_value = 0.25f,
+                             float step = 0.0f) {
+        float16 v[dim];
+        GenerateVector(v, initial_value, step);
+        return VecSimIndex_AddVector(target_index, v, id);
+    }
+
+    template <typename params_t>
+    void create_index_test(params_t index_params);
+    template <typename params_t>
+    void search_by_id_test(params_t index_params);
+    template <typename params_t>
+    void search_by_score_test(params_t index_params);
+    template <typename params_t>
+    void search_empty_index_test(params_t index_params);
+    template <typename params_t>
+    void test_range_query(params_t index_params);
+    template <typename params_t>
+    void test_override(params_t index_params);
+    template <typename params_t>
+    void test_get_distance(params_t params, VecSimMetric metric);
+    template <typename params_t>
+    void test_batch_iterator_basic(params_t index_params);
+    template <typename params_t>
+    VecSimIndexInfo test_info(params_t index_params);
+    template <typename params_t>
+    void test_info_iterator(VecSimMetric metric);
+    template <typename params_t>
+    void get_element_neighbors(params_t params);
+
+    VecSimIndex *index; // Index under test; freed by the default TearDown().
+    size_t dim;         // Vector dimension used by the Generate* helpers.
+};
+
+class FP16HNSWTest : public FP16Test {
+protected:
+    virtual void SetUp(HNSWParams &params) {
+        params.type = VecSimType_FLOAT16;
+        VecSimParams vecsim_params = CreateParams(params);
+        index = VecSimIndex_New(&vecsim_params);
+        dim = params.dim;
+    }
+
+    virtual const void *GetDataByInternalId(idType id) {
+        return CastIndex<HNSWIndex_Single<float16, float>>()->getDataByInternalId(id);
+    }
+
+    HNSWIndex<float16, float> *CastToHNSW(VecSimIndex *new_index) {
+        return CastIndex<HNSWIndex<float16, float>>(new_index);
+    }
+
+    virtual HNSWIndex<float16, float> *CastToHNSW() {
+        return CastIndex<HNSWIndex<float16, float>>(index);
+    }
+
+    void test_info(bool is_multi);
+    void test_serialization(bool is_multi);
+};
+
+class FP16BruteForceTest : public FP16Test {
+protected:
+    virtual void SetUp(BFParams &params) {
+        params.type = VecSimType_FLOAT16;
+        VecSimParams vecsim_params = CreateParams(params);
+        index = VecSimIndex_New(&vecsim_params);
+        dim = params.dim;
+    }
+
+    virtual const void *GetDataByInternalId(idType id) {
+        return CastIndex<BruteForceIndex_Single<float16, float>>()->getDataByInternalId(id);
+    }
+
+    virtual HNSWIndex<float16, float> *CastToHNSW() {
+        ADD_FAILURE() << "FP16BruteForceTest::CastToHNSW() this method should not be called";
+        return nullptr;
+    }
+
+    void test_info(bool is_multi);
+};
+
+class FP16TieredTest : public FP16Test {
+protected:
+    TieredIndexParams generate_tiered_params(HNSWParams &hnsw_params, size_t swap_job_threshold = 0,
+                                             size_t flat_buffer_limit = SIZE_MAX) {
+        hnsw_params.type = VecSimType_FLOAT16;
+        vecsim_hnsw_params = CreateParams(hnsw_params);
+        TieredIndexParams tiered_params = {
+            .jobQueue = &mock_thread_pool.jobQ,
+            .jobQueueCtx = mock_thread_pool.ctx,
+            .submitCb = tieredIndexMock::submit_callback,
+            .flatBufferLimit = flat_buffer_limit,
+            .primaryIndexParams = &vecsim_hnsw_params,
+            .specificParams = {TieredHNSWParams{.swapJobThreshold = swap_job_threshold}}};
+        return tiered_params;
+    }
+
+    virtual void SetUp(TieredIndexParams &tiered_params) override {
+        VecSimParams params = CreateParams(tiered_params);
+        index = VecSimIndex_New(&params);
+        dim = tiered_params.primaryIndexParams->algoParams.hnswParams.dim;
+
+        // Set the created tiered index in the index external context.
+        mock_thread_pool.ctx->index_strong_ref.reset(index);
+    }
+
+    virtual void SetUp(HNSWParams &hnsw_params) override {
+        TieredIndexParams tiered_params = generate_tiered_params(hnsw_params);
+        SetUp(tiered_params);
+    }
+
+    virtual void TearDown() override {}
+
+    virtual const void *GetDataByInternalId(idType id) {
+        return CastIndex<BruteForceIndex<float16, float>>(CastToBruteForce())
+            ->getDataByInternalId(id);
+    }
+
+    virtual HNSWIndex<float16, float> *CastToHNSW() {
+        auto tiered_index = dynamic_cast<TieredHNSWIndex<float16, float> *>(index);
+        return tiered_index->getHNSWIndex();
+    }
+
+    VecSimIndexAbstract<float16, float> *CastToBruteForce() {
+        auto tiered_index = dynamic_cast<TieredHNSWIndex<float16, float> *>(index);
+        return tiered_index->getFlatBufferIndex();
+    }
+
+    void test_info_iterator(VecSimMetric metric);
+    void test_info(bool is_multi);
+
+    VecSimParams vecsim_hnsw_params;
+    tieredIndexMock mock_thread_pool;
+};
+/* ---------------------------- Create index tests ---------------------------- */
+
+template <typename params_t>
+void FP16Test::create_index_test(params_t index_params) {
+    SetUp(index_params);
+    float initial_value = 0.5f;
+    float step = 1.0f;
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
+
+    float16 vector[dim];
+    this->GenerateVector(vector, initial_value, step);
+    VecSimIndex_AddVector(index, vector, 0);
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
+    ASSERT_EQ(index->getDistanceFrom_Unsafe(0, vector), 0);
+
+    const void *v = this->GetDataByInternalId(0);
+
+    for (size_t i = 0; i < dim; i++) {
+        // Values are exactly representable in FP16, so the round trip to FP32 is lossless.
+        ASSERT_EQ(vecsim_types::FP16_to_FP32(((float16 *)v)[i]), initial_value + step * float(i));
+    }
+}
+
+TEST_F(FP16HNSWTest, createIndex) {
+    HNSWParams params = {.dim = 40, .initialCapacity = 200, .M = 16, .efConstruction = 200};
+    create_index_test(params);
+    ASSERT_EQ(index->basicInfo().type, VecSimType_FLOAT16);
+    ASSERT_EQ(index->basicInfo().algo, VecSimAlgo_HNSWLIB);
+}
+
+TEST_F(FP16BruteForceTest, createIndex) {
+    BFParams params = {.dim = 40, .initialCapacity = 200};
+    create_index_test(params);
+    ASSERT_EQ(index->basicInfo().type, VecSimType_FLOAT16);
+    ASSERT_EQ(index->basicInfo().algo, VecSimAlgo_BF);
+}
+
+TEST_F(FP16TieredTest, createIndex) {
+    HNSWParams params = {.dim = 40, .initialCapacity = 200, .M = 16, .efConstruction = 200};
+    create_index_test(params);
+    ASSERT_EQ(index->basicInfo().type, VecSimType_FLOAT16);
+    ASSERT_EQ(index->basicInfo().isTiered, true);
+}
+/* ---------------------------- Size Estimation tests ---------------------------- */
+
+TEST_F(FP16HNSWTest, testSizeEstimation) {
+    size_t n = 200;
+    size_t bs = 256;
+    size_t M = 64;
+
+    // Initial capacity is rounded up to the block size.
+    size_t extra_cap = n % bs == 0 ? 0 : bs - n % bs;
+
+    HNSWParams params = {.dim = 256, .initialCapacity = n, .blockSize = bs, .M = M};
+    SetUp(params);
+
+    // EstimateInitialSize is called after CreateNewIndex because params struct is
+    // changed in CreateNewIndex.
+    size_t estimation = EstimateInitialSize(params);
+
+    size_t actual = index->getAllocationSize();
+    // labels_lookup hash table has additional memory, since STL implementation chooses "an
+    // appropriate prime number" higher than n as the number of allocated buckets (for n=1000, 1031
+    // buckets are created)
+    estimation += (this->CastIndex<HNSWIndex_Single<float16, float>>()->labelLookup.bucket_count() -
+                   (n + extra_cap)) *
+                  sizeof(size_t);
+
+    ASSERT_EQ(estimation, actual);
+
+    // Fill the initial capacity + fill the last block.
+    for (size_t i = 0; i < n; i++) {
+        ASSERT_EQ(this->GenerateAndAddVector(i), 1);
+    }
+    idType cur = n;
+    while (index->indexSize() % bs != 0) {
+        this->GenerateAndAddVector(cur++);
+    }
+
+    // Estimate the memory delta of adding a single vector that requires a full new block.
+    estimation = EstimateElementSize(params) * bs;
+    size_t before = index->getAllocationSize();
+    this->GenerateAndAddVector(bs);
+    actual = index->getAllocationSize() - before;
+
+    // We check that the actual size is within 1% of the estimation.
+    ASSERT_GE(estimation, actual * 0.99);
+    ASSERT_LE(estimation, actual * 1.01);
+}
+
+TEST_F(FP16HNSWTest, testSizeEstimation_No_InitialCapacity) {
+    size_t dim = 128;
+    size_t n = 0;
+    size_t bs = DEFAULT_BLOCK_SIZE;
+
+    HNSWParams params = {.dim = dim, .initialCapacity = n, .blockSize = bs};
+    SetUp(params);
+
+    // EstimateInitialSize is called after CreateNewIndex because params struct is
+    // changed in CreateNewIndex.
+    size_t estimation = EstimateInitialSize(params);
+
+    size_t actual = index->getAllocationSize();
+
+    // labels_lookup and element_levels containers are not allocated at all in some platforms,
+    // when initial capacity is zero, while in other platforms labels_lookup is allocated with a
+    // single bucket. Thus, we get the following range in which we expect the initial memory to be
+    // in.
+    ASSERT_GE(actual, estimation);
+    ASSERT_LE(actual, estimation + sizeof(size_t) + 2 * sizeof(size_t));
+}
+
+TEST_F(FP16BruteForceTest, testSizeEstimation) {
+    size_t dim = 128;
+    size_t n = 0;
+    size_t bs = DEFAULT_BLOCK_SIZE;
+
+    BFParams params = {.dim = dim, .initialCapacity = n, .blockSize = bs};
+    SetUp(params);
+
+    // EstimateInitialSize is called after CreateNewIndex because params struct is
+    // changed in CreateNewIndex.
+    size_t estimation = EstimateInitialSize(params);
+
+    size_t actual = index->getAllocationSize();
+    ASSERT_EQ(estimation, actual);
+
+    estimation = EstimateElementSize(params) * bs;
+
+    ASSERT_EQ(this->GenerateAndAddVector(0), 1);
+
+    actual = index->getAllocationSize() - actual; // get the delta
+    ASSERT_GE(estimation * 1.01, actual);
+    ASSERT_LE(estimation * 0.99, actual);
+}
+
+TEST_F(FP16BruteForceTest, testSizeEstimation_No_InitialCapacity) {
+    size_t dim = 128;
+    size_t n = 100;
+    size_t bs = DEFAULT_BLOCK_SIZE;
+
+    BFParams params = {
+        .dim = dim, .metric = VecSimMetric_Cosine, .initialCapacity = n, .blockSize = bs};
+    SetUp(params);
+
+    // EstimateInitialSize is called after CreateNewIndex because params struct is
+    // changed in CreateNewIndex.
+    size_t estimation = EstimateInitialSize(params);
+
+    size_t actual = index->getAllocationSize();
+    ASSERT_EQ(estimation, actual);
+}
+
+TEST_F(FP16TieredTest, testSizeEstimation) {
+    size_t n = DEFAULT_BLOCK_SIZE;
+    size_t M = 32;
+    size_t bs = DEFAULT_BLOCK_SIZE;
+
+    HNSWParams hnsw_params = {.dim = 128, .initialCapacity = n, .M = M};
+    SetUp(hnsw_params);
+    TieredIndexParams tiered_params = generate_tiered_params(hnsw_params);
+    VecSimParams params = CreateParams(tiered_params);
+
+    // auto allocator = index->getAllocator();
+    size_t initial_size_estimation = VecSimIndex_EstimateInitialSize(&params);
+
+    // labels_lookup hash table has additional memory, since STL implementation chooses "an
+    // appropriate prime number" higher than n as the number of allocated buckets (for n=1000, 1031
+    // buckets are created)
+    auto hnsw_index = CastToHNSW();
+    auto hnsw = CastIndex<HNSWIndex_Single<float16, float>>(hnsw_index);
+    initial_size_estimation += (hnsw->labelLookup.bucket_count() - n) * sizeof(size_t);
+
+    ASSERT_EQ(initial_size_estimation, index->getAllocationSize());
+
+    // Add vectors up to initial capacity (initial capacity == block size).
+    for (size_t i = 0; i < n; i++) {
+        GenerateAndAddVector(i, i);
+        mock_thread_pool.thread_iteration();
+    }
+
+    // Estimate memory delta for filling up the first block and adding another block.
+    size_t estimation = VecSimIndex_EstimateElementSize(&params) * bs;
+
+    size_t before = index->getAllocationSize();
+    GenerateAndAddVector(bs + n, bs + n);
+    mock_thread_pool.thread_iteration();
+    size_t actual = index->getAllocationSize() - before;
+
+    // Flat index should be empty, hence the index size includes only hnsw size.
+    ASSERT_EQ(index->indexSize(), hnsw_index->indexSize());
+    ASSERT_EQ(index->indexCapacity(), hnsw_index->indexCapacity());
+    // We added n + 1 vectors
+    ASSERT_EQ(index->indexSize(), n + 1);
+    // We should have 2 blocks now
+    ASSERT_EQ(index->indexCapacity(), 2 * bs);
+
+    // We check that the actual size is within 1% of the estimation.
+    ASSERT_GE(estimation, actual * 0.99);
+    ASSERT_LE(estimation, actual * 1.01);
+}
+
+/* ---------------------------- Functionality tests ---------------------------- */
+
+template <typename params_t>
+void FP16Test::search_by_id_test(params_t index_params) {
+    SetUp(index_params);
+
+    size_t k = 11;
+    size_t n = 100;
+
+    for (size_t i = 0; i < n; i++) {
+        this->GenerateAndAddVector(i, i); // {i, i, i, i}
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    float16 query[dim];
+    GenerateVector(query, 50); // {50, 50, 50, 50}
+
+    // Vectors values are equal to the id, so the 11 closest vectors are 45, 46...50
+    // (closest), 51...55
+    static size_t expected_res_order[] = {45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55};
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, expected_res_order[index]);    // results are sorted by ID
+        ASSERT_EQ(score, 4 * (50 - id) * (50 - id)); // L2 distance
+    };
+
+    runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID);
+}
+
+TEST_F(FP16HNSWTest, searchByID) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 200, .M = 16, .efConstruction = 200};
+    search_by_id_test(params);
+}
+
+TEST_F(FP16BruteForceTest, searchByID) {
+    BFParams params = {.dim = 4, .initialCapacity = 200};
+    search_by_id_test(params);
+}
+
+TEST_F(FP16TieredTest, searchByID) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 200, .M = 16, .efConstruction = 200};
+    search_by_id_test(params);
+}
+
+template <typename params_t>
+void FP16Test::search_by_score_test(params_t index_params) {
+    SetUp(index_params);
+
+    size_t k = 11;
+    size_t n = 100;
+
+    for (size_t i = 0; i < n; i++) {
+        this->GenerateAndAddVector(i, i); // {i, i, i, i}
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    float16 query[dim];
+    GenerateVector(query, 50); // {50, 50, 50, 50}
+    // Vectors values are equal to the id, so the 11 closest vectors are
+    // 45, 46...50 (closest), 51...55
+    static size_t expected_res_order[] = {50, 49, 51, 48, 52, 47, 53, 46, 54, 45, 55};
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, expected_res_order[index]);
+        ASSERT_EQ(score, 4 * (50 - id) * (50 - id)); // L2 distance
+    };
+
+    runTopKSearchTest(index, query, k, verify_res);
+}
+
+TEST_F(FP16HNSWTest, searchByScore) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 200, .M = 16, .efConstruction = 200};
+    search_by_score_test(params);
+}
+
+TEST_F(FP16BruteForceTest, searchByScore) {
+    BFParams params = {.dim = 4, .initialCapacity = 200};
+    search_by_score_test(params);
+}
+
+TEST_F(FP16TieredTest, searchByScore) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 200, .M = 16, .efConstruction = 200};
+    search_by_score_test(params);
+}
+
+template <typename params_t>
+void FP16Test::search_empty_index_test(params_t params) {
+    size_t n = 100;
+    size_t k = 11;
+
+    SetUp(params);
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
+
+    float16 query[dim];
+    GenerateVector(query, 50);
+
+    // We do not expect any results.
+    VecSimQueryReply *res = VecSimIndex_TopKQuery(index, query, k, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    VecSimQueryReply_Iterator *it = VecSimQueryReply_GetIterator(res);
+    ASSERT_EQ(VecSimQueryReply_IteratorNext(it), nullptr);
+    VecSimQueryReply_IteratorFree(it);
+    VecSimQueryReply_Free(res);
+
+    res = VecSimIndex_RangeQuery(index, query, 1.0, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    VecSimQueryReply_Free(res);
+
+    // Add some vectors and remove them all from index, so it will be empty again.
+    for (size_t i = 0; i < n; i++) {
+        GenerateAndAddVector(i);
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+    for (size_t i = 0; i < n; i++) {
+        VecSimIndex_DeleteVector(index, i);
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
+
+    // Again - we do not expect any results.
+    res = VecSimIndex_TopKQuery(index, query, k, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    it = VecSimQueryReply_GetIterator(res);
+    ASSERT_EQ(VecSimQueryReply_IteratorNext(it), nullptr);
+    VecSimQueryReply_IteratorFree(it);
+    VecSimQueryReply_Free(res);
+
+    res = VecSimIndex_RangeQuery(index, query, 1.0, NULL, BY_SCORE);
+    ASSERT_EQ(VecSimQueryReply_Len(res), 0);
+    VecSimQueryReply_Free(res);
+}
+
+TEST_F(FP16HNSWTest, SearchEmptyIndex) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 0};
+    search_empty_index_test(params);
+}
+
+TEST_F(FP16BruteForceTest, SearchEmptyIndex) {
+    BFParams params = {.dim = 4, .initialCapacity = 0};
+    search_empty_index_test(params);
+}
+
+TEST_F(FP16TieredTest, SearchEmptyIndex) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 0};
+    search_empty_index_test(params);
+}
+
+template <typename params_t>
+void FP16Test::test_override(params_t params) {
+    size_t n = 100;
+    size_t new_n = 250;
+    SetUp(params);
+
+    // Insert n vectors.
+    for (size_t i = 0; i < n; i++) {
+        ASSERT_EQ(GenerateAndAddVector(i, i), 1);
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    // Override n vectors, the first 100 will be overwritten (deleted first).
+    for (size_t i = 0; i < n; i++) {
+        ASSERT_EQ(GenerateAndAddVector(i, i), 0);
+    }
+
+    // Add up to new_n vectors.
+    for (size_t i = n; i < new_n; i++) {
+        ASSERT_EQ(GenerateAndAddVector(i, i), 1);
+    }
+
+    float16 query[dim];
+    GenerateVector(query, new_n);
+
+    // Vectors values equals their id, so we expect the larger the id the closest it will be to the
+    // query.
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, new_n - 1 - index) << "id: " << id << " score: " << score;
+        float16 a = vecsim_types::FP32_to_FP16(new_n);
+        float16 b = vecsim_types::FP32_to_FP16(id);
+        float diff = vecsim_types::FP16_to_FP32(a) - vecsim_types::FP16_to_FP32(b);
+        float exp_score = 4 * diff * diff;
+        ASSERT_EQ(score, exp_score) << "id: " << id << " score: " << score;
+    };
+    runTopKSearchTest(index, query, 300, verify_res);
+}
+
+TEST_F(FP16HNSWTest, Override) {
+    HNSWParams params = {
+        .dim = 4, .initialCapacity = 100, .M = 8, .efConstruction = 20, .efRuntime = 250};
+    test_override(params);
+}
+
+TEST_F(FP16BruteForceTest, Override) {
+    BFParams params = {.dim = 4, .initialCapacity = 100};
+    test_override(params);
+}
+
+TEST_F(FP16TieredTest, Override) {
+    HNSWParams params = {
+        .dim = 4, .initialCapacity = 100, .M = 8, .efConstruction = 20, .efRuntime = 250};
+    test_override(params);
+}
+
+template <typename params_t>
+void FP16Test::test_range_query(params_t params) {
+    size_t n = params.initialCapacity;
+    SetUp(params);
+
+    float pivot_value = 1.0f;
+    float16 pivot_vec[dim];
+    GenerateVector(pivot_vec, pivot_value);
+
+    float radius = 1.5f;
+    std::mt19937 gen(42);
+    std::uniform_real_distribution<> dis(pivot_value - radius, pivot_value + radius);
+
+    // insert 20 vectors near a pivot vector.
+    size_t n_close = 20;
+    for (size_t i = 0; i < n_close; i++) {
+        float random_number = dis(gen);
+        GenerateAndAddVector(i, random_number);
+    }
+    float16 max_vec[dim];
+    GenerateVector(max_vec, pivot_value + radius);
+    double max_dist = FP16_L2Sqr(pivot_vec, max_vec, dim);
+
+    // Add more vectors far from the pivot vector
+    for (size_t i = n_close; i < n; i++) {
+        float random_number = dis(gen);
+        GenerateAndAddVector(i, 5.0 + random_number);
+    }
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    auto verify_res_by_score = [&](size_t id, double score, size_t index) {
+        ASSERT_LE(id, n_close - 1) << "score: " << score;
+        ASSERT_LE(score, max_dist);
+    };
+    uint expected_num_results = n_close;
+
+    runRangeQueryTest(index, pivot_vec, max_dist, verify_res_by_score, expected_num_results,
+                      BY_SCORE);
+}
+
+TEST_F(FP16HNSWTest, rangeQuery) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 100};
+    test_range_query(params);
+}
+
+TEST_F(FP16BruteForceTest, rangeQuery) {
+    BFParams params = {.dim = 4, .initialCapacity = 100};
+    test_range_query(params);
+}
+
+TEST_F(FP16TieredTest, rangeQuery) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 100};
+    test_range_query(params);
+}
+
+template <typename params_t>
+void FP16Test::test_get_distance(params_t params, VecSimMetric metric) {
+    static double constexpr expected_dists[2] = {0.25, -1.5}; // indexed by metric: [L2, IP]
+    size_t n = 1;
+    params.metric = metric;
+    SetUp(params);
+
+    float16 vec[dim];
+    GenerateVector(vec, 0.25, 0.25); // {0.25, 0.5, 0.75, 1}
+    VecSimIndex_AddVector(index, vec, 0);
+    ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
+
+    float16 query[dim];
+    GenerateVector(query, 0.5, 0.25); // {0.5, 0.75, 1, 1.25}
+
+    double dist = VecSimIndex_GetDistanceFrom_Unsafe(index, 0, query);
+
+    // Manually calculated. Values were chosen such that no accuracy is lost in the
+    // conversion from float16 to float.
+    ASSERT_EQ(dist, expected_dists[metric]) << "metric: " << metric;
+}
+
+TEST_F(FP16HNSWTest, GetDistanceL2Test) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 1};
+    test_get_distance(params, VecSimMetric_L2);
+}
+
+TEST_F(FP16BruteForceTest, GetDistanceL2Test) {
+    BFParams params = {.dim = 4, .initialCapacity = 1};
+    test_get_distance(params, VecSimMetric_L2);
+}
+
+TEST_F(FP16TieredTest, GetDistanceL2Test) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 1};
+    test_get_distance(params, VecSimMetric_L2);
+}
+
+TEST_F(FP16HNSWTest, GetDistanceIPTest) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 1};
+    test_get_distance(params, VecSimMetric_IP);
+}
+
+TEST_F(FP16BruteForceTest, GetDistanceIPTest) {
+    BFParams params = {.dim = 4, .initialCapacity = 1};
+    test_get_distance(params, VecSimMetric_IP);
+}
+
+TEST_F(FP16TieredTest, GetDistanceIPTest) {
+    HNSWParams params = {.dim = 4, .initialCapacity = 1};
+    test_get_distance(params, VecSimMetric_IP);
+}
+
+/* ---------------------------- Batch iterator tests ---------------------------- */
+
+// See comment above test_override for why we run this test up to n = 250
+template <typename params_t>
+void FP16Test::test_batch_iterator_basic(params_t params) {
+    size_t n = params.initialCapacity;
+    SetUp(params);
+
+    // For every i, add the vector (i,i,i,i) under the label i.
+    for (size_t i = 0; i < n; i++) {
+        ASSERT_EQ(GenerateAndAddVector(i, i), 1);
+    }
+
+    ASSERT_EQ(VecSimIndex_IndexSize(index), n);
+
+    // Query for (n,n,n,n) vector (recall that n-1 is the largest id in the index).
+    float16 query[dim];
+    GenerateVector(query, n);
+
+    VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query, nullptr);
+    size_t iteration_num = 0;
+
+    // Get the 5 vectors whose ids are the maximal among those that haven't been returned yet
+    // in every iteration. The results order should be sorted by their score (distance from the
+    // query vector), which means sorted from the largest id to the lowest.
+    size_t n_res = 5;
+    while (VecSimBatchIterator_HasNext(batchIterator)) {
+        std::vector<size_t> expected_ids(n_res);
+        for (size_t i = 0; i < n_res; i++) {
+            expected_ids[i] = (n - iteration_num * n_res - i - 1);
+        }
+        auto verify_res = [&](size_t id, double score, size_t index) {
+            ASSERT_EQ(expected_ids[index], id)
+                << "iteration_num: " << iteration_num << " index: " << index << " score: " << score;
+        };
+        runBatchIteratorSearchTest(batchIterator, n_res, verify_res);
+        iteration_num++;
+    }
+    ASSERT_EQ(iteration_num, n / n_res);
+    VecSimBatchIterator_Free(batchIterator);
+}
+
+TEST_F(FP16HNSWTest, BatchIteratorBasic) {
+    size_t n = 250;
+    HNSWParams params = {
+        .dim = 4, .initialCapacity = n, .M = 8, .efConstruction = 20, .efRuntime = n};
+    test_batch_iterator_basic(params);
+}
+
+TEST_F(FP16BruteForceTest, BatchIteratorBasic) {
+    size_t n = 250;
+    BFParams params = {.dim = 4, .initialCapacity = n};
+    test_batch_iterator_basic(params);
+}
+
+TEST_F(FP16TieredTest, BatchIteratorBasic) {
+    size_t n = 250;
+    HNSWParams params = {
+        .dim = 4, .initialCapacity = n, .M = 8, .efConstruction = 20, .efRuntime = n};
+    test_batch_iterator_basic(params);
+}
+
+/* ---------------------------- Info tests ---------------------------- */
+
+template <typename params_t>
+VecSimIndexInfo FP16Test::test_info(params_t params) {
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    EXPECT_EQ(info.commonInfo.basicInfo.dim, params.dim);
+    EXPECT_EQ(info.commonInfo.basicInfo.isMulti, params.multi);
+    EXPECT_EQ(info.commonInfo.basicInfo.type, VecSimType_FLOAT16);
+    EXPECT_EQ(info.commonInfo.basicInfo.blockSize, DEFAULT_BLOCK_SIZE);
+    EXPECT_EQ(info.commonInfo.indexSize, 0);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.commonInfo.memory, index->getAllocationSize());
+    EXPECT_EQ(info.commonInfo.basicInfo.metric, VecSimMetric_L2);
+
+    // Validate that the basic (static) info agrees with the full info on every shared field.
+    VecSimIndexBasicInfo s_info = VecSimIndex_BasicInfo(index);
+    EXPECT_EQ(info.commonInfo.basicInfo.algo, s_info.algo);
+    EXPECT_EQ(info.commonInfo.basicInfo.dim, s_info.dim);
+    EXPECT_EQ(info.commonInfo.basicInfo.blockSize, s_info.blockSize);
+    EXPECT_EQ(info.commonInfo.basicInfo.type, s_info.type);
+    EXPECT_EQ(info.commonInfo.basicInfo.isMulti, s_info.isMulti);
+    EXPECT_EQ(info.commonInfo.basicInfo.metric, s_info.metric);
+    EXPECT_EQ(info.commonInfo.basicInfo.isTiered, s_info.isTiered);
+
+    return info;
+}
+
+void FP16HNSWTest::test_info(bool is_multi) {
+    HNSWParams params = {.dim = 128, .multi = is_multi};
+    SetUp(params);
+    VecSimIndexInfo info = FP16Test::test_info(params);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+
+    ASSERT_EQ(info.hnswInfo.M, HNSW_DEFAULT_M);
+    ASSERT_EQ(info.hnswInfo.efConstruction, HNSW_DEFAULT_EF_C);
+    ASSERT_EQ(info.hnswInfo.efRuntime, HNSW_DEFAULT_EF_RT);
+    ASSERT_DOUBLE_EQ(info.hnswInfo.epsilon, HNSW_DEFAULT_EPSILON);
+}
+TEST_F(FP16HNSWTest, testInfoSingle) { test_info(false); }
+
+TEST_F(FP16HNSWTest, testInfoMulti) { test_info(true); }
+
+void FP16BruteForceTest::test_info(bool is_multi) {
+    BFParams params = {.dim = 128, .multi = is_multi};
+    SetUp(params);
+    VecSimIndexInfo info = FP16Test::test_info(params);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_BF);
+}
+
+TEST_F(FP16BruteForceTest, testInfoSingle) { test_info(false); }
+TEST_F(FP16BruteForceTest, testInfoMulti) { test_info(true); }
+
+void FP16TieredTest::test_info(bool is_multi) {
+    size_t bufferLimit = 1000;
+    HNSWParams hnsw_params = {.dim = 128, .multi = is_multi};
+    TieredIndexParams params = generate_tiered_params(hnsw_params, 1, bufferLimit);
+    SetUp(params);
+
+    VecSimIndexInfo info = FP16Test::test_info(hnsw_params);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+    VecSimIndexInfo frontendIndexInfo = CastToBruteForce()->info();
+    VecSimIndexInfo backendIndexInfo = CastToHNSW()->info();
+
+    compareCommonInfo(info.tieredInfo.frontendCommonInfo, frontendIndexInfo.commonInfo);
+    compareFlatInfo(info.tieredInfo.bfInfo, frontendIndexInfo.bfInfo);
+    compareCommonInfo(info.tieredInfo.backendCommonInfo, backendIndexInfo.commonInfo);
+    compareHNSWInfo(info.tieredInfo.backendInfo.hnswInfo, backendIndexInfo.hnswInfo);
+
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          backendIndexInfo.commonInfo.memory +
+                                          frontendIndexInfo.commonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
+    EXPECT_EQ(info.tieredInfo.bufferLimit, bufferLimit);
+    EXPECT_EQ(info.tieredInfo.specificTieredBackendInfo.hnswTieredInfo.pendingSwapJobsThreshold, 1);
+
+    GenerateAndAddVector(1, 1);
+    info = index->info();
+
+    EXPECT_EQ(info.commonInfo.indexSize, 1);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 1);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          info.tieredInfo.backendCommonInfo.memory +
+                                          info.tieredInfo.frontendCommonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, true);
+
+    mock_thread_pool.thread_iteration();
+    info = index->info();
+
+    EXPECT_EQ(info.commonInfo.indexSize, 1);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 1);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 1);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          info.tieredInfo.backendCommonInfo.memory +
+                                          info.tieredInfo.frontendCommonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
+
+    if (is_multi) {
+        GenerateAndAddVector(1, 1);
+        info = index->info();
+
+        EXPECT_EQ(info.commonInfo.indexSize, 2);
+        EXPECT_EQ(info.commonInfo.indexLabelCount, 1);
+        EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 1);
+        EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 1);
+        EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 1);
+        EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 1);
+        EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                              info.tieredInfo.backendCommonInfo.memory +
+                                              info.tieredInfo.frontendCommonInfo.memory);
+        EXPECT_EQ(info.tieredInfo.backgroundIndexing, true);
+    }
+
+    VecSimIndex_DeleteVector(index, 1);
+    info = index->info();
+
+    EXPECT_EQ(info.commonInfo.indexSize, 0);
+    EXPECT_EQ(info.commonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.backendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexSize, 0);
+    EXPECT_EQ(info.tieredInfo.frontendCommonInfo.indexLabelCount, 0);
+    EXPECT_EQ(info.commonInfo.memory, info.tieredInfo.management_layer_memory +
+                                          info.tieredInfo.backendCommonInfo.memory +
+                                          info.tieredInfo.frontendCommonInfo.memory);
+    EXPECT_EQ(info.tieredInfo.backgroundIndexing, false);
+}
+
+TEST_F(FP16TieredTest, testInfoSingle) { test_info(false); }
+TEST_F(FP16TieredTest, testInfoMulti) { test_info(true); }
+
+template <typename params_t>
+void FP16Test::test_info_iterator(VecSimMetric metric) {
+    size_t n = 100;
+    size_t d = 128;
+    params_t params = {.dim = d, .metric = metric, .initialCapacity = n};
+    SetUp(params);
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    VecSimInfoIterator *infoIter = VecSimIndex_InfoIterator(index);
+    VecSimAlgo algo = info.commonInfo.basicInfo.algo;
+    if (algo == VecSimAlgo_HNSWLIB) {
+        compareHNSWIndexInfoToIterator(info, infoIter);
+    } else if (algo == VecSimAlgo_BF) {
+        compareFlatIndexInfoToIterator(info, infoIter);
+    }
+    VecSimInfoIterator_Free(infoIter);
+}
+
+TEST_F(FP16BruteForceTest, InfoIteratorCosine) {
+    test_info_iterator<BFParams>(VecSimMetric_Cosine);
+}
+TEST_F(FP16BruteForceTest, InfoIteratorIP) { test_info_iterator<BFParams>(VecSimMetric_IP); }
+TEST_F(FP16BruteForceTest, InfoIteratorL2) { test_info_iterator<BFParams>(VecSimMetric_L2); }
+TEST_F(FP16HNSWTest, InfoIteratorCosine) { test_info_iterator<HNSWParams>(VecSimMetric_Cosine); }
+TEST_F(FP16HNSWTest, InfoIteratorIP) { test_info_iterator<HNSWParams>(VecSimMetric_IP); }
+TEST_F(FP16HNSWTest, InfoIteratorL2) { test_info_iterator<HNSWParams>(VecSimMetric_L2); }
+
+void FP16TieredTest::test_info_iterator(VecSimMetric metric) {
+    size_t n = 100;
+    size_t d = 128;
+    HNSWParams params = {.dim = d, .metric = metric, .initialCapacity = n};
+    SetUp(params);
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    VecSimInfoIterator *infoIter = VecSimIndex_InfoIterator(index);
+    VecSimIndexInfo frontendIndexInfo = CastToBruteForce()->info();
+    VecSimIndexInfo backendIndexInfo = CastToHNSW()->info();
+    VecSimInfoIterator_Free(infoIter); // NOTE(review): infoIter contents are never verified
+}
+
+TEST_F(FP16TieredTest, InfoIteratorCosine) { test_info_iterator(VecSimMetric_Cosine); }
+TEST_F(FP16TieredTest, InfoIteratorIP) { test_info_iterator(VecSimMetric_IP); }
+TEST_F(FP16TieredTest, InfoIteratorL2) { test_info_iterator(VecSimMetric_L2); }
+
+/* ---------------------------- HNSW specific tests ---------------------------- */
+
+void FP16HNSWTest::test_serialization(bool is_multi) {
+    size_t dim = 4;
+    size_t n = 1001;
+    size_t n_labels[] = {n, 100};
+    size_t M = 8;
+    size_t ef = 10;
+    double epsilon = 0.004;
+    size_t blockSize = 20;
+    std::string multiToString[] = {"single", "multi_100labels_"};
+    int i = is_multi;
+
+    HNSWParams params{.type = VecSimType_FLOAT16,
+                      .dim = dim,
+                      .metric = VecSimMetric_L2,
+                      .multi = is_multi,
+                      .initialCapacity = n,
+                      .blockSize = blockSize,
+                      .M = M,
+                      .efConstruction = ef,
+                      .efRuntime = ef,
+                      .epsilon = epsilon};
+    SetUp(params);
+
+    auto *hnsw_index = this->CastToHNSW();
+
+    std::vector<float16> data(n * dim);
+    std::mt19937 gen(42);
+    std::uniform_real_distribution<> dis;
+
+    for (size_t i = 0; i < n * dim; ++i) {
+        float val = dis(gen);
+        data[i] = vecsim_types::FP32_to_FP16(val);
+    }
+
+    for (size_t j = 0; j < n; ++j) {
+        VecSimIndex_AddVector(index, data.data() + dim * j, j % n_labels[i]);
+    }
+
+    auto file_name = std::string(getenv("ROOT")) + "/tests/unit/data/1k-d4-L2-M8-ef_c10_" +
+                     VecSimType_ToString(VecSimType_FLOAT16) + "_" + multiToString[i] +
+                     ".hnsw_current_version";
+
+    // Save the index with the default version (V3).
+    hnsw_index->saveIndex(file_name);
+
+    // Fetch info after saving, as memory size change during saving.
+    VecSimIndexInfo info = VecSimIndex_Info(index);
+    ASSERT_EQ(info.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+    ASSERT_EQ(info.hnswInfo.M, M);
+    ASSERT_EQ(info.hnswInfo.efConstruction, ef);
+    ASSERT_EQ(info.hnswInfo.efRuntime, ef);
+    ASSERT_EQ(info.commonInfo.indexSize, n);
+    ASSERT_EQ(info.commonInfo.basicInfo.metric, VecSimMetric_L2);
+    ASSERT_EQ(info.commonInfo.basicInfo.type, VecSimType_FLOAT16);
+    ASSERT_EQ(info.commonInfo.basicInfo.dim, dim);
+    ASSERT_EQ(info.commonInfo.indexLabelCount, n_labels[i]);
+
+    // Load the index from the file.
+    VecSimIndex *serialized_index = HNSWFactory::NewIndex(file_name);
+    auto *serialized_hnsw_index = this->CastToHNSW(serialized_index);
+
+    // Verify that the index was loaded as expected.
+    ASSERT_TRUE(serialized_hnsw_index->checkIntegrity().valid_state);
+
+    VecSimIndexInfo info2 = VecSimIndex_Info(serialized_index);
+    ASSERT_EQ(info2.commonInfo.basicInfo.algo, VecSimAlgo_HNSWLIB);
+    ASSERT_EQ(info2.hnswInfo.M, M);
+    ASSERT_EQ(info2.commonInfo.basicInfo.isMulti, is_multi);
+    ASSERT_EQ(info2.commonInfo.basicInfo.blockSize, blockSize);
+    ASSERT_EQ(info2.hnswInfo.efConstruction, ef);
+    ASSERT_EQ(info2.hnswInfo.efRuntime, ef);
+    ASSERT_EQ(info2.commonInfo.indexSize, n);
+    ASSERT_EQ(info2.commonInfo.basicInfo.metric, VecSimMetric_L2);
+    ASSERT_EQ(info2.commonInfo.basicInfo.type, VecSimType_FLOAT16);
+    ASSERT_EQ(info2.commonInfo.basicInfo.dim, dim);
+    ASSERT_EQ(info2.commonInfo.indexLabelCount, n_labels[i]);
+    ASSERT_EQ(info2.hnswInfo.epsilon, epsilon);
+
+    // Check the functionality of the loaded index.
+
+    float16 new_vec[dim];
+    GenerateVector(new_vec, 0.25);
+    VecSimIndex_AddVector(serialized_index, new_vec, n);
+    auto verify_res = [&](size_t id, double score, size_t index) {
+        ASSERT_EQ(id, n) << "score: " << score;
+        ASSERT_EQ(score, 0);
+    };
+    runTopKSearchTest(serialized_index, new_vec, 1, verify_res);
+    VecSimIndex_DeleteVector(serialized_index, 1);
+
+    size_t n_per_label = n / n_labels[i];
+    ASSERT_TRUE(serialized_hnsw_index->checkIntegrity().valid_state);
+    ASSERT_EQ(VecSimIndex_IndexSize(serialized_index), n + 1 - n_per_label);
+
+    // Clean up.
+    remove(file_name.c_str());
+    VecSimIndex_Free(serialized_index);
+}
+
+TEST_F(FP16HNSWTest, SerializationCurrentVersion) { test_serialization(false); }
+
+TEST_F(FP16HNSWTest, SerializationCurrentVersionMulti) { test_serialization(true); }
+
+template <typename params_t>
+void FP16Test::get_element_neighbors(params_t params) {
+    size_t n = 0;
+
+    SetUp(params);
+    auto *hnsw_index = CastToHNSW();
+
+    // Add vectors until we have at least 2 vectors at level 1.
+    size_t vectors_in_higher_levels = 0;
+    while (vectors_in_higher_levels < 2) {
+        GenerateAndAddVector(hnsw_index, n, n);
+        if (hnsw_index->getGraphDataByInternalId(n)->toplevel > 0) {
+            vectors_in_higher_levels++;
+        }
+        n++;
+    }
+    ASSERT_GE(n, 1) << "n: " << n;
+
+    // Go over all vectors and validate that the getElementNeighbors debug command returns the
+    // neighbors properly.
+    for (size_t id = 0; id < n; id++) {
+        LevelData &cur = hnsw_index->getLevelData(id, 0);
+        int **neighbors_output;
+        VecSimDebug_GetElementNeighborsInHNSWGraph(index, id, &neighbors_output);
+        auto graph_data = hnsw_index->getGraphDataByInternalId(id);
+        for (size_t l = 0; l <= graph_data->toplevel; l++) {
+            auto &level_data = hnsw_index->getLevelData(graph_data, l);
+            auto &neighbours = neighbors_output[l];
+            ASSERT_EQ(neighbours[0], level_data.numLinks);
+            for (size_t j = 1; j <= neighbours[0]; j++) {
+                ASSERT_EQ(neighbours[j], level_data.links[j - 1]);
+            }
+        }
+        VecSimDebug_ReleaseElementNeighborsInHNSWGraph(neighbors_output);
+    }
+}
+
+TEST_F(FP16HNSWTest, getElementNeighbors) {
+    size_t dim = 4;
+    size_t M = 20;
+    HNSWParams params = {.dim = dim, .M = M};
+    get_element_neighbors(params);
+}
+
+TEST_F(FP16TieredTest, getElementNeighbors) {
+    size_t dim = 4;
+    size_t M = 20;
+    HNSWParams params = {.dim = dim, .M = M};
+    get_element_neighbors(params);
+}
diff --git a/tests/unit/test_spaces.cpp b/tests/unit/test_spaces.cpp
index 9f30e9833..85da64b45 100644
--- a/tests/unit/test_spaces.cpp
+++ b/tests/unit/test_spaces.cpp
@@ -81,6 +81,23 @@ TEST_F(SpacesTest, bf16_l2_no_optimization_func_test) {
     ASSERT_EQ(dist, FP32_L2Sqr((const void *)sanity_a, (const void *)sanity_b, dim));
 }
 
+TEST_F(SpacesTest, fp16_l2_no_optimization_func_test) {
+    size_t dim = 4;
+
+    float16 a[dim], b[dim];
+    float sanity_a[dim], sanity_b[dim];
+    for (size_t i = 0; i < dim; i++) {
+        // multiples of 0.25 have no rounding error when converted to float16
+        sanity_a[i] = 0.5f + i * 0.25f;
+        a[i] = vecsim_types::FP32_to_FP16(sanity_a[i]);
+        sanity_b[i] = (float)i * 0.25f;
+        b[i] = vecsim_types::FP32_to_FP16(sanity_b[i]);
+    }
+
+    float dist = FP16_L2Sqr((const void *)a, (const void *)b, dim);
+    ASSERT_EQ(dist, FP32_L2Sqr((const void *)sanity_a, (const void *)sanity_b, dim));
+}
+
 TEST_F(SpacesTest, float_ip_no_optimization_func_test) {
     size_t dim = 5;
 
@@ -135,7 +152,28 @@ TEST_F(SpacesTest, bf16_normalize_test) {
     }
 }
 
-TEST_F(SpacesTest, bf16_ip_no_optimization_func_test2) {
+TEST_F(SpacesTest, fp16_normalize_test) {
+    size_t dim = 4;
+
+    float16 a[dim];
+    float sanity_a[dim];
+    for (size_t i = 0; i < dim; i++) {
+        // (4, 4, 4, 4): its norm is 8, so each normalized entry is exactly 0.5
+        sanity_a[i] = float(4);
+        a[i] = vecsim_types::FP32_to_FP16(sanity_a[i]);
+    }
+
+    spaces::GetNormalizeFunc<float16>()(a, dim);
+    spaces::GetNormalizeFunc<float>()(sanity_a, dim);
+    for (size_t i = 0; i < dim; i++) {
+        ASSERT_EQ(vecsim_types::FP16_to_FP32(a[i]), sanity_a[i])
+            << " fp16 normalization failed for i = " << i;
+        ASSERT_EQ(vecsim_types::FP16_to_FP32(a[i]), 0.5)
+            << " fp16 normalization failed for i = " << i;
+    }
+}
+
+TEST_F(SpacesTest, bf16_ip_no_optimization_func_test) {
     size_t dim = 4;
 
     bfloat16 a[dim], b[dim];
@@ -152,6 +190,23 @@ TEST_F(SpacesTest, bf16_ip_no_optimization_func_test2) {
     ASSERT_EQ(dist, FP32_InnerProduct((const void *)sanity_a, (const void *)sanity_b, dim));
 }
 
+TEST_F(SpacesTest, fp16_ip_no_optimization_func_test) {
+    size_t dim = 4;
+
+    float16 a[dim], b[dim];
+    float sanity_a[dim], sanity_b[dim];
+    for (size_t i = 0; i < dim; i++) {
+        // multiples of 0.25 have no rounding error when converted to float16
+        sanity_a[i] = 0.5f + i * 0.25f;
+        a[i] = vecsim_types::FP32_to_FP16(sanity_a[i]);
+        sanity_b[i] = (float)i * 0.25f;
+        b[i] = vecsim_types::FP32_to_FP16(sanity_b[i]);
+    }
+
+    float dist = FP16_InnerProduct((const void *)a, (const void *)b, dim);
+    ASSERT_EQ(dist, FP32_InnerProduct((const void *)sanity_a, (const void *)sanity_b, dim));
+}
+
 TEST_F(SpacesTest, GetDistFuncInvalidMetricFP32) {
     EXPECT_THROW(
         (spaces::GetDistFunc<float, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
@@ -167,6 +222,11 @@ TEST_F(SpacesTest, GetDistFuncInvalidMetricBF16) {
                                                        nullptr)),
                  std::invalid_argument);
 }
+TEST_F(SpacesTest, GetDistFuncInvalidMetricFP16) {
+    EXPECT_THROW(
+        (spaces::GetDistFunc<float16, float>((VecSimMetric)(VecSimMetric_Cosine + 1), 10, nullptr)),
+        std::invalid_argument);
+}
 
 using namespace spaces;
 

From fce1754a5de7853553afc253a85b3eb60f5c307c Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 7 May 2024 15:09:39 +0000
Subject: [PATCH 04/15] added flow tests

---
 src/VecSim/types/float16.h       |   2 +-
 src/python_bindings/bindings.cpp |  58 +++++----
 tests/flow/common.py             |   3 +
 tests/flow/test_bruteforce.py    | 109 ++++++++++++++++
 tests/flow/test_hnsw.py          | 210 +++++++++++++++++++++++++++++++
 tests/flow/test_hnsw_tiered.py   |  12 +-
 6 files changed, 367 insertions(+), 27 deletions(-)

diff --git a/src/VecSim/types/float16.h b/src/VecSim/types/float16.h
index 8e22e792a..0e80e84fc 100644
--- a/src/VecSim/types/float16.h
+++ b/src/VecSim/types/float16.h
@@ -46,7 +46,7 @@ static inline float FP16_to_FP32(float16 input) {
     return _interpret_as_float(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
 }
 
-static inline struct float16 FP32_to_FP16(float input) {
+static inline float16 FP32_to_FP16(float input) {
     // via Fabian "ryg" Giesen.
     // https://gist.github.com/2156668
     uint32_t sign_mask = 0x80000000u;
diff --git a/src/python_bindings/bindings.cpp b/src/python_bindings/bindings.cpp
index 5c568c1dd..4d15374cf 100644
--- a/src/python_bindings/bindings.cpp
+++ b/src/python_bindings/bindings.cpp
@@ -9,6 +9,7 @@
 #include "VecSim/index_factories/hnsw_factory.h"
 #include "VecSim/batch_iterator.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 
 #include "pybind11/pybind11.h"
 #include "pybind11/numpy.h"
@@ -22,6 +23,7 @@
 namespace py = pybind11;
 
 using bfloat16 = vecsim_types::bfloat16;
+using float16 = vecsim_types::float16;
 
 // Helper function that iterates query results and wrap them in python numpy object -
 // a tuple of two 2D arrays: (labels, distances)
@@ -95,7 +97,7 @@ class PyBatchIterator {
 // @input or @query arguments are a py::object object. (numpy arrays are acceptable)
 class PyVecSimIndex {
 private:
-    template <typename DataType, typename DistType>
+    template <typename DataType, typename DistType, typename NPArrayType = DataType>
     inline py::object rawVectorsAsNumpy(labelType label, size_t dim) {
         std::vector<std::vector<DataType>> vectors;
         if (index->basicInfo().algo == VecSimAlgo_BF) {
@@ -107,37 +109,35 @@ class PyVecSimIndex {
                 ->getDataByLabel(label, vectors);
         }
         size_t n_vectors = vectors.size();
-        if (std::is_same_v<DataType, bfloat16>) {
-            // Convert the vectors to float32 for numpy array.
-            auto *data_numpy = new float[n_vectors * dim];
+        auto *data_numpy = new NPArrayType[n_vectors * dim];
+
+        // Copy the vector blobs into one contiguous array of data, and free the original buffer
+        // afterwards.
+        if constexpr (std::is_same_v<DataType, bfloat16>) {
             for (size_t i = 0; i < n_vectors; i++) {
                 for (size_t j = 0; j < dim; j++) {
                     data_numpy[i * dim + j] = vecsim_types::bfloat16_to_float32(vectors[i][j]);
                 }
             }
-            py::capsule free_when_done(data_numpy,
-                                       [](void *vector_data) { delete[](float *) vector_data; });
-            return py::array_t<float>(
-                {n_vectors, dim}, // shape
-                {dim * sizeof(float),
-                 sizeof(float)}, // C-style contiguous strides for the data type
-                data_numpy,      // the data pointer
-                free_when_done);
-        }
-        auto *data_numpy = new DataType[n_vectors * dim];
-        // Copy the vector blobs into one contiguous array of data, and free the original buffer
-        // afterwards.
-        for (size_t i = 0; i < n_vectors; i++) {
-            memcpy(data_numpy + i * dim, vectors[i].data(), dim * sizeof(DataType));
+        } else if constexpr (std::is_same_v<DataType, float16>) {
+            for (size_t i = 0; i < n_vectors; i++) {
+                for (size_t j = 0; j < dim; j++) {
+                    data_numpy[i * dim + j] = vecsim_types::FP16_to_FP32(vectors[i][j]);
+                }
+            }
+        } else {
+            for (size_t i = 0; i < n_vectors; i++) {
+                memcpy(data_numpy + i * dim, vectors[i].data(), dim * sizeof(NPArrayType));
+            }
         }
 
         py::capsule free_when_done(data_numpy,
-                                   [](void *vector_data) { delete[](DataType *) vector_data; });
-        return py::array_t<DataType>(
+                                   [](void *vector_data) { delete[](NPArrayType *) vector_data; });
+        return py::array_t<NPArrayType>(
             {n_vectors, dim}, // shape
-            {dim * sizeof(DataType),
-             sizeof(DataType)}, // C-style contiguous strides for the data type
-            data_numpy,         // the data pointer
+            {dim * sizeof(NPArrayType),
+             sizeof(NPArrayType)}, // C-style contiguous strides for the data type
+            data_numpy,            // the data pointer
             free_when_done);
     }
 
@@ -213,7 +213,9 @@ class PyVecSimIndex {
         } else if (info.commonInfo.basicInfo.type == VecSimType_FLOAT64) {
             return rawVectorsAsNumpy<double, double>(label, dim);
         } else if (info.commonInfo.basicInfo.type == VecSimType_BFLOAT16) {
-            return rawVectorsAsNumpy<bfloat16, float>(label, dim);
+            return rawVectorsAsNumpy<bfloat16, float, float>(label, dim);
+        } else if (info.commonInfo.basicInfo.type == VecSimType_FLOAT16) {
+            return rawVectorsAsNumpy<float16, float, float>(label, dim);
         } else {
             throw std::runtime_error("Invalid vector data type");
         }
@@ -289,6 +291,9 @@ class PyHNSWLibIndex : public PyVecSimIndex {
         } else if (type == VecSimType_BFLOAT16) {
             auto *hnsw = dynamic_cast<HNSWIndex<bfloat16, float> *>(index.get());
             hnsw->saveIndex(location);
+        } else if (type == VecSimType_FLOAT16) {
+            auto *hnsw = dynamic_cast<HNSWIndex<float16, float> *>(index.get());
+            hnsw->saveIndex(location);
         } else {
             throw std::runtime_error("Invalid index data type");
         }
@@ -397,6 +402,10 @@ class PyHNSWLibIndex : public PyVecSimIndex {
             return dynamic_cast<HNSWIndex<bfloat16, float> *>(this->index.get())
                 ->checkIntegrity()
                 .valid_state;
+        } else if (type == VecSimType_FLOAT16) {
+            return dynamic_cast<HNSWIndex<float16, float> *>(this->index.get())
+                ->checkIntegrity()
+                .valid_state;
         } else {
             throw std::runtime_error("Invalid index data type");
         }
@@ -486,6 +495,7 @@ PYBIND11_MODULE(VecSim, m) {
         .value("VecSimType_FLOAT32", VecSimType_FLOAT32)
         .value("VecSimType_FLOAT64", VecSimType_FLOAT64)
         .value("VecSimType_BFLOAT16", VecSimType_BFLOAT16)
+        .value("VecSimType_FLOAT16", VecSimType_FLOAT16)
         .value("VecSimType_INT32", VecSimType_INT32)
         .value("VecSimType_INT64", VecSimType_INT64)
         .export_values();
diff --git a/tests/flow/common.py b/tests/flow/common.py
index 0815324ee..721a15320 100644
--- a/tests/flow/common.py
+++ b/tests/flow/common.py
@@ -60,6 +60,9 @@ def round_ms(f_value, ndigits = 2):
 def vec_to_bfloat16(vec):
     return vec.astype(bfloat16)
 
+def vec_to_float16(vec):
+    return vec.astype(np.float16)
+
 def get_ground_truth_results(dist_func, query, vectors, k):
     results = [{"dist": dist_func(query, vec), "label": key} for key, vec in vectors]
     results = sorted(results, key=lambda x: x["dist"])
diff --git a/tests/flow/test_bruteforce.py b/tests/flow/test_bruteforce.py
index 9512b02ad..fc738e299 100644
--- a/tests/flow/test_bruteforce.py
+++ b/tests/flow/test_bruteforce.py
@@ -434,3 +434,112 @@ def test_bf_bfloat16_multivalue():
 
     assert_allclose(bf_labels, [keys],  rtol=1e-5, atol=0)
     assert_allclose(bf_distances, [dists],  rtol=1e-5, atol=0)
+
+class TestFloat16():
+
+    num_labels=10_000
+    num_per_label=1
+    dim = 128
+    data = Data(VecSimType_FLOAT16, VecSimMetric_L2, spatial.distance.sqeuclidean, vec_to_float16, dim, num_labels, num_per_label)
+
+    # Not testing float16 cosine as type conversion biases mess up the results
+    def test_bf_float16_L2(self):
+        k = 10
+
+        keys, dists = self.data.measure_dists(k)
+        bf_labels, bf_distances = self.data.index.knn_query(self.data.query, k=k)
+        assert_allclose(bf_labels, [keys],  rtol=1e-5, atol=0)
+        assert_allclose(bf_distances, [dists],  rtol=1e-5, atol=0)
+        print(f"\nsanity test for {self.data.metric} and {self.data.type} pass")
+
+    def test_bf_float16_batch_iterator(self):
+        bfindex = self.data.index
+        num_elements = self.num_labels
+
+        batch_iterator = bfindex.create_batch_iterator(self.data.query)
+        labels_first_batch, distances_first_batch = batch_iterator.get_next_results(10, BY_ID)
+        for i, _ in enumerate(labels_first_batch[0][:-1]):
+            # assert sorting by id
+            assert(labels_first_batch[0][i] < labels_first_batch[0][i+1])
+
+        _, distances_second_batch = batch_iterator.get_next_results(10, BY_SCORE)
+        for i, dist in enumerate(distances_second_batch[0][:-1]):
+            # assert sorting by score
+            assert(distances_second_batch[0][i] < distances_second_batch[0][i+1])
+            # assert that every distance in the second batch is higher than any distance of the first batch
+            assert(len(distances_first_batch[0][np.where(distances_first_batch[0] > dist)]) == 0)
+
+        # reset
+        batch_iterator.reset()
+
+        # Run again in batches until depleted
+        batch_size = 1500
+        returned_results_num = 0
+        iterations = 0
+        start = time.time()
+        while batch_iterator.has_next():
+            iterations += 1
+            labels, distances = batch_iterator.get_next_results(batch_size, BY_SCORE)
+            returned_results_num += len(labels[0])
+
+        print(f'Total search time for running batches of size {batch_size} for index with {num_elements} of dim={self.dim}: {time.time() - start}')
+        assert (returned_results_num == num_elements)
+        assert (iterations == np.ceil(num_elements/batch_size))
+
+    def test_bf_float16_range_query(self):
+        bfindex = self.data.index
+        query_data = self.data.query
+
+        radius = 14
+        start = time.time()
+        bf_labels, bf_distances = bfindex.range_query(query_data, radius=radius)
+        end = time.time()
+        res_num = len(bf_labels[0])
+        print(f'\nlookup time for {self.num_labels} vectors with dim={self.dim} took {end - start} seconds, got {res_num} results')
+
+        # Verify that we got exactly all vectors within the range
+        results, keys = get_ground_truth_results(spatial.distance.sqeuclidean, query_data.flat, self.data.vectors, res_num)
+
+        assert_allclose(max(bf_distances[0]), results[res_num-1]["dist"], rtol=1e-05)
+        assert np.array_equal(np.array(bf_labels[0]), np.array(keys))
+        assert max(bf_distances[0]) <= radius
+        # Verify that the next closest vector that hasn't returned is not within the range
+        assert results[res_num]["dist"] > radius
+
+        # Expect zero results for radius==0
+        bf_labels, bf_distances = bfindex.range_query(query_data, radius=0)
+        assert len(bf_labels[0]) == 0
+
+def test_bf_float16_multivalue():
+    num_labels=5_000
+    num_per_label=20
+    num_elements = num_labels * num_per_label
+
+    dim = 128
+
+    data = Data(VecSimType_FLOAT16, VecSimMetric_L2, spatial.distance.sqeuclidean, vec_to_float16, dim, num_labels, num_per_label)
+
+    k=10
+
+    query_data = data.query
+    dists = {}
+    for key, vec in data.vectors:
+        # Setting or updating the score for each label.
+        # If it's the first time we calculate a score for a label dists.get(key, dist)
+        # will return dist so we will choose the actual score the first time.
+        dist = spatial.distance.sqeuclidean(query_data.flat, vec)
+        dists[key] = min(dist, dists.get(key, dist))
+
+    dists = list(dists.items())
+    dists = sorted(dists, key=lambda pair: pair[1])[:k]
+    keys = [key for key, _ in dists[:k]]
+    dists = [dist for _, dist in dists[:k]]
+
+    start = time.time()
+    bf_labels, bf_distances = data.index.knn_query(query_data, k=10)
+    end = time.time()
+
+    print(f'\nlookup time for {num_elements} vectors ({num_labels} labels and {num_per_label} vectors per label) with dim={dim} took {end - start} seconds')
+
+    assert_allclose(bf_labels, [keys],  rtol=1e-5, atol=0)
+    assert_allclose(bf_distances, [dists],  rtol=1e-5, atol=0)
diff --git a/tests/flow/test_hnsw.py b/tests/flow/test_hnsw.py
index 1c80c9fce..c2ad519e7 100644
--- a/tests/flow/test_hnsw.py
+++ b/tests/flow/test_hnsw.py
@@ -650,3 +650,213 @@ def test_hnsw_bfloat16_multi_value():
     recall = float(correct) / (k * num_queries)
     print("\nrecall is: \n", recall)
     assert (recall > 0.9)
+
+class TestFloat16():
+    dim = 50
+    num_elements = 10_000
+    M = 32
+    efConstruction = 200
+    efRuntime = 50
+    data_type = VecSimType_FLOAT16
+
+    hnsw_index = create_hnsw_index(dim, num_elements, VecSimMetric_L2, data_type, efConstruction, M, efRuntime)
+    hnsw_index.set_ef(efRuntime)
+
+    rng = np.random.default_rng(seed=42)
+    data = vec_to_float16(rng.random((num_elements, dim)))
+
+    vectors = []
+    for i, vector in enumerate(data):
+        hnsw_index.add_vector(vector, i)
+        vectors.append((i, vector))
+
+    #### Create queries
+    num_queries = 10
+    query_data = vec_to_float16(rng.random((num_queries, dim)))
+
+    def test_serialization(self):
+        hnsw_index = self.hnsw_index
+        k = 10
+
+        correct = 0
+        correct_labels = []  # cache these
+        for target_vector in self.query_data:
+            hnswlib_labels, _ = hnsw_index.knn_query(target_vector, 10)
+
+            # sort distances of every vector from the target vector and get actual k nearest vectors
+            dists = [(spatial.distance.euclidean(target_vector, vec), key) for key, vec in self.vectors]
+            dists = sorted(dists)
+            keys = [key for _, key in dists[:k]]
+            correct_labels.append(keys)
+
+            for label in hnswlib_labels[0]:
+                for correct_label in keys:
+                    if label == correct_label:
+                        correct += 1
+                        break
+        # Measure recall
+        recall = float(correct) / (k * self.num_queries)
+        print("\nrecall is: \n", recall)
+
+        # Persist, delete and restore index.
+        file_name = os.getcwd() + "/dump"
+        hnsw_index.save_index(file_name)
+
+        new_hnsw_index = HNSWIndex(file_name)
+        os.remove(file_name)
+        assert new_hnsw_index.index_size() == self.num_elements
+        assert new_hnsw_index.index_type() == VecSimType_FLOAT16
+
+        # Check recall
+        correct_after = 0
+        for i, target_vector in enumerate(self.query_data):
+            hnswlib_labels, _ = new_hnsw_index.knn_query(target_vector, 10)
+            correct_labels_cur = correct_labels[i]
+            for label in hnswlib_labels[0]:
+                for correct_label in correct_labels_cur:
+                    if label == correct_label:
+                        correct_after += 1
+                        break
+
+        # Compare recall after reloading the index
+        recall_after = float(correct_after) / (k * self.num_queries)
+        print("\nrecall after is: \n", recall_after)
+        assert recall == recall_after
+
+    def test_float16_L2(self):
+        hnsw_index = self.hnsw_index
+        k = 10
+
+        correct = 0
+        for target_vector in self.query_data:
+            hnswlib_labels, hnswlib_distances = hnsw_index.knn_query(target_vector, 10)
+
+            results, keys = get_ground_truth_results(spatial.distance.sqeuclidean, target_vector, self.vectors, k)
+            for i, label in enumerate(hnswlib_labels[0]):
+                for j, correct_label in enumerate(keys):
+                    if label == correct_label:
+                        correct += 1
+                        assert math.isclose(np.float16(hnswlib_distances[0][i]), (results[j]["dist"]), rel_tol=1e-2), f"label: {label}"
+                        break
+
+        # Measure recall
+        recall = float(correct) / (k * self.num_queries)
+        print("\nrecall is: \n", recall)
+        assert (recall > 0.9)
+
+    def test_batch_iterator(self):
+        hnsw_index = self.hnsw_index
+
+        efRuntime = 180
+        hnsw_index.set_ef(efRuntime)
+
+        batch_iterator = hnsw_index.create_batch_iterator(self.query_data)
+        labels_first_batch, distances_first_batch = batch_iterator.get_next_results(10, BY_ID)
+        for i, _ in enumerate(labels_first_batch[0][:-1]):
+            # Assert sorting by id
+            assert (labels_first_batch[0][i] < labels_first_batch[0][i + 1])
+
+        labels_second_batch, distances_second_batch = batch_iterator.get_next_results(10, BY_SCORE)
+        should_have_return_in_first_batch = []
+        for i, dist in enumerate(distances_second_batch[0][:-1]):
+            # Assert sorting by score
+            assert (distances_second_batch[0][i] < distances_second_batch[0][i + 1])
+            # Assert that every distance in the second batch is higher than any distance of the first batch
+            if len(distances_first_batch[0][np.where(distances_first_batch[0] > dist)]) != 0:
+                should_have_return_in_first_batch.append(dist)
+        assert (len(should_have_return_in_first_batch) <= 2)
+
+        # Verify that runtime args are sent properly to the batch iterator.
+        query_params = VecSimQueryParams()
+        query_params.hnswRuntimeParams.efRuntime = 5
+        batch_iterator_new = hnsw_index.create_batch_iterator(self.query_data, query_params)
+        labels_first_batch_new, distances_first_batch_new = batch_iterator_new.get_next_results(10, BY_ID)
+        # Verify that accuracy is worse with the new lower ef_runtime.
+        assert (sum(distances_first_batch[0]) < sum(distances_first_batch_new[0]))
+
+    def test_range_query(self):
+        index = self.hnsw_index
+
+        radius = 7.0
+        recalls = {}
+
+        for epsilon_rt in [0.001, 0.01, 0.1]:
+            query_params = VecSimQueryParams()
+            query_params.hnswRuntimeParams.epsilon = epsilon_rt
+            start = time.time()
+            hnsw_labels, hnsw_distances = index.range_query(self.query_data[0], radius=radius, query_param=query_params)
+            end = time.time()
+            res_num = len(hnsw_labels[0])
+
+            dists = sorted([(key, spatial.distance.sqeuclidean(self.query_data[0], vec)) for key, vec in self.vectors])
+            actual_results = [(key, dist) for key, dist in dists if dist <= radius]
+
+            print(
+                f'\nlookup time for {self.num_elements} vectors with dim={self.dim} took {end - start} seconds with epsilon={epsilon_rt},'
+                f' got {res_num} results, which are {res_num / len(actual_results)} of the entire results in the range.')
+
+            # Compare the number of vectors that are actually within the range to the returned results.
+            assert np.all(np.isin(hnsw_labels, np.array([label for label, _ in actual_results])))
+
+            assert max(hnsw_distances[0]) <= radius
+            recalls[epsilon_rt] = res_num / len(actual_results)
+
+        # Expect higher recalls for higher epsilon values.
+        assert recalls[0.001] <= recalls[0.01] <= recalls[0.1]
+
+        # Expect zero results for radius==0
+        hnsw_labels, hnsw_distances = index.range_query(self.query_data[0], radius=0)
+        assert len(hnsw_labels[0]) == 0
+
+def test_hnsw_float16_multi_value():
+    num_labels = 1_000
+    num_per_label = 5
+    num_elements = num_labels * num_per_label
+
+    dim = 128
+    M = 32
+    efConstruction = 100
+    num_queries = 10
+
+    hnsw_index = create_hnsw_index(dim, num_elements, VecSimMetric_L2, VecSimType_FLOAT16, efConstruction, M,
+                                   is_multi=True)
+    k = 10
+    hnsw_index.set_ef(50)
+
+    data = vec_to_float16(np.random.random((num_labels, dim)))
+    vectors = []
+    for i, vector in enumerate(data):
+        for _ in range(num_per_label):
+            hnsw_index.add_vector(vector, i)
+            vectors.append((i, vector))
+
+    query_data = vec_to_float16(np.random.random((num_queries, dim)))
+    correct = 0
+    for target_vector in query_data:
+        hnswlib_labels, hnswlib_distances = hnsw_index.knn_query(target_vector, 10)
+        assert (len(hnswlib_labels[0]) == len(np.unique(hnswlib_labels[0])))
+
+        # sort distances of every vector from the target vector and get actual k nearest vectors
+        dists = {}
+        for key, vec in vectors:
+            # Setting or updating the score for each label.
+            # If it's the first time we calculate a score for a label dists.get(key, dist)
+            # will return dist so we will choose the actual score the first time.
+            dist = spatial.distance.sqeuclidean(target_vector, vec)
+            dists[key] = min(dist, dists.get(key, dist))
+
+        dists = list(dists.items())
+        dists = sorted(dists, key=lambda pair: pair[1])[:k]
+        keys = [key for key, _ in dists]
+
+        for i, label in enumerate(hnswlib_labels[0]):
+            for j, correct_label in enumerate(keys):
+                if label == correct_label:
+                    correct += 1
+                    assert math.isclose(hnswlib_distances[0][i], dists[j][1], rel_tol=1e-5)
+                    break
+
+    # Measure recall
+    recall = float(correct) / (k * num_queries)
+    print("\nrecall is: \n", recall)
+    assert (recall > 0.9)
diff --git a/tests/flow/test_hnsw_tiered.py b/tests/flow/test_hnsw_tiered.py
index 3cbd79c23..ffe762d0f 100644
--- a/tests/flow/test_hnsw_tiered.py
+++ b/tests/flow/test_hnsw_tiered.py
@@ -12,7 +12,7 @@ def create_tiered_hnsw_params(swap_job_threshold = 0):
     return tiered_hnsw_params
 
 class IndexCtx:
-    array_conversion_func = {VecSimType_FLOAT32: np.float32, VecSimType_BFLOAT16: vec_to_bfloat16}
+    array_conversion_func = {VecSimType_FLOAT32: np.float32, VecSimType_BFLOAT16: vec_to_bfloat16, VecSimType_FLOAT16: vec_to_float16}
     def __init__(self, data_size=10000,
                  dim=16,
                  M=16,
@@ -109,7 +109,7 @@ def generate_queries(self, num_queries):
         return queries
 
     def get_vectors_memory_size(self):
-        memory_size = {VecSimType_FLOAT32:4, VecSimType_FLOAT64:8, VecSimType_BFLOAT16:2}
+        memory_size = {VecSimType_FLOAT32:4, VecSimType_FLOAT64:8, VecSimType_BFLOAT16:2, VecSimType_FLOAT16:2}
         return bytes_to_mega(self.num_vectors * self.dim * memory_size[self.data_type])
 
 
@@ -230,6 +230,10 @@ def test_create_bf16():
     print("Test create multi label tiered hnsw index")
     create_tiered_index(is_multi=False, data_type=VecSimType_BFLOAT16)
 
+def test_create_fp16():
+    print("Test create multi label tiered hnsw index")
+    create_tiered_index(is_multi=False, data_type=VecSimType_FLOAT16)
+
 def test_search_insert():
     print(f"\nStart insert & search test")
     search_insert(is_multi=False)
@@ -238,6 +242,10 @@ def test_search_insert_bf16():
     print(f"\nStart insert & search test")
     search_insert(is_multi=False, data_type=VecSimType_BFLOAT16)
 
+def test_search_insert_fp16():
+    print(f"\nStart insert & search test")
+    search_insert(is_multi=False, data_type=VecSimType_FLOAT16)
+
 def test_search_insert_multi_index():
     print(f"\nStart insert & search test for multi index")
 

From 55ef50ffcb77a684d756f55e3da5a31ce600d502 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 8 May 2024 05:16:33 +0000
Subject: [PATCH 05/15] fp16 benchmarks

---
 tests/benchmark/benchmarks.sh                 | 10 ++-
 tests/benchmark/bm_definitions.h              |  2 +
 tests/benchmark/bm_files.sh                   |  6 ++
 .../bm_basics_initialize_fp16.h               | 74 +++++++++++++++++++
 .../bm_batch_initialize_fp16.h                | 37 ++++++++++
 tests/benchmark/bm_vecsim_index.h             |  7 ++
 .../run_files/bm_basics_multi_fp16.cpp        | 37 ++++++++++
 .../run_files/bm_basics_single_fp16.cpp       | 36 +++++++++
 .../bm_batch_iterator_multi_fp16.cpp          | 23 ++++++
 .../bm_batch_iterator_single_fp16.cpp         | 23 ++++++
 10 files changed, 254 insertions(+), 1 deletion(-)
 create mode 100644 tests/benchmark/bm_initialization/bm_basics_initialize_fp16.h
 create mode 100644 tests/benchmark/bm_initialization/bm_batch_initialize_fp16.h
 create mode 100644 tests/benchmark/run_files/bm_basics_multi_fp16.cpp
 create mode 100644 tests/benchmark/run_files/bm_basics_single_fp16.cpp
 create mode 100644 tests/benchmark/run_files/bm_batch_iterator_multi_fp16.cpp
 create mode 100644 tests/benchmark/run_files/bm_batch_iterator_single_fp16.cpp

diff --git a/tests/benchmark/benchmarks.sh b/tests/benchmark/benchmarks.sh
index b203dad43..b2f5e7b9b 100755
--- a/tests/benchmark/benchmarks.sh
+++ b/tests/benchmark/benchmarks.sh
@@ -3,7 +3,7 @@ BM_TYPE=$1;
 if [ -z "$BM_TYPE"  ] || [ "$BM_TYPE" = "benchmarks-all" ]; then
     for bm_class in basics batch_iterator; do
         for type in single multi; do
-            for data_type in fp32 fp64 bf16; do
+            for data_type in fp32 fp64 bf16 fp16; do
                 echo ${bm_class}_${type}_${data_type};
             done
         done
@@ -29,6 +29,10 @@ elif [ "$BM_TYPE" = "bm-basics-bf16-single" ] ; then
     echo basics_single_bf16
 elif [ "$BM_TYPE" = "bm-basics-bf16-multi" ] ; then
     echo basics_multi_bf16
+elif [ "$BM_TYPE" = "bm-basics-fp16-single" ] ; then
+    echo basics_single_fp16
+elif [ "$BM_TYPE" = "bm-basics-fp16-multi" ] ; then
+    echo basics_multi_fp16
 
 # Batch iterator benchmarks
 elif [ "$BM_TYPE" = "bm-batch-iter-fp32-single" ] ; then
@@ -43,6 +47,10 @@ elif [ "$BM_TYPE" = "bm-batch-iter-bf16-single" ] ; then
     echo batch_iterator_single_bf16
 elif [ "$BM_TYPE" = "bm-batch-iter-bf16-multi" ] ; then
     echo batch_iterator_multi_bf16
+elif [ "$BM_TYPE" = "bm-batch-iter-fp16-single" ] ; then
+    echo batch_iterator_single_fp16
+elif [ "$BM_TYPE" = "bm-batch-iter-fp16-multi" ] ; then
+    echo batch_iterator_multi_fp16
 
 # Updated index benchmarks
 elif [ "$BM_TYPE" = "bm-updated-fp32-single" ] ; then
diff --git a/tests/benchmark/bm_definitions.h b/tests/benchmark/bm_definitions.h
index 46d798156..94a552987 100644
--- a/tests/benchmark/bm_definitions.h
+++ b/tests/benchmark/bm_definitions.h
@@ -2,6 +2,7 @@
 
 #include "VecSim/vec_sim_common.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 template <VecSimType type, typename DataType, typename DistType = DataType>
 struct IndexType {
 
@@ -14,6 +15,7 @@ struct IndexType {
 using fp32_index_t = IndexType<VecSimType_FLOAT32, float, float>;
 using fp64_index_t = IndexType<VecSimType_FLOAT64, double, double>;
 using bf16_index_t = IndexType<VecSimType_BFLOAT16, vecsim_types::bfloat16, float>;
+using fp16_index_t = IndexType<VecSimType_FLOAT16, vecsim_types::float16, float>;
 
 #define INDICES   BM_VecSimIndex<index_type_t>::indices
 #define QUERIES   BM_VecSimIndex<index_type_t>::queries
diff --git a/tests/benchmark/bm_files.sh b/tests/benchmark/bm_files.sh
index 4916630e6..f4dbb0982 100755
--- a/tests/benchmark/bm_files.sh
+++ b/tests/benchmark/bm_files.sh
@@ -22,6 +22,12 @@ elif [ "$BM_TYPE" = "bm-basics-bf16-single" ] \
 || [ "$BM_TYPE" = "bm-batch-iter-bf16-multi" ]
 then
     file_name="basic_bf16"
+elif [ "$BM_TYPE" = "bm-basics-fp16-single" ] \
+|| [ "$BM_TYPE" = "bm-basics-fp16-multi" ] \
+|| [ "$BM_TYPE" = "bm-batch-iter-fp16-single" ] \
+|| [ "$BM_TYPE" = "bm-batch-iter-fp16-multi" ]
+then
+    file_name="basic_fp16"
 elif [ "$BM_TYPE" = "bm-updated-fp32-single" ]; then
     file_name="updated"
 fi
diff --git a/tests/benchmark/bm_initialization/bm_basics_initialize_fp16.h b/tests/benchmark/bm_initialization/bm_basics_initialize_fp16.h
new file mode 100644
index 000000000..b548a5bd5
--- /dev/null
+++ b/tests/benchmark/bm_initialization/bm_basics_initialize_fp16.h
@@ -0,0 +1,74 @@
+#pragma once
+/**************************************
+  Define and register tests
+  NOTE: benchmarks' tests order can affect their results. Please add new benchmarks at the end of
+the file.
+***************************************/
+
+// Memory BF
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimCommon, BM_FUNC_NAME(Memory, FLAT), fp16_index_t)
+(benchmark::State &st) { Memory_FLAT(st); }
+BENCHMARK_REGISTER_F(BM_VecSimCommon, BM_FUNC_NAME(Memory, FLAT))->Iterations(1);
+
+// Memory HNSW
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimCommon, BM_FUNC_NAME(Memory, HNSW), fp16_index_t)
+(benchmark::State &st) { Memory_HNSW(st); }
+BENCHMARK_REGISTER_F(BM_VecSimCommon, BM_FUNC_NAME(Memory, HNSW))->Iterations(1);
+
+// Memory Tiered
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimCommon, BM_FUNC_NAME(Memory, Tiered), fp16_index_t)
+(benchmark::State &st) { Memory_Tiered(st); }
+BENCHMARK_REGISTER_F(BM_VecSimCommon, BM_FUNC_NAME(Memory, Tiered))->Iterations(1);
+
+// AddLabel
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, BM_ADD_LABEL, fp16_index_t)
+(benchmark::State &st) { AddLabel(st); }
+REGISTER_AddLabel(BM_ADD_LABEL, VecSimAlgo_BF);
+REGISTER_AddLabel(BM_ADD_LABEL, VecSimAlgo_HNSWLIB);
+
+// DeleteLabel Registration. Definition is placed in the .cpp file.
+REGISTER_DeleteLabel(BM_FUNC_NAME(DeleteLabel, BF));
+REGISTER_DeleteLabel(BM_FUNC_NAME(DeleteLabel, HNSW));
+
+// TopK BF
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimCommon, BM_FUNC_NAME(TopK, BF), fp16_index_t)
+(benchmark::State &st) { TopK_BF(st); }
+REGISTER_TopK_BF(BM_VecSimCommon, BM_FUNC_NAME(TopK, BF));
+
+// TopK HNSW
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimCommon, BM_FUNC_NAME(TopK, HNSW), fp16_index_t)
+(benchmark::State &st) { TopK_HNSW(st); }
+REGISTER_TopK_HNSW(BM_VecSimCommon, BM_FUNC_NAME(TopK, HNSW));
+
+// TopK Tiered HNSW
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimCommon, BM_FUNC_NAME(TopK, Tiered), fp16_index_t)
+(benchmark::State &st) { TopK_Tiered(st); }
+REGISTER_TopK_Tiered(BM_VecSimCommon, BM_FUNC_NAME(TopK, Tiered));
+
+// Range BF
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, BM_FUNC_NAME(Range, BF), fp16_index_t)
+(benchmark::State &st) { Range_BF(st); }
+REGISTER_Range_BF(BM_FUNC_NAME(Range, BF));
+
+// Range HNSW
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, BM_FUNC_NAME(Range, HNSW), fp16_index_t)
+(benchmark::State &st) { Range_HNSW(st); }
+REGISTER_Range_HNSW(BM_FUNC_NAME(Range, HNSW));
+
+// Tiered HNSW add/delete benchmarks
+REGISTER_AddLabel(BM_ADD_LABEL, VecSimAlgo_TIERED);
+REGISTER_DeleteLabel(BM_FUNC_NAME(DeleteLabel, Tiered));
+
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, BM_ADD_LABEL_ASYNC, fp16_index_t)
+(benchmark::State &st) { AddLabel_AsyncIngest(st); }
+BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_ADD_LABEL_ASYNC)
+    ->UNIT_AND_ITERATIONS->Arg(VecSimAlgo_TIERED)
+    ->ArgName("VecSimAlgo_TIERED");
+
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, BM_DELETE_LABEL_ASYNC, fp16_index_t)
+(benchmark::State &st) { DeleteLabel_AsyncRepair(st); }
+BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_DELETE_LABEL_ASYNC)
+    ->UNIT_AND_ITERATIONS->Arg(1)
+    ->Arg(100)
+    ->Arg(BM_VecSimGeneral::block_size)
+    ->ArgName("SwapJobsThreshold");
diff --git a/tests/benchmark/bm_initialization/bm_batch_initialize_fp16.h b/tests/benchmark/bm_initialization/bm_batch_initialize_fp16.h
new file mode 100644
index 000000000..677a84259
--- /dev/null
+++ b/tests/benchmark/bm_initialization/bm_batch_initialize_fp16.h
@@ -0,0 +1,37 @@
+#pragma once
+
+/**************************************
+  Define and register benchmarks for batch iterator with index of data type fp16
+  NOTE: benchmarks' tests order can affect their results. Please add new benchmarks at the end of
+the file.
+***************************************/
+
+// Fixed size batch BF
+BENCHMARK_TEMPLATE_DEFINE_F(BM_BatchIterator, BM_FUNC_NAME(BF, FixedBatchSize), fp16_index_t)
+(benchmark::State &st) { BF_FixedBatchSize(st); }
+REGISTER_FixedBatchSize(BM_FUNC_NAME(BF, FixedBatchSize));
+
+// Variable size batch BF
+BENCHMARK_TEMPLATE_DEFINE_F(BM_BatchIterator, BM_FUNC_NAME(BF, VariableBatchSize), fp16_index_t)
+(benchmark::State &st) { BF_VariableBatchSize(st); }
+REGISTER_VariableBatchSize(BM_FUNC_NAME(BF, VariableBatchSize));
+
+// Batches to ad-hoc BF
+BENCHMARK_TEMPLATE_DEFINE_F(BM_BatchIterator, BM_FUNC_NAME(BF, BatchesToAdhocBF), fp16_index_t)
+(benchmark::State &st) { BF_BatchesToAdhocBF(st); }
+REGISTER_BatchesToAdhocBF(BM_FUNC_NAME(BF, BatchesToAdhocBF));
+
+// Fixed size batch HNSW
+BENCHMARK_TEMPLATE_DEFINE_F(BM_BatchIterator, BM_FUNC_NAME(HNSW, FixedBatchSize), fp16_index_t)
+(benchmark::State &st) { HNSW_FixedBatchSize(st); }
+REGISTER_FixedBatchSize(BM_FUNC_NAME(HNSW, FixedBatchSize));
+
+// Variable size batch HNSW
+BENCHMARK_TEMPLATE_DEFINE_F(BM_BatchIterator, BM_FUNC_NAME(HNSW, VariableBatchSize), fp16_index_t)
+(benchmark::State &st) { HNSW_VariableBatchSize(st); }
+REGISTER_VariableBatchSize(BM_FUNC_NAME(HNSW, VariableBatchSize));
+
+// Batches to ad-hoc HNSW
+BENCHMARK_TEMPLATE_DEFINE_F(BM_BatchIterator, BM_FUNC_NAME(HNSW, BatchesToAdhocBF), fp16_index_t)
+(benchmark::State &st) { HNSW_BatchesToAdhocBF(st); }
+REGISTER_HNSW_BatchesToAdhocBF(BM_FUNC_NAME(HNSW, BatchesToAdhocBF));
diff --git a/tests/benchmark/bm_vecsim_index.h b/tests/benchmark/bm_vecsim_index.h
index 94e21692b..e2e939f80 100644
--- a/tests/benchmark/bm_vecsim_index.h
+++ b/tests/benchmark/bm_vecsim_index.h
@@ -3,6 +3,7 @@
 #include "bm_vecsim_general.h"
 #include "VecSim/index_factories/tiered_factory.h"
 #include "VecSim/types/bfloat16.h"
+#include "VecSim/types/float16.h"
 
 template <typename index_type_t>
 class BM_VecSimIndex : public BM_VecSimGeneral {
@@ -47,6 +48,9 @@ std::vector<std::vector<double>> BM_VecSimIndex<fp64_index_t>::queries{};
 template <>
 std::vector<std::vector<vecsim_types::bfloat16>> BM_VecSimIndex<bf16_index_t>::queries{};
 
+template <>
+std::vector<std::vector<vecsim_types::float16>> BM_VecSimIndex<fp16_index_t>::queries{};
+
 template <>
 std::vector<VecSimIndex *> BM_VecSimIndex<fp32_index_t>::indices{};
 
@@ -56,6 +60,9 @@ std::vector<VecSimIndex *> BM_VecSimIndex<fp64_index_t>::indices{};
 template <>
 std::vector<VecSimIndex *> BM_VecSimIndex<bf16_index_t>::indices{};
 
+template <>
+std::vector<VecSimIndex *> BM_VecSimIndex<fp16_index_t>::indices{};
+
 template <typename index_type_t>
 BM_VecSimIndex<index_type_t>::~BM_VecSimIndex() {
     ref_count--;
diff --git a/tests/benchmark/run_files/bm_basics_multi_fp16.cpp b/tests/benchmark/run_files/bm_basics_multi_fp16.cpp
new file mode 100644
index 000000000..04d0d04df
--- /dev/null
+++ b/tests/benchmark/run_files/bm_basics_multi_fp16.cpp
@@ -0,0 +1,37 @@
+#include "benchmark/bm_vecsim_basics.h"
+#include "VecSim/algorithms/brute_force/brute_force_multi.h"
+#include "VecSim/algorithms/hnsw/hnsw_multi.h"
+#include "VecSim/types/float16.h"
+
+/**************************************
+  Basic tests for multi value index.
+***************************************/
+
+bool BM_VecSimGeneral::is_multi = true;
+
+size_t BM_VecSimGeneral::n_queries = 10000;
+size_t BM_VecSimGeneral::n_vectors = 1111025;
+size_t BM_VecSimGeneral::dim = 512;
+size_t BM_VecSimGeneral::M = 64;
+size_t BM_VecSimGeneral::EF_C = 512;
+tieredIndexMock BM_VecSimGeneral::mock_thread_pool{};
+
+const char *BM_VecSimGeneral::hnsw_index_file =
+    "tests/benchmark/data/fashion_images_multi_value-cosine-dim512-M64-efc512-fp16.hnsw_v3";
+const char *BM_VecSimGeneral::test_queries_file =
+    "tests/benchmark/data/fashion_images_multi_value-cosine-dim512-fp16-test_vectors.raw";
+
+#define BM_FUNC_NAME(bm_func, algo) bm_func##_##algo##_Multi
+#define BM_ADD_LABEL                AddLabel_Multi
+#define BM_ADD_LABEL_ASYNC          AddLabel_Async_Multi
+#define BM_DELETE_LABEL_ASYNC       DeleteLabel_Async_Multi
+
+DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, BF), fp16_index_t, BruteForceIndex_Multi,
+                    vecsim_types::float16, float, VecSimAlgo_BF)
+DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp16_index_t, HNSWIndex_Multi,
+                    vecsim_types::float16, float, VecSimAlgo_HNSWLIB)
+DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp16_index_t, TieredHNSWIndex,
+                    vecsim_types::float16, float, VecSimAlgo_TIERED)
+#include "benchmark/bm_initialization/bm_basics_initialize_fp16.h"
+
+BENCHMARK_MAIN();
diff --git a/tests/benchmark/run_files/bm_basics_single_fp16.cpp b/tests/benchmark/run_files/bm_basics_single_fp16.cpp
new file mode 100644
index 000000000..0a1948b84
--- /dev/null
+++ b/tests/benchmark/run_files/bm_basics_single_fp16.cpp
@@ -0,0 +1,36 @@
+#include "benchmark/bm_vecsim_basics.h"
+#include "VecSim/algorithms/brute_force/brute_force_single.h"
+#include "VecSim/algorithms/hnsw/hnsw_single.h"
+#include "VecSim/types/float16.h"
+
+/**************************************
+  Basic tests for single value index with fp16 data type.
+***************************************/
+
+bool BM_VecSimGeneral::is_multi = false;
+
+size_t BM_VecSimGeneral::n_queries = 10000;
+size_t BM_VecSimGeneral::n_vectors = 1000000;
+size_t BM_VecSimGeneral::dim = 768;
+size_t BM_VecSimGeneral::M = 64;
+size_t BM_VecSimGeneral::EF_C = 512;
+tieredIndexMock BM_VecSimGeneral::mock_thread_pool{};
+
+const char *BM_VecSimGeneral::hnsw_index_file =
+    "tests/benchmark/data/dbpedia-cosine-dim768-M64-efc512-fp16.hnsw_v3";
+const char *BM_VecSimGeneral::test_queries_file =
+    "tests/benchmark/data/dbpedia-cosine-dim768-fp16-test_vectors.raw";
+
+#define BM_FUNC_NAME(bm_func, algo) bm_func##_##algo##_Single
+#define BM_ADD_LABEL                AddLabel_Single
+#define BM_ADD_LABEL_ASYNC          AddLabel_Async_Single
+#define BM_DELETE_LABEL_ASYNC       DeleteLabel_Async_Single
+
+DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, BF), fp16_index_t, BruteForceIndex_Single,
+                    vecsim_types::float16, float, VecSimAlgo_BF)
+DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp16_index_t, HNSWIndex_Single,
+                    vecsim_types::float16, float, VecSimAlgo_HNSWLIB)
+DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp16_index_t, TieredHNSWIndex,
+                    vecsim_types::float16, float, VecSimAlgo_TIERED)
+#include "benchmark/bm_initialization/bm_basics_initialize_fp16.h"
+BENCHMARK_MAIN();
diff --git a/tests/benchmark/run_files/bm_batch_iterator_multi_fp16.cpp b/tests/benchmark/run_files/bm_batch_iterator_multi_fp16.cpp
new file mode 100644
index 000000000..1493cb897
--- /dev/null
+++ b/tests/benchmark/run_files/bm_batch_iterator_multi_fp16.cpp
@@ -0,0 +1,23 @@
+#include "benchmark/bm_batch_iterator.h"
+
+bool BM_VecSimGeneral::is_multi = true;
+
+// Global benchmark data
+size_t BM_VecSimGeneral::n_vectors = 1111025;
+size_t BM_VecSimGeneral::n_queries = 10000;
+size_t BM_VecSimGeneral::dim = 512;
+size_t BM_VecSimGeneral::M = 64;
+size_t BM_VecSimGeneral::EF_C = 512;
+size_t BM_VecSimGeneral::block_size = 1024;
+tieredIndexMock BM_VecSimGeneral::mock_thread_pool{};
+
+const char *BM_VecSimGeneral::hnsw_index_file =
+    "tests/benchmark/data/fashion_images_multi_value-cosine-dim512-M64-efc512-fp16.hnsw_v3";
+const char *BM_VecSimGeneral::test_queries_file =
+    "tests/benchmark/data/fashion_images_multi_value-cosine-dim512-fp16-test_vectors.raw";
+
+#define BM_FUNC_NAME(bm_func, algo) algo##_##bm_func##_Multi
+
+#include "benchmark/bm_initialization/bm_batch_initialize_fp16.h"
+
+BENCHMARK_MAIN();
diff --git a/tests/benchmark/run_files/bm_batch_iterator_single_fp16.cpp b/tests/benchmark/run_files/bm_batch_iterator_single_fp16.cpp
new file mode 100644
index 000000000..1e411a142
--- /dev/null
+++ b/tests/benchmark/run_files/bm_batch_iterator_single_fp16.cpp
@@ -0,0 +1,23 @@
+#include "benchmark/bm_batch_iterator.h"
+
+bool BM_VecSimGeneral::is_multi = false;
+
+// Global benchmark data. dim, M and EF_C mirror "dim768-M64-efc512" in the index file name.
+size_t BM_VecSimGeneral::n_vectors = 1000000;
+size_t BM_VecSimGeneral::n_queries = 10000;
+size_t BM_VecSimGeneral::dim = 768;
+size_t BM_VecSimGeneral::M = 64;
+size_t BM_VecSimGeneral::EF_C = 512;
+size_t BM_VecSimGeneral::block_size = 1024;
+tieredIndexMock BM_VecSimGeneral::mock_thread_pool{};
+
+const char *BM_VecSimGeneral::hnsw_index_file =
+    "tests/benchmark/data/dbpedia-cosine-dim768-M64-efc512-fp16.hnsw_v3";
+const char *BM_VecSimGeneral::test_queries_file =
+    "tests/benchmark/data/dbpedia-cosine-dim768-fp16-test_vectors.raw";
+
+#define BM_FUNC_NAME(bm_func, algo) algo##_##bm_func##_Single
+
+#include "benchmark/bm_initialization/bm_batch_initialize_fp16.h"
+
+BENCHMARK_MAIN();

From 4eb705641b7e841fea9fae5d61bda8a62f2031c4 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 8 May 2024 05:26:05 +0000
Subject: [PATCH 06/15] fix hnsw flow test

---
 tests/flow/test_hnsw.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/flow/test_hnsw.py b/tests/flow/test_hnsw.py
index c2ad519e7..9370a651d 100644
--- a/tests/flow/test_hnsw.py
+++ b/tests/flow/test_hnsw.py
@@ -853,7 +853,7 @@ def test_hnsw_float16_multi_value():
             for j, correct_label in enumerate(keys):
                 if label == correct_label:
                     correct += 1
-                    assert math.isclose(hnswlib_distances[0][i], dists[j][1], rel_tol=1e-5)
+                    assert math.isclose(np.float16(hnswlib_distances[0][i]), dists[j][1], rel_tol=1e-2)
                     break
 
     # Measure recall

From 29bbd7ec1cf8fdbed2e61d5de55e2190c1686f3f Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Wed, 8 May 2024 10:59:09 +0000
Subject: [PATCH 07/15] add float16 to serializing script

add fp16 files to bm files

decrease testSizeEstimation dim size to make it run faster in debug
---
 .../hnsw_indices/hnsw_indices_basic_fp16.txt  |  5 ++++
 tests/benchmark/data/serializer.py            | 23 ++++++++++++++++++-
 tests/unit/test_bf16.cpp                      | 10 ++++----
 tests/unit/test_fp16.cpp                      | 10 ++++----
 tests/unit/test_hnsw.cpp                      |  5 ++--
 5 files changed, 40 insertions(+), 13 deletions(-)
 create mode 100644 tests/benchmark/data/hnsw_indices/hnsw_indices_basic_fp16.txt

diff --git a/tests/benchmark/data/hnsw_indices/hnsw_indices_basic_fp16.txt b/tests/benchmark/data/hnsw_indices/hnsw_indices_basic_fp16.txt
new file mode 100644
index 000000000..d52fd1635
--- /dev/null
+++ b/tests/benchmark/data/hnsw_indices/hnsw_indices_basic_fp16.txt
@@ -0,0 +1,5 @@
+https://dev.cto.redis.s3.amazonaws.com/VectorSimilarity/dbpedia-cosine-dim768-M64-efc512-fp16.hnsw_v3
+https://dev.cto.redis.s3.amazonaws.com/VectorSimilarity/dbpedia-cosine-dim768-fp16-test_vectors.raw
+
+https://dev.cto.redis.s3.amazonaws.com/VectorSimilarity/fashion_images_multi_value-cosine-dim512-M64-efc512-fp16.hnsw_v3
+https://dev.cto.redis.s3.amazonaws.com/VectorSimilarity/fashion_images_multi_value-cosine-dim512-fp16-test_vectors.raw
diff --git a/tests/benchmark/data/serializer.py b/tests/benchmark/data/serializer.py
index cd8d8ed1a..3c0ca1b2a 100644
--- a/tests/benchmark/data/serializer.py
+++ b/tests/benchmark/data/serializer.py
@@ -58,6 +58,13 @@
         'type': VecSimType_BFLOAT16,
         'metric': VecSimMetric_Cosine,
     },
+    {
+        'filename': 'dbpedia-768',
+        'nickname': 'dbpedia',
+        'dim': 768,
+        'type': VecSimType_FLOAT16,
+        'metric': VecSimMetric_Cosine,
+    },
     {
         'filename': 'fashion_images_multi_value',
         'metric': VecSimMetric_Cosine,
@@ -75,6 +82,12 @@
         'type': VecSimType_BFLOAT16,
         'multi': True,
     },
+    {
+        'filename': 'fashion_images_multi_value',
+        'metric': VecSimMetric_Cosine,
+        'type': VecSimType_FLOAT16,
+        'multi': True,
+    },
     {
         'filename': 'glove-25-angular',
         'nickname': 'glove',
@@ -128,7 +141,8 @@
 TYPES_ATTR = {
     VecSimType_FLOAT32: {"size_in_bytes": 4, "vector_type": np.float32},
     VecSimType_FLOAT64: {"size_in_bytes": 8, "vector_type": np.float64},
-    VecSimType_BFLOAT16: {"size_in_bytes": 2, "vector_type": bfloat16}
+    VecSimType_BFLOAT16: {"size_in_bytes": 2, "vector_type": bfloat16},
+    VecSimType_FLOAT16: {"size_in_bytes": 2, "vector_type": np.float16}
 }
 
 
@@ -175,6 +189,9 @@ def serialize(files=DEFAULT_FILES):
             elif hnswparams.type == VecSimType_BFLOAT16:
                 serialized_file_name = serialized_file_name + '-bf16'
                 serialized_raw_name = serialized_raw_name + '-bf16'
+            elif hnswparams.type == VecSimType_FLOAT16:
+                serialized_file_name = serialized_file_name + '-fp16'
+                serialized_raw_name = serialized_raw_name + '-fp16
 
             print('first, exporting test set to binary')
             if not file.get('skipRaw', False):
@@ -184,6 +201,8 @@ def serialize(files=DEFAULT_FILES):
                 elif hnswparams.type == VecSimType_BFLOAT16:
                     test_set = test[:]
                     test = np.array(test_set, dtype=bfloat16)
+                elif hnswparams.type == VecSimType_FLOAT16:
+                    test = test.astype(np.float16)
                 print(f"creating test set of {len(test)} vectors")
                 with open(os.path.join(location, serialized_raw_name + '-test_vectors.raw'), 'wb') as testfile:
                     for vec in test:
@@ -203,6 +222,8 @@ def serialize(files=DEFAULT_FILES):
             elif hnswparams.type == VecSimType_BFLOAT16:
                 data_set = data[:]
                 data = np.array(data_set, dtype=bfloat16)
+            elif hnswparams.type == VecSimType_FLOAT16:
+                data = data.astype(np.float16)
             print(f"creating index with {hnswparams.initialCapacity} vectors")
             for label, cur in enumerate(data):
                 for vec in cur if hnswparams.multi else [cur]:
diff --git a/tests/unit/test_bf16.cpp b/tests/unit/test_bf16.cpp
index 4ae352f00..260722645 100644
--- a/tests/unit/test_bf16.cpp
+++ b/tests/unit/test_bf16.cpp
@@ -248,7 +248,7 @@ TEST_F(BF16HNSWTest, testSizeEstimation) {
     // Initial capacity is rounded up to the block size.
     size_t extra_cap = n % bs == 0 ? 0 : bs - n % bs;
 
-    HNSWParams params = {.dim = 256, .initialCapacity = n, .blockSize = bs, .M = M};
+    HNSWParams params = {.dim = 4, .initialCapacity = n, .blockSize = bs, .M = M};
     SetUp(params);
 
     // EstimateInitialSize is called after CreateNewIndex because params struct is
@@ -287,7 +287,7 @@ TEST_F(BF16HNSWTest, testSizeEstimation) {
 }
 
 TEST_F(BF16HNSWTest, testSizeEstimation_No_InitialCapacity) {
-    size_t dim = 128;
+    size_t dim = 4;
     size_t n = 0;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
@@ -309,7 +309,7 @@ TEST_F(BF16HNSWTest, testSizeEstimation_No_InitialCapacity) {
 }
 
 TEST_F(BF16BruteForceTest, testSizeEstimation) {
-    size_t dim = 128;
+    size_t dim = 4;
     size_t n = 0;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
@@ -333,7 +333,7 @@ TEST_F(BF16BruteForceTest, testSizeEstimation) {
 }
 
 TEST_F(BF16BruteForceTest, testSizeEstimation_No_InitialCapacity) {
-    size_t dim = 128;
+    size_t dim = 4;
     size_t n = 100;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
@@ -354,7 +354,7 @@ TEST_F(BF16TieredTest, testSizeEstimation) {
     size_t M = 32;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
-    HNSWParams hnsw_params = {.dim = 128, .initialCapacity = n, .M = M};
+    HNSWParams hnsw_params = {.dim = 4, .initialCapacity = n, .M = M};
     SetUp(hnsw_params);
     TieredIndexParams tiered_params = generate_tiered_params(hnsw_params);
     VecSimParams params = CreateParams(tiered_params);
diff --git a/tests/unit/test_fp16.cpp b/tests/unit/test_fp16.cpp
index e730c51a2..0a51e3321 100644
--- a/tests/unit/test_fp16.cpp
+++ b/tests/unit/test_fp16.cpp
@@ -246,7 +246,7 @@ TEST_F(FP16HNSWTest, testSizeEstimation) {
     // Initial capacity is rounded up to the block size.
     size_t extra_cap = n % bs == 0 ? 0 : bs - n % bs;
 
-    HNSWParams params = {.dim = 256, .initialCapacity = n, .blockSize = bs, .M = M};
+    HNSWParams params = {.dim = 4, .initialCapacity = n, .blockSize = bs, .M = M};
     SetUp(params);
 
     // EstimateInitialSize is called after CreateNewIndex because params struct is
@@ -284,7 +284,7 @@ TEST_F(FP16HNSWTest, testSizeEstimation) {
 }
 
 TEST_F(FP16HNSWTest, testSizeEstimation_No_InitialCapacity) {
-    size_t dim = 128;
+    size_t dim = 4;
     size_t n = 0;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
@@ -306,7 +306,7 @@ TEST_F(FP16HNSWTest, testSizeEstimation_No_InitialCapacity) {
 }
 
 TEST_F(FP16BruteForceTest, testSizeEstimation) {
-    size_t dim = 128;
+    size_t dim = 4;
     size_t n = 0;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
@@ -330,7 +330,7 @@ TEST_F(FP16BruteForceTest, testSizeEstimation) {
 }
 
 TEST_F(FP16BruteForceTest, testSizeEstimation_No_InitialCapacity) {
-    size_t dim = 128;
+    size_t dim = 4;
     size_t n = 100;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
@@ -351,7 +351,7 @@ TEST_F(FP16TieredTest, testSizeEstimation) {
     size_t M = 32;
     size_t bs = DEFAULT_BLOCK_SIZE;
 
-    HNSWParams hnsw_params = {.dim = 128, .initialCapacity = n, .M = M};
+    HNSWParams hnsw_params = {.dim = 4, .initialCapacity = n, .M = M};
     SetUp(hnsw_params);
     TieredIndexParams tiered_params = generate_tiered_params(hnsw_params);
     VecSimParams params = CreateParams(tiered_params);
diff --git a/tests/unit/test_hnsw.cpp b/tests/unit/test_hnsw.cpp
index c31f0e6f6..f687a568f 100644
--- a/tests/unit/test_hnsw.cpp
+++ b/tests/unit/test_hnsw.cpp
@@ -1582,7 +1582,7 @@ TYPED_TEST(HNSWTest, testCosine) {
 }
 
 TYPED_TEST(HNSWTest, testSizeEstimation) {
-    size_t dim = 256;
+    size_t dim = 4;
     size_t n = 200;
     size_t bs = 256;
     size_t M = 64;
@@ -1611,6 +1611,7 @@ TYPED_TEST(HNSWTest, testSizeEstimation) {
     for (size_t i = 0; i < n; i++) {
         GenerateAndAddVector<TEST_DATA_T>(index, dim, i);
     }
+
     idType cur = n;
     while (index->indexSize() % bs != 0) {
         GenerateAndAddVector<TEST_DATA_T>(index, dim, cur++);
@@ -1630,7 +1631,7 @@ TYPED_TEST(HNSWTest, testSizeEstimation) {
 }
 
 TYPED_TEST(HNSWTest, testInitialSizeEstimation_No_InitialCapacity) {
-    size_t dim = 128;
+    size_t dim = 4;
     size_t n = 0;
     size_t bs = DEFAULT_BLOCK_SIZE;
 

From 25253718aa10d396e5823b938a1ae6b24b556ea1 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 9 May 2024 04:57:29 +0000
Subject: [PATCH 08/15] set log call back to null in benchmarks

---
 tests/benchmark/bm_vecsim_index.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/benchmark/bm_vecsim_index.h b/tests/benchmark/bm_vecsim_index.h
index e2e939f80..72054a083 100644
--- a/tests/benchmark/bm_vecsim_index.h
+++ b/tests/benchmark/bm_vecsim_index.h
@@ -135,6 +135,8 @@ void BM_VecSimIndex<index_type_t>::Initialize() {
 
     // Load the test query vectors form file. Index file path is relative to repository root dir.
     loadTestVectors(AttachRootPath(test_queries_file), type);
+
+    VecSim_SetLogCallbackFunction(nullptr);
 }
 
 template <typename index_type_t>

From ac17793ef225b5ad1319a26e64c03882a163b311 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 9 May 2024 09:17:09 +0000
Subject: [PATCH 09/15] remove struct typedef add default ctor

---
 .github/workflows/flow-temp.yml | 7 +++----
 src/VecSim/types/bfloat16.h     | 4 ++--
 src/VecSim/types/float16.h      | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/flow-temp.yml b/.github/workflows/flow-temp.yml
index 3d002b398..2306bc193 100644
--- a/.github/workflows/flow-temp.yml
+++ b/.github/workflows/flow-temp.yml
@@ -14,9 +14,8 @@ on:
 jobs:
   check-if-docs-only:
     uses: ./.github/workflows/task-check-docs.yml
-  bionic:
-    needs: [check-if-docs-only]
-    if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }}
+  macos:
     uses: ./.github/workflows/task-unit-test.yml
     with:
-      container: ubuntu:bionic
+      env: macos-latest
+      run-valgrind: false
diff --git a/src/VecSim/types/bfloat16.h b/src/VecSim/types/bfloat16.h
index 8bda8e885..899f6cdae 100644
--- a/src/VecSim/types/bfloat16.h
+++ b/src/VecSim/types/bfloat16.h
@@ -13,10 +13,10 @@
 namespace vecsim_types {
 struct bfloat16 {
     uint16_t val;
-    constexpr bfloat16(uint16_t val = 0) : val(val) {}
+    constexpr bfloat16() = default;
+    constexpr bfloat16(uint16_t val) : val(val) {}
     operator uint16_t() const { return val; }
 };
-using bfloat16 = struct bfloat16;
 
 static inline bfloat16 float_to_bf16(const float ff) {
     uint32_t *p_f32 = (uint32_t *)&ff;
diff --git a/src/VecSim/types/float16.h b/src/VecSim/types/float16.h
index 0e80e84fc..1a34c39dd 100644
--- a/src/VecSim/types/float16.h
+++ b/src/VecSim/types/float16.h
@@ -12,10 +12,10 @@
 namespace vecsim_types {
 struct float16 {
     uint16_t val;
-    constexpr float16(uint16_t val = 0) : val(val) {}
+    float16() = default;
+    constexpr float16(uint16_t val) : val(val) {}
     operator uint16_t() const { return val; }
 };
-using float16 = struct float16;
 
 inline float _interpret_as_float(uint32_t num) {
     void *num_ptr = &num;

From 775c41c7b90b0f245dcab6b3780d0f3ab91f21c6 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Fri, 10 May 2024 04:37:27 +0000
Subject: [PATCH 10/15] add comma to serializer file

---
 tests/benchmark/data/serializer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/benchmark/data/serializer.py b/tests/benchmark/data/serializer.py
index 3c0ca1b2a..04f2ecd22 100644
--- a/tests/benchmark/data/serializer.py
+++ b/tests/benchmark/data/serializer.py
@@ -142,7 +142,7 @@
     VecSimType_FLOAT32: {"size_in_bytes": 4, "vector_type": np.float32},
     VecSimType_FLOAT64: {"size_in_bytes": 8, "vector_type": np.float64},
     VecSimType_BFLOAT16: {"size_in_bytes": 2, "vector_type": bfloat16},
-    VecSimType_FLOAT16: {"size_in_bytes": 2, "vector_type": np.float16}
+    VecSimType_FLOAT16: {"size_in_bytes": 2, "vector_type": np.float16},
 }
 
 

From f94d73166738d10560d7d80916f565470a403ca6 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Fri, 10 May 2024 04:40:56 +0000
Subject: [PATCH 11/15] disable temp flow

---
 .github/workflows/flow-temp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/flow-temp b/.github/workflows/flow-temp
index 83e53fd1f..2c8f48bc9 100644
--- a/.github/workflows/flow-temp
+++ b/.github/workflows/flow-temp
@@ -9,7 +9,7 @@ name: temporary testing
 
 on:
   push:
-    # branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
+    branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
 
 jobs:
   merge-queue-flow:

From db2521ded6ba844f23552803432f2545444f4278 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Fri, 10 May 2024 04:42:22 +0000
Subject: [PATCH 12/15] disable for real

---
 .github/workflows/flow-temp     | 16 ----------------
 .github/workflows/flow-temp.yml |  2 +-
 2 files changed, 1 insertion(+), 17 deletions(-)
 delete mode 100644 .github/workflows/flow-temp

diff --git a/.github/workflows/flow-temp b/.github/workflows/flow-temp
deleted file mode 100644
index 2c8f48bc9..000000000
--- a/.github/workflows/flow-temp
+++ /dev/null
@@ -1,16 +0,0 @@
-name: temporary testing
-
-# This file is useful for triggering actions when you implement them.
-# When the `branches-ignore` line is commented out, this workflow will run on every push.
-# It is better to use this file for testing your new flows than creating a new one, to avoid cluttering the repo
-# action tab with unused workflows.
-# Don't worry about conflicts with other PRs - there is no "right" content of this file.
-# Make sure the `branches-ignore` line is not commented out when you merge your PR.
-
-on:
-  push:
-    branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
-
-jobs:
-  merge-queue-flow:
-    uses: ./.github/workflows/event-merge-to-queue.yml
diff --git a/.github/workflows/flow-temp.yml b/.github/workflows/flow-temp.yml
index 2306bc193..3688625a8 100644
--- a/.github/workflows/flow-temp.yml
+++ b/.github/workflows/flow-temp.yml
@@ -9,7 +9,7 @@ name: temporary testing
 
 on:
   push:
-    # branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
+    branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
 
 jobs:
   check-if-docs-only:

From 0edd29d8c2d89be7983758ae5f6b3d1643be139d Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sat, 11 May 2024 05:47:17 +0000
Subject: [PATCH 13/15] remove constexpr from bfloat default ctor

declaring a default ctor as constexpr is allowed as of c++20, and this feature in particular is supported as of gcc10

I don't see any benefit using this feature...
---
 src/VecSim/types/bfloat16.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/VecSim/types/bfloat16.h b/src/VecSim/types/bfloat16.h
index 899f6cdae..5537918f6 100644
--- a/src/VecSim/types/bfloat16.h
+++ b/src/VecSim/types/bfloat16.h
@@ -13,7 +13,7 @@
 namespace vecsim_types {
 struct bfloat16 {
     uint16_t val;
-    constexpr bfloat16() = default;
+    bfloat16() = default;
     constexpr bfloat16(uint16_t val) : val(val) {}
     operator uint16_t() const { return val; }
 };

From 4369584f8b316518047a61d027dfa712d165fee1 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sat, 11 May 2024 05:48:03 +0000
Subject: [PATCH 14/15] test focal

---
 .github/workflows/flow-temp.yml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/flow-temp.yml b/.github/workflows/flow-temp.yml
index 3688625a8..9f1a139c0 100644
--- a/.github/workflows/flow-temp.yml
+++ b/.github/workflows/flow-temp.yml
@@ -9,13 +9,14 @@ name: temporary testing
 
 on:
   push:
-    branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
+    # branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
 
 jobs:
   check-if-docs-only:
     uses: ./.github/workflows/task-check-docs.yml
-  macos:
+  focal:
+    needs: [check-if-docs-only]
+    if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }}
     uses: ./.github/workflows/task-unit-test.yml
     with:
-      env: macos-latest
-      run-valgrind: false
+      container: ubuntu:focal

From 29c1773bb51389f95e05d01b4a7a5935a4ffec36 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 12 May 2024 08:10:50 +0000
Subject: [PATCH 15/15] disable temp flow

---
 .github/workflows/flow-temp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/flow-temp.yml b/.github/workflows/flow-temp.yml
index 9f1a139c0..741ff20fc 100644
--- a/.github/workflows/flow-temp.yml
+++ b/.github/workflows/flow-temp.yml
@@ -9,7 +9,7 @@ name: temporary testing
 
 on:
   push:
-    # branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
+    branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
 
 jobs:
   check-if-docs-only: