diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h
index f52a06453..3aa6e528b 100644
--- a/src/VecSim/algorithms/brute_force/brute_force.h
+++ b/src/VecSim/algorithms/brute_force/brute_force.h
@@ -43,8 +43,8 @@ class BruteForceIndex : public VecSimIndexAbstract<DataType, DistType> {
     size_t indexSize() const override;
     size_t indexCapacity() const override;
     std::unique_ptr<RawDataContainer::Iterator> getVectorsIterator() const;
-    DataType *getDataByInternalId(idType id) const {
-        return (DataType *)this->vectors->getElement(id);
+    const DataType *getDataByInternalId(idType id) const {
+        return reinterpret_cast<const DataType *>(this->vectors->getElement(id));
     }
     VecSimQueryReply *topKQuery(const void *queryBlob, size_t k,
                                 VecSimQueryParams *queryParams) const override;
@@ -77,16 +77,6 @@ class BruteForceIndex : public VecSimIndexAbstract<DataType, DistType> {
 
     virtual ~BruteForceIndex() = default;
 #ifdef BUILD_TESTS
-    /**
-     * @brief Used for testing - store vector(s) data associated with a given label. This function
-     * copies the vector(s)' data buffer(s) and place it in the output vector
-     *
-     * @param label
-     * @param vectors_output empty vector to be modified, should store the blob(s) associated with
-     * the label.
-     */
-    virtual void getDataByLabel(labelType label,
-                                std::vector<std::vector<DataType>> &vectors_output) const = 0;
     void fitMemory() override {
         if (count == 0) {
             return;
@@ -351,12 +341,13 @@ template <typename DataType, typename DistType>
 VecSimBatchIterator *
 BruteForceIndex<DataType, DistType>::newBatchIterator(const void *queryBlob,
                                                       VecSimQueryParams *queryParams) const {
-    auto *queryBlobCopy =
-        this->allocator->allocate_aligned(this->dataSize, this->preprocessors->getAlignment());
-    memcpy(queryBlobCopy, queryBlob, this->dim * sizeof(DataType));
-    this->preprocessQueryInPlace(queryBlobCopy);
+    // force_copy == true: the iterator will free the blob, so it must own a private copy.
+    auto queryBlobCopy = this->preprocessQuery(queryBlob, true);
+
+    // take ownership of the blob copy and pass it to the batch iterator.
+    auto *queryBlobCopyPtr = queryBlobCopy.release();
     // Ownership of queryBlobCopy moves to BF_BatchIterator that will free it at the end.
-    return newBatchIterator_Instance(queryBlobCopy, queryParams);
+    return newBatchIterator_Instance(queryBlobCopyPtr, queryParams);
 }
 
 template <typename DataType, typename DistType>
diff --git a/src/VecSim/algorithms/brute_force/brute_force_multi.h b/src/VecSim/algorithms/brute_force/brute_force_multi.h
index 7c257286d..9f89fbf5a 100644
--- a/src/VecSim/algorithms/brute_force/brute_force_multi.h
+++ b/src/VecSim/algorithms/brute_force/brute_force_multi.h
@@ -48,10 +48,30 @@ class BruteForceIndex_Multi : public BruteForceIndex<DataType, DistType> {
 
         for (idType id : ids->second) {
             auto vec = std::vector<DataType>(this->dim);
+            // Only copy the vector data (dim * sizeof(DataType)), not any additional metadata like
+            // the norm
             memcpy(vec.data(), this->getDataByInternalId(id), this->dim * sizeof(DataType));
             vectors_output.push_back(vec);
         }
     }
+
+    // Test helper: copies every blob stored under `label`, metadata (e.g. norm) included.
+    std::vector<std::vector<char>> getStoredVectorDataByLabel(labelType label) const override {
+        std::vector<std::vector<char>> vectors_output;
+        auto ids = labelToIdsLookup.find(label);
+        if (ids == labelToIdsLookup.end()) {
+            // Unknown label: return an empty result instead of dereferencing end().
+            return vectors_output;
+        }
+        const size_t blob_size = this->getDataSize();
+        for (idType id : ids->second) {
+            // View the stored blob as raw bytes and copy all of it, not only dim elements.
+            const char *data = reinterpret_cast<const char *>(this->getDataByInternalId(id));
+            vectors_output.emplace_back(data, data + blob_size);
+        }
+        return vectors_output;
+    }
+
 #endif
 private:
     // inline definitions
diff --git a/src/VecSim/algorithms/brute_force/brute_force_single.h b/src/VecSim/algorithms/brute_force/brute_force_single.h
index 4329fc77b..ad9bf80d4 100644
--- a/src/VecSim/algorithms/brute_force/brute_force_single.h
+++ b/src/VecSim/algorithms/brute_force/brute_force_single.h
@@ -50,9 +50,26 @@ class BruteForceIndex_Single : public BruteForceIndex<DataType, DistType> {
         auto id = labelToIdLookup.at(label);
 
         auto vec = std::vector<DataType>(this->dim);
+        // Only copy the vector data (dim * sizeof(DataType)), not any additional metadata like the
+        // norm
         memcpy(vec.data(), this->getDataByInternalId(id), this->dim * sizeof(DataType));
         vectors_output.push_back(vec);
     }
+
+    // Test helper: returns a copy of the whole blob stored for `label`, metadata included.
+    std::vector<std::vector<char>> getStoredVectorDataByLabel(labelType label) const override {
+        const auto internal_id = labelToIdLookup.at(label);
+        const size_t blob_size = this->getDataSize();
+
+        // getDataByInternalId() yields a typed pointer; view it as bytes so that the entire
+        // stored blob (elements plus any metadata like norms) is copied, not only dim elements.
+        const char *blob_begin =
+            reinterpret_cast<const char *>(this->getDataByInternalId(internal_id));
+
+        std::vector<std::vector<char>> stored_blobs;
+        stored_blobs.emplace_back(blob_begin, blob_begin + blob_size);
+        return stored_blobs;
+    }
 #endif
 protected:
     // inline definitions
diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h
index dc1144545..6a3b41315 100644
--- a/src/VecSim/algorithms/hnsw/hnsw.h
+++ b/src/VecSim/algorithms/hnsw/hnsw.h
@@ -303,16 +303,6 @@ class HNSWIndex : public VecSimIndexAbstract<DataType, DistType>,
     virtual int removeLabel(labelType label) = 0;
 
 #ifdef BUILD_TESTS
-    /**
-     * @brief Used for testing - store vector(s) data associated with a given label. This function
-     * copies the vector(s)' data buffer(s) and place it in the output vector
-     *
-     * @param label
-     * @param vectors_output empty vector to be modified, should store the blob(s) associated with
-     * the label.
-     */
-    virtual void getDataByLabel(labelType label,
-                                std::vector<std::vector<DataType>> &vectors_output) const = 0;
     void fitMemory() override {
         if (maxElements > 0) {
             idToMetaData.shrink_to_fit();
@@ -1562,7 +1552,7 @@ void HNSWIndex<DataType, DistType>::insertElementToGraph(idType element_id,
     for (auto level = static_cast<int>(max_common_level); level >= 0; level--) {
         candidatesMaxHeap<DistType> top_candidates =
             searchLayer(curr_element, vector_data, level, efConstruction);
-        // If the entry point was marked deleted between iterations, we may recieve an empty
+        // If the entry point was marked deleted between iterations, we may receive an empty
         // candidates set.
         if (!top_candidates.empty()) {
             curr_element = mutuallyConnectNewElement(element_id, top_candidates, level);
diff --git a/src/VecSim/algorithms/hnsw/hnsw_multi.h b/src/VecSim/algorithms/hnsw/hnsw_multi.h
index 6f3ac91ae..52d9fe00e 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_multi.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_multi.h
@@ -75,10 +75,28 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
 
         for (idType id : ids->second) {
             auto vec = std::vector<DataType>(this->dim);
-            memcpy(vec.data(), this->getDataByInternalId(id), this->dataSize);
+            // Only copy the vector data (dim * sizeof(DataType)), not any additional metadata like
+            // the norm
+            memcpy(vec.data(), this->getDataByInternalId(id), this->dim * sizeof(DataType));
             vectors_output.push_back(vec);
         }
     }
+
+    // Test helper: copies every blob stored under `label`, metadata (e.g. norm) included.
+    std::vector<std::vector<char>> getStoredVectorDataByLabel(labelType label) const override {
+        std::vector<std::vector<char>> vectors_output;
+        auto ids = labelLookup.find(label);
+        if (ids == labelLookup.end()) {
+            // Unknown label: return an empty result instead of dereferencing end().
+            return vectors_output;
+        }
+        for (idType id : ids->second) {
+            // Copy the full stored blob (dataSize bytes), not only the dim vector elements.
+            const char *data = this->getDataByInternalId(id);
+            vectors_output.emplace_back(data, data + this->dataSize);
+        }
+        return vectors_output;
+    }
 #endif
     ~HNSWIndex_Multi() = default;
 
@@ -202,13 +220,14 @@ template <typename DataType, typename DistType>
 VecSimBatchIterator *
 HNSWIndex_Multi<DataType, DistType>::newBatchIterator(const void *queryBlob,
                                                       VecSimQueryParams *queryParams) const {
-    auto queryBlobCopy =
-        this->allocator->allocate_aligned(this->dataSize, this->preprocessors->getAlignment());
-    memcpy(queryBlobCopy, queryBlob, this->dim * sizeof(DataType));
-    this->preprocessQueryInPlace(queryBlobCopy);
+    // force_copy == true: the iterator will free the blob, so it must own a private copy.
+    auto queryBlobCopy = this->preprocessQuery(queryBlob, true);
+
+    // take ownership of the blob copy and pass it to the batch iterator.
+    auto *queryBlobCopyPtr = queryBlobCopy.release();
     // Ownership of queryBlobCopy moves to HNSW_BatchIterator that will free it at the end.
     return new (this->allocator) HNSWMulti_BatchIterator<DataType, DistType>(
-        queryBlobCopy, this, queryParams, this->allocator);
+        queryBlobCopyPtr, this, queryParams, this->allocator);
 }
 
 /**
diff --git a/src/VecSim/algorithms/hnsw/hnsw_single.h b/src/VecSim/algorithms/hnsw/hnsw_single.h
index 6b1ee8c6c..0917e82de 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_single.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_single.h
@@ -51,9 +51,24 @@ class HNSWIndex_Single : public HNSWIndex<DataType, DistType> {
         auto id = labelLookup.at(label);
 
         auto vec = std::vector<DataType>(this->dim);
-        memcpy(vec.data(), this->getDataByInternalId(id), this->dataSize);
+        // Only copy the vector data (dim * sizeof(DataType)), not any additional metadata like the
+        // norm
+        memcpy(vec.data(), this->getDataByInternalId(id), this->dim * sizeof(DataType));
         vectors_output.push_back(vec);
     }
+
+    // Test helper: returns a copy of the whole blob stored for `label`, metadata included.
+    std::vector<std::vector<char>> getStoredVectorDataByLabel(labelType label) const override {
+        // Resolve the label to its internal id, then locate the blob kept for that id.
+        const auto internal_id = labelLookup.at(label);
+        const char *blob_begin = this->getDataByInternalId(internal_id);
+        const char *blob_end = blob_begin + this->dataSize;
+
+        // dataSize bytes are copied, so anything stored after the elements (like norms) is kept.
+        std::vector<std::vector<char>> stored_blobs;
+        stored_blobs.emplace_back(blob_begin, blob_end);
+        return stored_blobs;
+    }
 #endif
     ~HNSWIndex_Single() = default;
 
@@ -162,13 +177,14 @@ template <typename DataType, typename DistType>
 VecSimBatchIterator *
 HNSWIndex_Single<DataType, DistType>::newBatchIterator(const void *queryBlob,
                                                        VecSimQueryParams *queryParams) const {
-    auto queryBlobCopy =
-        this->allocator->allocate_aligned(this->dataSize, this->preprocessors->getAlignment());
-    memcpy(queryBlobCopy, queryBlob, this->dim * sizeof(DataType));
-    this->preprocessQueryInPlace(queryBlobCopy);
+    // force_copy == true: the iterator will free the blob, so it must own a private copy.
+    auto queryBlobCopy = this->preprocessQuery(queryBlob, true);
+
+    // take ownership of the blob copy and pass it to the batch iterator.
+    auto *queryBlobCopyPtr = queryBlobCopy.release();
     // Ownership of queryBlobCopy moves to HNSW_BatchIterator that will free it at the end.
     return new (this->allocator) HNSWSingle_BatchIterator<DataType, DistType>(
-        queryBlobCopy, this, queryParams, this->allocator);
+        queryBlobCopyPtr, this, queryParams, this->allocator);
 }
 
 /**
diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h
index 1db59afbc..29b417361 100644
--- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h
+++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h
@@ -172,7 +172,7 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
         inline void filter_irrelevant_results(VecSimQueryResultContainer &);
 
     public:
-        TieredHNSW_BatchIterator(void *query_vector,
+        TieredHNSW_BatchIterator(const void *query_vector,
                                  const TieredHNSWIndex<DataType, DistType> *index,
                                  VecSimQueryParams *queryParams,
                                  std::shared_ptr<VecSimAllocator> allocator);
@@ -206,11 +206,9 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
     VecSimDebugInfoIterator *debugInfoIterator() const override;
     VecSimBatchIterator *newBatchIterator(const void *queryBlob,
                                           VecSimQueryParams *queryParams) const override {
-        size_t blobSize = this->frontendIndex->getDim() * sizeof(DataType);
-        void *queryBlobCopy = this->allocator->allocate(blobSize);
-        memcpy(queryBlobCopy, queryBlob, blobSize);
+        // The query blob will be processed and copied by the internal indexes' batch iterators.
         return new (this->allocator)
-            TieredHNSW_BatchIterator(queryBlobCopy, this, queryParams, this->allocator);
+            TieredHNSW_BatchIterator(queryBlob, this, queryParams, this->allocator);
     }
     inline void setLastSearchMode(VecSearchMode mode) override {
         return this->backendIndex->setLastSearchMode(mode);
@@ -545,10 +543,11 @@ void TieredHNSWIndex<DataType, DistType>::executeInsertJob(HNSWInsertJob *job) {
     HNSWIndex<DataType, DistType> *hnsw_index = this->getHNSWIndex();
     // Copy the vector blob from the flat buffer, so we can release the flat lock while we are
     // indexing the vector into HNSW index.
-    auto blob_copy = this->getAllocator()->allocate_unique(this->frontendIndex->getDataSize());
-
-    memcpy(blob_copy.get(), this->frontendIndex->getDataByInternalId(job->id),
-           this->frontendIndex->getDim() * sizeof(DataType));
+    size_t data_size = this->frontendIndex->getDataSize();
+    auto blob_copy = this->getAllocator()->allocate_unique(data_size);
+    // Assuming the size of the blob stored in the frontend index matches the size of the blob
+    // stored in the HNSW index.
+    memcpy(blob_copy.get(), this->frontendIndex->getDataByInternalId(job->id), data_size);
 
     this->insertVectorToHNSW<true>(hnsw_index, job->label, blob_copy.get());
 
@@ -719,7 +718,7 @@ int TieredHNSWIndex<DataType, DistType>::addVector(const void *blob, labelType l
     int ret = 1;
     auto hnsw_index = this->getHNSWIndex();
     // writeMode is not protected since it is assumed to be called only from the "main thread"
-    // (that is the thread that is exculusively calling add/delete vector).
+    // (that is the thread that is exclusively calling add/delete vector).
     if (this->getWriteMode() == VecSim_WriteInPlace) {
         // First, check if we need to overwrite the vector in-place for single (from both indexes).
         if (!this->backendIndex->isMultiValue()) {
@@ -849,7 +848,7 @@ int TieredHNSWIndex<DataType, DistType>::deleteVector(labelType label) {
     // Note that we may remove the same vector that has been removed from the flat index, if it was
     // being ingested at that time.
     // writeMode is not protected since it is assumed to be called only from the "main thread"
-    // (that is the thread that is exculusively calling add/delete vector).
+    // (that is the thread that is exclusively calling add/delete vector).
     if (this->getWriteMode() == VecSim_WriteAsync) {
         num_deleted_vectors += this->deleteLabelFromHNSW(label);
         // Apply ready swap jobs if number of deleted vectors reached the threshold
@@ -924,9 +923,14 @@ double TieredHNSWIndex<DataType, DistType>::getDistanceFrom_Unsafe(labelType lab
 
 template <typename DataType, typename DistType>
 TieredHNSWIndex<DataType, DistType>::TieredHNSW_BatchIterator::TieredHNSW_BatchIterator(
-    void *query_vector, const TieredHNSWIndex<DataType, DistType> *index,
+    const void *query_vector, const TieredHNSWIndex<DataType, DistType> *index,
     VecSimQueryParams *queryParams, std::shared_ptr<VecSimAllocator> allocator)
-    : VecSimBatchIterator(query_vector, queryParams ? queryParams->timeoutCtx : nullptr,
+    // Tiered batch iterator doesn't hold its own copy of the query vector.
+    // Instead, each internal batch iterator (flat_iterator and hnsw_iterator) creates its own
+    // copy: the flat_iterator copy is created during TieredHNSW_BatchIterator construction. When
+    // TieredHNSW_BatchIterator::getNextResults() is called and hnsw_iterator is not initialized,
+    // the blob is retrieved from flat_iterator.
+    : VecSimBatchIterator(nullptr, queryParams ? queryParams->timeoutCtx : nullptr,
                           std::move(allocator)),
       index(index), flat_results(this->allocator), hnsw_results(this->allocator),
       flat_iterator(this->index->frontendIndex->newBatchIterator(query_vector, queryParams)),
@@ -1192,4 +1196,5 @@ void TieredHNSWIndex<DataType, DistType>::getDataByLabel(
     labelType label, std::vector<std::vector<DataType>> &vectors_output) const {
     this->getHNSWIndex()->getDataByLabel(label, vectors_output);
 }
+
 #endif
diff --git a/src/VecSim/spaces/computer/preprocessor_container.cpp b/src/VecSim/spaces/computer/preprocessor_container.cpp
index 4746dbf75..f10a85a62 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.cpp
+++ b/src/VecSim/spaces/computer/preprocessor_container.cpp
@@ -21,10 +21,9 @@ PreprocessorsContainerAbstract::preprocessForStorage(const void *original_blob,
     return wrapWithDummyDeleter(const_cast<void *>(original_blob));
 }
 
-MemoryUtils::unique_blob
-PreprocessorsContainerAbstract::preprocessQuery(const void *original_blob,
-                                                size_t processed_bytes_count) const {
-    return maybeCopyToAlignedMem(original_blob, processed_bytes_count);
+MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(
+    const void *original_blob, size_t processed_bytes_count, bool force_copy) const {
+    return maybeCopyToAlignedMem(original_blob, processed_bytes_count, force_copy);
 }
 
 void PreprocessorsContainerAbstract::preprocessQueryInPlace(void *blob,
@@ -33,15 +32,16 @@ void PreprocessorsContainerAbstract::preprocessQueryInPlace(void *blob,
 void PreprocessorsContainerAbstract::preprocessStorageInPlace(void *blob,
                                                               size_t processed_bytes_count) const {}
 
-MemoryUtils::unique_blob
-PreprocessorsContainerAbstract::maybeCopyToAlignedMem(const void *original_blob,
-                                                      size_t blob_bytes_count) const {
-    if (this->alignment) {
-        if ((uintptr_t)original_blob % this->alignment) {
-            auto aligned_mem = this->allocator->allocate_aligned(blob_bytes_count, this->alignment);
-            memcpy(aligned_mem, original_blob, blob_bytes_count);
-            return this->wrapAllocated(aligned_mem);
-        }
+MemoryUtils::unique_blob PreprocessorsContainerAbstract::maybeCopyToAlignedMem(
+    const void *original_blob, size_t blob_bytes_count, bool force_copy) const {
+    bool needs_copy =
+        force_copy || (this->alignment && ((uintptr_t)original_blob % this->alignment != 0));
+
+    if (needs_copy) {
+        auto aligned_mem = this->allocator->allocate_aligned(blob_bytes_count, this->alignment);
+        // TODO: handle original_blob_size != blob_bytes_count
+        memcpy(aligned_mem, original_blob, blob_bytes_count);
+        return this->wrapAllocated(aligned_mem);
     }
 
     // Returning a unique_ptr with a no-op deleter
diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
index dc7d7366a..bbd123492 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.h
+++ b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -30,7 +30,8 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject {
                                                           size_t processed_bytes_count) const;
 
     virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
-                                                     size_t processed_bytes_count) const;
+                                                     size_t processed_bytes_count,
+                                                     bool force_copy = false) const;
 
     virtual void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const;
 
@@ -43,7 +44,8 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject {
 
     // Allocate and copy the blob only if the original blob is not aligned.
     MemoryUtils::unique_blob maybeCopyToAlignedMem(const void *original_blob,
-                                                   size_t blob_bytes_count) const;
+                                                   size_t blob_bytes_count,
+                                                   bool force_copy = false) const;
 
     MemoryUtils::unique_blob wrapAllocated(void *blob) const {
         return MemoryUtils::unique_blob(
@@ -88,7 +90,8 @@ class MultiPreprocessorsContainer : public PreprocessorsContainerAbstract {
                                                   size_t processed_bytes_count) const override;
 
     MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
-                                             size_t processed_bytes_count) const override;
+                                             size_t processed_bytes_count,
+                                             bool force_copy = false) const override;
 
     void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const override;
 
@@ -219,7 +222,7 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessForStorage(
 
 template <typename DataType, size_t n_preprocessors>
 MemoryUtils::unique_blob MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessQuery(
-    const void *original_blob, size_t processed_bytes_count) const {
+    const void *original_blob, size_t processed_bytes_count, bool force_copy) const {
 
     void *query_blob = nullptr;
     for (auto pp : preprocessors) {
@@ -228,9 +231,9 @@ MemoryUtils::unique_blob MultiPreprocessorsContainer<DataType, n_preprocessors>:
         // modifies the memory in place
         pp->preprocessQuery(original_blob, query_blob, processed_bytes_count, this->alignment);
     }
-    return query_blob
-               ? std::move(this->wrapAllocated(query_blob))
-               : std::move(this->maybeCopyToAlignedMem(original_blob, processed_bytes_count));
+    return query_blob ? std::move(this->wrapAllocated(query_blob))
+                      : std::move(this->maybeCopyToAlignedMem(original_blob, processed_bytes_count,
+                                                              force_copy));
 }
 
 template <typename DataType, size_t n_preprocessors>
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index e7e85088c..7b422f78d 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -58,9 +58,11 @@ class CosinePreprocessor : public PreprocessorInterface {
             // If one of them is null, allocate memory for it and copy the original_blob to it.
             if (storage_blob == nullptr) {
                 storage_blob = this->allocator->allocate(processed_bytes_count);
+                // TODO: handle original_blob_size != processed_bytes_count
                 memcpy(storage_blob, original_blob, processed_bytes_count);
             } else if (query_blob == nullptr) {
                 query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+                // TODO: handle original_blob_size != processed_bytes_count
                 memcpy(query_blob, original_blob, processed_bytes_count);
             }
 
@@ -71,6 +73,7 @@ class CosinePreprocessor : public PreprocessorInterface {
             if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set
                                          // storage_blob to point to it.
                 query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+                // TODO: handle original_blob_size != processed_bytes_count
                 memcpy(query_blob, original_blob, processed_bytes_count);
                 storage_blob = query_blob;
             }
@@ -83,6 +86,7 @@ class CosinePreprocessor : public PreprocessorInterface {
                               size_t processed_bytes_count) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate(processed_bytes_count);
+            // TODO: handle original_blob_size != processed_bytes_count
             memcpy(blob, original_blob, processed_bytes_count);
         }
         normalize_func(blob, this->dim);
@@ -92,6 +96,7 @@ class CosinePreprocessor : public PreprocessorInterface {
                          unsigned char alignment) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+            // TODO: handle original_blob_size != processed_bytes_count
             memcpy(blob, original_blob, processed_bytes_count);
         }
         normalize_func(blob, this->dim);
diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h
index 34d726d8c..9c5574ea2 100644
--- a/src/VecSim/vec_sim_index.h
+++ b/src/VecSim/vec_sim_index.h
@@ -146,10 +146,11 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
     /**
      * @brief Preprocess a blob for query.
      *
-     * @param queryBlob will be copied.
+     * @param queryBlob will be copied if preprocessing is required, if it is not aligned, or if
+     * force_copy is set to true.
      * @return unique_ptr of the processed blob.
      */
-    MemoryUtils::unique_blob preprocessQuery(const void *queryBlob) const;
+    MemoryUtils::unique_blob preprocessQuery(const void *queryBlob, bool force_copy = false) const;
 
     /**
      * @brief Preprocess a blob for storage.
@@ -267,7 +268,6 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
         };
         return info;
     }
-
 #ifdef BUILD_TESTS
     void replacePPContainer(PreprocessorsContainerAbstract *newPPContainer) {
         delete this->preprocessors;
@@ -277,6 +277,43 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
     IndexComponents<DataType, DistType> get_components() const {
         return {.indexCalculator = this->indexCalculator, .preprocessors = this->preprocessors};
     }
+
+    /**
+     * @brief Used for testing - get only the vector elements associated with a given label.
+     * This function copies only the vector(s) elements into the output vector,
+     * without any additional metadata that might be stored with the vector.
+     *
+     * Important: This method returns ONLY the vector elements, even if the stored vector contains
+     * additional metadata. For example, with int8_t/uint8_t vectors using cosine similarity,
+     * this method will NOT return the norm that is stored with the vector(s).
+     *
+     * If you need the complete data including any metadata, use getStoredVectorDataByLabel()
+     * instead.
+     *
+     * @param label The label to retrieve vector(s) elements for
+     * @param vectors_output Empty vector to be filled with vector(s)
+     */
+    virtual void getDataByLabel(labelType label,
+                                std::vector<std::vector<DataType>> &vectors_output) const = 0;
+
+    /**
+     * @brief Used for testing - get the complete raw data associated with a given label.
+     * This function returns the ENTIRE vector(s) data as stored in the index, including any
+     * additional metadata that might be stored alongside the vector elements.
+     *
+     * For example:
+     * - For int8_t/uint8_t vectors with cosine similarity, this includes the norm stored at the end
+     * - For other vector types or future implementations, this will include any additional data
+     *   that might be stored with the vector
+     *
+     * Use this method when you need access to the complete vector data as it is stored internally.
+     *
+     * @param label The label to retrieve data for
+     * @return A vector containing the complete vector data (elements + metadata) for the given
+     * label
+     */
+    virtual std::vector<std::vector<char>> getStoredVectorDataByLabel(labelType label) const = 0;
+
 #endif
 
 protected:
@@ -292,8 +329,9 @@ ProcessedBlobs VecSimIndexAbstract<DataType, DistType>::preprocess(const void *b
 
 template <typename DataType, typename DistType>
 MemoryUtils::unique_blob
-VecSimIndexAbstract<DataType, DistType>::preprocessQuery(const void *queryBlob) const {
-    return this->preprocessors->preprocessQuery(queryBlob, this->dataSize);
+VecSimIndexAbstract<DataType, DistType>::preprocessQuery(const void *queryBlob,
+                                                         bool force_copy) const {
+    return this->preprocessors->preprocessQuery(queryBlob, this->dataSize, force_copy);
 }
 
 template <typename DataType, typename DistType>
diff --git a/src/VecSim/vec_sim_tiered_index.h b/src/VecSim/vec_sim_tiered_index.h
index b15cde73c..c2ae23b7f 100644
--- a/src/VecSim/vec_sim_tiered_index.h
+++ b/src/VecSim/vec_sim_tiered_index.h
@@ -107,9 +107,7 @@ class VecSimTieredIndex : public VecSimIndexInterface {
     static VecSimWriteMode getWriteMode() { return VecSimIndexInterface::asyncWriteMode; }
 
 #ifdef BUILD_TESTS
-    inline VecSimIndexAbstract<DataType, DistType> *getFlatBufferIndex() {
-        return this->frontendIndex;
-    }
+    inline BruteForceIndex<DataType, DistType> *getFlatBufferIndex() { return this->frontendIndex; }
     inline size_t getFlatBufferLimit() { return this->flatBufferLimit; }
 
     virtual void fitMemory() override {
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index ece443765..30cfb080a 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -46,6 +46,7 @@ add_executable(test_bf16 ../utils/mock_thread_pool.cpp test_bf16.cpp unit_test_u
 add_executable(test_fp16 ../utils/mock_thread_pool.cpp test_fp16.cpp unit_test_utils.cpp)
 add_executable(test_int8 ../utils/mock_thread_pool.cpp test_int8.cpp unit_test_utils.cpp)
 add_executable(test_uint8 ../utils/mock_thread_pool.cpp test_uint8.cpp unit_test_utils.cpp)
+add_executable(test_index_test_utils ../utils/mock_thread_pool.cpp test_index_test_utils.cpp unit_test_utils.cpp)
 
 target_link_libraries(test_hnsw PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_hnsw_parallel PUBLIC gtest_main VectorSimilarity)
@@ -59,6 +60,7 @@ target_link_libraries(test_bf16 PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_fp16 PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_int8 PUBLIC gtest_main VectorSimilarity)
 target_link_libraries(test_uint8 PUBLIC gtest_main VectorSimilarity)
+target_link_libraries(test_index_test_utils PUBLIC gtest_main VectorSimilarity)
 
 include(GoogleTest)
 
@@ -74,3 +76,4 @@ gtest_discover_tests(test_bf16 TEST_PREFIX BF16UNIT_)
 gtest_discover_tests(test_fp16 TEST_PREFIX FP16UNIT_)
 gtest_discover_tests(test_int8 TEST_PREFIX INT8UNIT_)
 gtest_discover_tests(test_uint8 TEST_PREFIX UINT8UNIT_)
+gtest_discover_tests(test_index_test_utils)
diff --git a/tests/unit/test_bruteforce.cpp b/tests/unit/test_bruteforce.cpp
index 821af06a9..ac7db607b 100644
--- a/tests/unit/test_bruteforce.cpp
+++ b/tests/unit/test_bruteforce.cpp
@@ -80,7 +80,7 @@ TYPED_TEST(BruteForceTest, brute_force_vector_update_test) {
     ASSERT_EQ(bf_index->idToLabelMapping.size(), DEFAULT_BLOCK_SIZE);
 
     // Check update.
-    TEST_DATA_T *vector_data = bf_index->getDataByInternalId(0);
+    const TEST_DATA_T *vector_data = bf_index->getDataByInternalId(0);
     for (size_t i = 0; i < dim; ++i) {
         ASSERT_EQ(*vector_data, 2.0);
         ++vector_data;
@@ -386,7 +386,7 @@ TYPED_TEST(BruteForceTest, test_delete_swap_block) {
     ASSERT_EQ(deleted_label_id_pair, bf_single_index->labelToIdLookup.end());
 
     // The vector in index1 should hold id5 data.
-    TEST_DATA_T *vector_data = bf_index->getDataByInternalId(1);
+    const TEST_DATA_T *vector_data = bf_index->getDataByInternalId(1);
     for (size_t i = 0; i < dim; ++i) {
         ASSERT_EQ(*vector_data, 5);
         ++vector_data;
diff --git a/tests/unit/test_index_test_utils.cpp b/tests/unit/test_index_test_utils.cpp
new file mode 100644
index 000000000..ce1cefba9
--- /dev/null
+++ b/tests/unit/test_index_test_utils.cpp
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2006-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
+ * GNU Affero General Public License v3 (AGPLv3).
+ */
+
+#include "gtest/gtest.h"
+#include "VecSim/vec_sim.h"
+#include "unit_test_utils.h"
+#include "tests_utils.h"
+#include "VecSim/algorithms/brute_force/brute_force_multi.h"
+#include "VecSim/algorithms/brute_force/brute_force_single.h"
+#include "VecSim/algorithms/hnsw/hnsw_multi.h"
+#include "VecSim/algorithms/hnsw/hnsw_single.h"
+#include "VecSim/spaces/normalize/normalize_naive.h"
+#include <cmath>
+
+class IndexTestUtilsTest : public testing::TestWithParam<std::tuple<bool, VecSimMetric>> {
+protected:
+    static constexpr size_t dim = 4;          // dimension of every generated vector
+    static constexpr size_t labels_count = 5; // number of labels each test inserts
+
+    VecSimIndex *index = nullptr; // owned; created by SetUp(params), freed by TearDown()
+    bool is_multi = std::get<0>(GetParam());
+    VecSimMetric metric = std::get<1>(GetParam());
+    size_t vec_per_label = 1; // becomes 3 for multi-value indexes once the index is created
+
+    // Shared body of both SetUp overloads: forces dim/multi/metric, then builds the index.
+    template <typename ParamsT>
+    void CreateIndex(ParamsT &params) {
+        params.dim = dim;
+        params.multi = is_multi;
+        params.metric = metric;
+        VecSimParams vecsim_params = CreateParams(params);
+        index = VecSimIndex_New(&vecsim_params);
+        vec_per_label = is_multi ? 3 : 1;
+    }
+
+    // Not gtest's no-argument SetUp() hook: each test calls one of these explicitly.
+    void SetUp(HNSWParams &params) { CreateIndex(params); }
+    void SetUp(BFParams &params) { CreateIndex(params); }
+
+    void TearDown() override {
+        if (index)
+            VecSimIndex_Free(index);
+    }
+
+    // id should be unique as it will be used as a seed for the random vector generation
+    virtual void GenerateRandomAndAddVector(size_t label, size_t id) {
+        FAIL() << "IndexTestUtilsTest::GenerateRandomAndAddVector this method should be overridden";
+    }
+    // Compares blob i of `label` with original_vectors[label * vec_per_label + i], element-wise.
+    template <typename DataType>
+    void ValidateVectorsImp(std::vector<std::vector<char>> index_label_vectors,
+                            std::vector<std::vector<DataType>> original_vectors, size_t label) {
+        for (size_t i = 0; i < vec_per_label; i++) {
+            const DataType *vec = reinterpret_cast<const DataType *>(index_label_vectors[i].data());
+            for (size_t j = 0; j < dim; j++) {
+                ASSERT_EQ(vec[j], original_vectors[label * vec_per_label + i][j]);
+            }
+        }
+    }
+    virtual void ValidateVectors(std::vector<std::vector<char>> vectors, size_t label) {
+        FAIL() << "IndexTestUtilsTest::ValidateVectors this method should be overridden";
+    }
+
+    virtual std::vector<std::vector<char>> GetStoredVectorsData(size_t label) {
+        ADD_FAILURE()
+            << "IndexTestUtilsTest::GetStoredVectorsData() this method should be overridden";
+        return {};
+    }
+
+    virtual size_t GetIndexDatasize() {
+        ADD_FAILURE() << "IndexTestUtilsTest::GetIndexDatasize() this method should be overridden";
+        return {};
+    }
+
+    // Shared test body, defined below the test suites; invoked from every TEST_P.
+    void get_stored_vector_data_single_test();
+};
+
+class Int8IndexTestUtilsTest : public IndexTestUtilsTest {
+protected:
+    std::vector<std::vector<int8_t>> vectors; // every inserted vector, in insertion order
+    void GenerateRandomAndAddVector(size_t label, size_t id) override {
+        std::vector<int8_t> blob(dim);
+        test_utils::populate_int8_vec(blob.data(), dim, id);
+        VecSimIndex_AddVector(index, blob.data(), label);
+        // Remember what was inserted so the stored data can be validated against it.
+        vectors.push_back(blob);
+    }
+
+    std::vector<std::vector<char>> GetStoredVectorsData(size_t label) override {
+        auto *typed_index = dynamic_cast<VecSimIndexAbstract<int8_t, float> *>(index);
+        return typed_index->getStoredVectorDataByLabel(label);
+    }
+
+    size_t GetIndexDatasize() override {
+        return dynamic_cast<VecSimIndexAbstract<int8_t, float> *>(index)->getDataSize();
+    }
+
+    void ValidateVectors(std::vector<std::vector<char>> index_vectors, size_t label) override {
+        ValidateVectorsImp<int8_t>(index_vectors, vectors, label);
+    }
+
+    void ValidateCosine() {
+        for (size_t label = 0; label < labels_count; label++) {
+            auto stored_data = GetStoredVectorsData(label);
+            for (size_t j = 0; j < stored_data.size(); j++) {
+                ASSERT_EQ(stored_data[j].size(), dim * sizeof(int8_t) + sizeof(float));
+                const auto *stored_vec = reinterpret_cast<const int8_t *>(stored_data[j].data());
+                const auto *stored_norm = reinterpret_cast<const float *>(stored_vec + dim);
+                // The float after the dim int8 elements must equal the original vector's norm.
+                const auto &original = vectors[label * vec_per_label + j];
+                float expected_norm = test_utils::integral_compute_norm(original.data(), dim);
+                ASSERT_EQ(*stored_norm, expected_norm) << "wrong vector norm for vector id:" << j;
+            }
+        }
+    }
+};
+
+class Float32IndexTestUtilsTest : public IndexTestUtilsTest {
+protected:
+    std::vector<std::vector<float>> vectors; // expected stored vectors, in insertion order
+    void GenerateRandomAndAddVector(size_t label, size_t id) override {
+        std::vector<float> v(dim);
+        test_utils::populate_float_vec(v.data(), dim, id);
+
+        VecSimIndex_AddVector(index, v.data(), label);
+
+        // Stored blobs are compared with this copy, so for cosine it is normalized first.
+        if (metric == VecSimMetric_Cosine)
+            VecSim_Normalize(v.data(), dim, VecSimType_FLOAT32);
+
+        vectors.emplace_back(v);
+    }
+
+    void ValidateVectors(std::vector<std::vector<char>> index_vectors, size_t label) override {
+        IndexTestUtilsTest::ValidateVectorsImp<float>(index_vectors, vectors, label);
+    }
+
+    std::vector<std::vector<char>> GetStoredVectorsData(size_t label) override {
+        return (dynamic_cast<VecSimIndexAbstract<float, float> *>(this->index))
+            ->getStoredVectorDataByLabel(label);
+    }
+
+    size_t GetIndexDatasize() override {
+        return (dynamic_cast<VecSimIndexAbstract<float, float> *>(this->index))->getDataSize();
+    }
+};
+
+TEST_P(Int8IndexTestUtilsTest, BF) {
+    BFParams params = {.type = VecSimType_INT8, .dim = dim};
+    SetUp(params);
+
+    EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test());
+    // `metric` is the fixture member; for cosine the blob's trailing norm is checked as well.
+    if (metric == VecSimMetric_Cosine) {
+        EXPECT_NO_FATAL_FAILURE(ValidateCosine());
+    }
+}
+
+TEST_P(Int8IndexTestUtilsTest, HNSW) {
+    HNSWParams params = {.type = VecSimType_INT8, .dim = dim};
+    SetUp(params);
+
+    EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test());
+    // `metric` is the fixture member; for cosine the blob's trailing norm is checked as well.
+    if (metric == VecSimMetric_Cosine) {
+        EXPECT_NO_FATAL_FAILURE(ValidateCosine());
+    }
+}
+
+/** Instantiates the Int8IndexTestUtilsTest suite over every {is_multi, VecSimMetric} pair. */
+INSTANTIATE_TEST_SUITE_P(Int8IndexTestUtilsTest, Int8IndexTestUtilsTest,
+                         testing::Combine(testing::Values(false, true), // is_multi
+                                          testing::Values(VecSimMetric_L2, VecSimMetric_IP,
+                                                          VecSimMetric_Cosine)),
+                         [](const testing::TestParamInfo<Int8IndexTestUtilsTest::ParamType> &info) {
+                             const bool is_multi = std::get<0>(info.param);
+                             std::string test_name = is_multi ? "Multi_" : "Single_";
+                             test_name += VecSimMetric_ToString(std::get<1>(info.param));
+                             return test_name;
+                         });
+
+TEST_P(Float32IndexTestUtilsTest, BF) {
+    BFParams params = {.type = VecSimType_FLOAT32, .dim = dim};
+    SetUp(params);
+
+    // The shared body is the whole test; no metric-specific validation follows it.
+    EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test());
+}
+
+TEST_P(Float32IndexTestUtilsTest, HNSW) {
+    HNSWParams params = {.type = VecSimType_FLOAT32, .dim = dim};
+    SetUp(params);
+
+    // The shared body is the whole test; no metric-specific validation follows it.
+    EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test());
+}
+
+/** Run all Float32IndexTestUtilsTest tests for each {is_multi, VecSimMetric} combination */
+INSTANTIATE_TEST_SUITE_P(
+    Float32IndexTestUtilsTest, Float32IndexTestUtilsTest,
+    testing::Combine(testing::Values(false, true), // is_multi
+                     testing::Values(VecSimMetric_L2, VecSimMetric_IP, VecSimMetric_Cosine)),
+    [](const testing::TestParamInfo<Float32IndexTestUtilsTest::ParamType> &info) {
+        bool is_multi = std::get<0>(info.param);
+        const char *metric = VecSimMetric_ToString(std::get<1>(info.param));
+        std::string test_name(is_multi ? "Multi_" : "Single_"); // prefix already ends with '_'
+        return test_name + metric;
+    });
+
+void IndexTestUtilsTest::get_stored_vector_data_single_test() {
+    const size_t expected_index_size = labels_count * vec_per_label;
+
+    // Insert vec_per_label vectors under every label; the running counter seeds each vector.
+    int next_id = 0;
+    for (size_t label = 0; label < labels_count; ++label) {
+        for (size_t k = 0; k < vec_per_label; ++k) {
+            GenerateRandomAndAddVector(label, next_id++);
+        }
+    }
+
+    // Every inserted vector must be accounted for.
+    ASSERT_EQ(VecSimIndex_IndexSize(index), expected_index_size);
+
+    // Fetch and check the stored blobs label by label.
+    for (size_t label = 0; label < labels_count; ++label) {
+        auto stored_data = GetStoredVectorsData(label);
+
+        // One blob is expected per vector stored under the label.
+        ASSERT_EQ(stored_data.size(), vec_per_label);
+
+        // Each blob must be exactly as large as the data size reported by the index.
+        size_t data_size = GetIndexDatasize();
+        for (const auto &blob : stored_data) {
+            ASSERT_EQ(blob.size(), data_size);
+        }
+
+        // Finally compare the blob contents with the vectors that were inserted.
+        EXPECT_NO_FATAL_FAILURE(ValidateVectors(stored_data, label));
+    }
+}
diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp
index aaf0f5d51..4d04ce8a4 100644
--- a/tests/unit/test_int8.cpp
+++ b/tests/unit/test_int8.cpp
@@ -7,6 +7,7 @@
 #include "VecSim/vec_sim_debug.h"
 #include "VecSim/spaces/L2/L2.h"
 #include "VecSim/spaces/IP/IP.h"
+#include "VecSim/spaces/normalize/normalize_naive.h"
 
 class INT8Test : public ::testing::Test {
 protected:
@@ -38,7 +39,9 @@ class INT8Test : public ::testing::Test {
 
     virtual HNSWIndex<int8_t, float> *CastToHNSW() { return CastIndex<HNSWIndex<int8_t, float>>(); }
 
-    void PopulateRandomVector(int8_t *out_vec) { test_utils::populate_int8_vec(out_vec, dim); }
+    void PopulateRandomVector(int8_t *out_vec) {
+        test_utils::populate_int8_vec(out_vec, dim, current_seed++); // new seed on every call
+    }
     int PopulateRandomAndAddVector(size_t id, int8_t *out_vec) {
         PopulateRandomVector(out_vec);
         return VecSimIndex_AddVector(index, out_vec, id);
@@ -92,6 +95,7 @@ class INT8Test : public ::testing::Test {
 
     VecSimIndex *index;
     size_t dim;
+    int current_seed{0};
 };
 
 class INT8HNSWTest : public INT8Test {
@@ -173,8 +177,7 @@ class INT8TieredTest : public INT8Test {
     virtual void TearDown() override {}
 
     virtual const void *GetDataByInternalId(idType id) override {
-        return CastIndex<BruteForceIndex<int8_t, float>>(CastToBruteForce())
-            ->getDataByInternalId(id);
+        return CastToBruteForce()->getDataByInternalId(id);
     }
 
     virtual HNSWIndex<int8_t, float> *CastToHNSW() override {
@@ -186,7 +189,7 @@ class INT8TieredTest : public INT8Test {
         return CastIndex<HNSWIndex_Single<int8_t, float>>(CastToHNSW());
     }
 
-    VecSimIndexAbstract<int8_t, float> *CastToBruteForce() {
+    BruteForceIndex<int8_t, float> *CastToBruteForce() {
         auto tiered_index = dynamic_cast<TieredHNSWIndex<int8_t, float> *>(index);
         return tiered_index->getFlatBufferIndex();
     }
@@ -384,7 +387,7 @@ void INT8Test::metrics_test(params_t index_params) {
     double expected_score = 0;
 
     auto verify_res = [&](size_t id, double score, size_t index) {
-        ASSERT_EQ(score, expected_score) << "failed at vector id:" << id;
+        ASSERT_NEAR(score, expected_score, 1e-6f) << "failed at vector id:" << id;
     };
 
     for (size_t i = 0; i < n; i++) {
@@ -875,7 +878,7 @@ void INT8HNSWTest::test_serialization(bool is_multi) {
     int8_t data[n * dim];
 
     for (size_t i = 0; i < n * dim; i += dim) {
-        test_utils::populate_int8_vec(data + i, dim, i);
+        this->PopulateRandomVector(data + i);
     }
 
     for (size_t j = 0; j < n; ++j) {
@@ -993,3 +996,127 @@ TEST_F(INT8TieredTest, getElementNeighbors) {
     HNSWParams params = {.dim = 4, .M = 20};
     get_element_neighbors(params);
 }
+
+/**
+ * Tests int8_t vectors with cosine similarity in a tiered index across three scenarios:
+ * 1. Verifies vector data correctness when stored in the flat buffer
+ * 2. Verifies vector data correctness when inserted directly into HNSW (when flat buffer is full)
+ * 3. Verifies vector data correctness after transfer from flat buffer to HNSW
+ *
+ * For each scenario, the test confirms:
+ * - Vector data matches the expected normalized vector
+ * - The norm is correctly stored at the end of the vector
+ * - Search operations (topK, range, batch) return the expected results
+ */
+
+TEST_F(INT8TieredTest, CosineBlobCorrectness) {
+    // Create TieredHNSW index with cosine metric
+    constexpr size_t dim = 4;
+    HNSWParams hnsw_params = {.dim = dim, .metric = VecSimMetric_Cosine};
+    // Create tiered index with buffer limit set to 1.
+    TieredIndexParams tiered_params = this->generate_tiered_params(hnsw_params, 1, 1);
+    SetUp(tiered_params);
+
+    auto frontend_index = this->CastToBruteForce();
+    auto hnsw_index = this->CastToHNSW();
+
+    int8_t vector[dim];
+    PopulateRandomVector(vector);
+    float vector_norm = spaces::IntegralType_ComputeNorm<int8_t>(vector, dim);
+
+    auto verify_norm = [&](const int8_t *input_vector, float expected_norm) {
+        float vectors_stored_norm = *(reinterpret_cast<const float *>(input_vector + dim));
+        ASSERT_EQ(vectors_stored_norm, expected_norm) << "wrong vector norm";
+    };
+
+    alignas(float) int8_t normalized_vec[dim + sizeof(float)]; // norm is read back as float
+    memcpy(normalized_vec, vector, dim);
+    spaces::integer_normalizeVector<int8_t>(normalized_vec, dim);
+    ASSERT_NO_FATAL_FAILURE(verify_norm(normalized_vec, vector_norm));
+
+    int8_t query[dim + sizeof(float)];
+    PopulateRandomVector(query);
+    float query_norm = spaces::IntegralType_ComputeNorm<int8_t>(query, dim);
+
+    // Calculate the expected score manually.
+    int ip = 0;
+    for (size_t i = 0; i < dim; i++) {
+        ip += vector[i] * query[i];
+    }
+    float expected_score = 1.0 - (float(ip) / (vector_norm * query_norm));
+
+    auto verify_res = [&](size_t label, double score, size_t result_rank) {
+        ASSERT_EQ(score, expected_score) << "label: " << label;
+    };
+
+    // ============== Scenario 1:
+    // blob correctness in the flat buffer
+
+    // Add a vector to the flat buffer.
+    VecSimIndex_AddVector(index, vector, 0);
+    {
+        SCOPED_TRACE("Store in the flat buffer");
+        // Get the stored vector data including the norm
+        auto stored_vec = frontend_index->getStoredVectorDataByLabel(0);
+        const int8_t *stored_vec_data = reinterpret_cast<const int8_t *>(stored_vec.at(0).data());
+        // the vector should be normalized.
+        ASSERT_NO_FATAL_FAILURE(CompareVectors(stored_vec_data, normalized_vec, dim));
+        // The norm should be stored in the last position; stop here if it is wrong.
+        ASSERT_NO_FATAL_FAILURE(verify_norm(stored_vec_data, vector_norm));
+
+        ASSERT_NO_FATAL_FAILURE(runTopKSearchTest(index, query, 1, verify_res));
+        ASSERT_NO_FATAL_FAILURE(runRangeQueryTest(index, query, 2, verify_res, 1, BY_SCORE));
+        VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query, nullptr);
+        ASSERT_NO_FATAL_FAILURE(runBatchIteratorSearchTest(batchIterator, 1, verify_res));
+        VecSimBatchIterator_Free(batchIterator);
+    }
+
+    // ============== Scenario 2:
+    // blob correctness when inserted directly to the hnsw
+
+    // Add another vector and exceed the flat buffer capacity. The vector should be stored directly
+    // in the hnsw index
+    VecSimIndex_AddVector(index, vector, 1);
+    EXPECT_EQ(frontend_index->indexSize(), 1);
+    EXPECT_EQ(hnsw_index->indexSize(), 1);
+    {
+        SCOPED_TRACE("Full buffer; add vector directly to hnsw");
+        auto stored_vec = hnsw_index->getStoredVectorDataByLabel(1);
+        const int8_t *stored_vec_data = reinterpret_cast<const int8_t *>(stored_vec.at(0).data());
+        // the vector should be normalized.
+        ASSERT_NO_FATAL_FAILURE(CompareVectors(stored_vec_data, normalized_vec, dim));
+        // The norm should be stored in the last position; stop here if it is wrong.
+        ASSERT_NO_FATAL_FAILURE(verify_norm(stored_vec_data, vector_norm));
+
+        size_t k = 2;
+        ASSERT_NO_FATAL_FAILURE(runTopKSearchTest(index, query, k, verify_res));
+        ASSERT_NO_FATAL_FAILURE(runRangeQueryTest(index, query, 100, verify_res, k, BY_SCORE));
+        VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query, nullptr);
+        ASSERT_NO_FATAL_FAILURE(runBatchIteratorSearchTest(batchIterator, k, verify_res));
+        VecSimBatchIterator_Free(batchIterator);
+    }
+
+    // ============== Scenario 3:
+    // blob correctness after transferred to the hnsw
+
+    // Move the first vector to the hnsw index.
+    mock_thread_pool.thread_iteration();
+    EXPECT_EQ(frontend_index->indexSize(), 0);
+    EXPECT_EQ(hnsw_index->indexSize(), 2);
+    {
+        SCOPED_TRACE("Execute insertion job");
+        auto stored_vec = hnsw_index->getStoredVectorDataByLabel(0);
+        const int8_t *stored_vec_data = reinterpret_cast<const int8_t *>(stored_vec.at(0).data());
+        // the vector should be normalized.
+        ASSERT_NO_FATAL_FAILURE(CompareVectors(stored_vec_data, normalized_vec, dim));
+        // The norm should be stored in the last position; stop here if it is wrong.
+        ASSERT_NO_FATAL_FAILURE(verify_norm(stored_vec_data, vector_norm));
+
+        size_t k = 2;
+        ASSERT_NO_FATAL_FAILURE(runTopKSearchTest(index, query, k, verify_res));
+        ASSERT_NO_FATAL_FAILURE(runRangeQueryTest(index, query, 100, verify_res, k, BY_SCORE));
+        VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(index, query, nullptr);
+        ASSERT_NO_FATAL_FAILURE(runBatchIteratorSearchTest(batchIterator, k, verify_res));
+        VecSimBatchIterator_Free(batchIterator);
+    }
+}
diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h
index 063109e5d..0236c3e67 100644
--- a/tests/utils/tests_utils.h
+++ b/tests/utils/tests_utils.h
@@ -32,6 +32,19 @@ static void populate_uint8_vec(uint8_t *v, size_t dim, int seed = 1234) {
     }
 }
 
+// Fills v[0..dim) with pseudo-random floats; v must have room for at least dim elements.
+static void populate_float_vec(float *v, size_t dim, int seed = 1234) {
+    // Mersenne Twister seeded explicitly, so the same seed reproduces the same sequence.
+    std::mt19937 rng(seed);
+
+    // Values are drawn from a uniform real distribution constructed over -1.0 .. 1.0.
+    std::uniform_real_distribution<float> uniform(-1.0f, 1.0f);
+
+    for (float *out = v; out != v + dim; ++out) {
+        *out = uniform(rng);
+    }
+}
+
 template <typename datatype>
 float integral_compute_norm(const datatype *vec, size_t dim) {
     return spaces::IntegralType_ComputeNorm<datatype>(vec, dim);