Skip to content

[8.0] [MOD-8198] Introduce INT8 (#560) #577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions .github/workflows/flow-temp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ on:
push:
branches-ignore: ['**'] # ignore all branches. Comment this line to run your workflow below on every push.
jobs:
jammy:
uses: ./.github/workflows/task-unit-test.yml
with:
container: ubuntu:jammy
run-valgrind: true
# jammy:
# uses: ./.github/workflows/task-unit-test.yml
# with:
# container: ubuntu:jammy
# run-valgrind: true
# alpine3:
# uses: ./.github/workflows/task-unit-test.yml
# with:
Expand All @@ -32,11 +32,11 @@ jobs:
# with:
# container: ubuntu:focal
# run-valgrind: false
# bullseye:
# uses: ./.github/workflows/task-unit-test.yml
# with:
# container: debian:bullseye
# run-valgrind: false
bullseye:
uses: ./.github/workflows/task-unit-test.yml
with:
container: debian:bullseye
run-valgrind: false
# amazonlinux2:
# uses: ./.github/workflows/task-unit-test.yml
# with:
Expand Down
5 changes: 5 additions & 0 deletions cmake/x86_64InstructionFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
CHECK_CXX_COMPILER_FLAG(-mavx512vbmi2 CXX_AVX512VBMI2)
CHECK_CXX_COMPILER_FLAG(-mavx512fp16 CXX_AVX512FP16)
CHECK_CXX_COMPILER_FLAG(-mavx512f CXX_AVX512F)
CHECK_CXX_COMPILER_FLAG(-mavx512vnni CXX_AVX512VNNI)
CHECK_CXX_COMPILER_FLAG(-mavx2 CXX_AVX2)
CHECK_CXX_COMPILER_FLAG(-mavx CXX_AVX)
CHECK_CXX_COMPILER_FLAG(-mf16c CXX_F16C)
Expand Down Expand Up @@ -48,6 +49,10 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
add_compile_definitions(OPT_AVX512_BW_VBMI2)
endif()

# Define OPT_AVX512_F_BW_VL_VNNI only when all four compiler-flag check results
# (AVX512F, AVX512BW, AVX512VL and AVX512VNNI) are set.
if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI)
add_compile_definitions(OPT_AVX512_F_BW_VL_VNNI)
endif()

if(CXX_F16C AND CXX_FMA AND CXX_AVX)
add_compile_definitions(OPT_F16C)
endif()
Expand Down
2 changes: 2 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_switchDeleteModes_Test)

friend class BF16TieredTest;
friend class FP16TieredTest;
friend class INT8TieredTest;
friend class CommonTypeMetricTieredTests_TestDataSizeTieredHNSW_Test;

INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)
INDEX_TEST_FRIEND_CLASS(BM_VecSimCommon)
29 changes: 17 additions & 12 deletions src/VecSim/index_factories/brute_force_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@
static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {

const BFParams *bfParams = &params->algoParams.bfParams;
size_t dataSize = VecSimParams_GetDataSize(bfParams->type, bfParams->dim, bfParams->metric);
AbstractIndexInitParams abstractInitParams = {.allocator =
VecSimAllocator::newVecsimAllocator(),
.dim = bfParams->dim,
.vecType = bfParams->type,
.dataSize = dataSize,
.metric = bfParams->metric,
.blockSize = bfParams->blockSize,
.multi = bfParams->multi,
Expand All @@ -52,32 +54,30 @@

VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &abstractInitParams,
bool is_normalized) {
// If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
// and query blobs.
VecSimMetric metric;
if (is_normalized && bfparams->metric == VecSimMetric_Cosine) {
metric = VecSimMetric_IP;
} else {
metric = bfparams->metric;
}

if (bfparams->type == VecSimType_FLOAT32) {
IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
abstractInitParams.allocator, metric, bfparams->dim);
abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<float>(bfparams, abstractInitParams, indexComponents);
} else if (bfparams->type == VecSimType_FLOAT64) {
IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
abstractInitParams.allocator, metric, bfparams->dim);
abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<double>(bfparams, abstractInitParams, indexComponents);
} else if (bfparams->type == VecSimType_BFLOAT16) {
IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
abstractInitParams.allocator, metric, bfparams->dim);
abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<bfloat16, float>(bfparams, abstractInitParams,
indexComponents);
} else if (bfparams->type == VecSimType_FLOAT16) {
IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
abstractInitParams.allocator, metric, bfparams->dim);
abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<float16, float>(bfparams, abstractInitParams,
indexComponents);
} else if (bfparams->type == VecSimType_INT8) {
IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<int8_t, float>(bfparams, abstractInitParams,
indexComponents);
}

// If we got here something is wrong.
Expand Down Expand Up @@ -117,6 +117,11 @@
} else if (params->type == VecSimType_FLOAT16) {
est += EstimateComponentsMemory<float16, float>(params->metric, is_normalized);
est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
} else if (params->type == VecSimType_INT8) {
est += EstimateComponentsMemory<int8_t, float>(params->metric, is_normalized);
est += EstimateInitialSize_ChooseMultiOrSingle<int8_t, float>(params->multi);
} else {
throw std::invalid_argument("Invalid params->type");

Check warning on line 124 in src/VecSim/index_factories/brute_force_factory.cpp

View check run for this annotation

Codecov / codecov/patch

src/VecSim/index_factories/brute_force_factory.cpp#L124

Added line #L124 was not covered by tests
}

est += sizeof(DataBlocksContainer) + allocations_overhead;
Expand Down
14 changes: 12 additions & 2 deletions src/VecSim/index_factories/components/components_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,24 @@

template <typename DataType, typename DistType>
IndexComponents<DataType, DistType>
CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric metric, size_t dim) {
CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric metric, size_t dim,
bool is_normalized) {
unsigned char alignment = 0;
spaces::dist_func_t<DistType> distFunc =
spaces::GetDistFunc<DataType, DistType>(metric, dim, &alignment);
// Currently we have only one distance calculator implementation
auto indexCalculator = new (allocator) DistanceCalculatorCommon<DistType>(allocator, distFunc);

PreprocessorsContainerParams ppParams = {.metric = metric, .dim = dim, .alignment = alignment};
// If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
// and query blobs.
VecSimMetric pp_metric;
if (is_normalized && metric == VecSimMetric_Cosine) {
pp_metric = VecSimMetric_IP;
} else {
pp_metric = metric;
}
PreprocessorsContainerParams ppParams = {
.metric = pp_metric, .dim = dim, .alignment = alignment};
auto preprocessors = CreatePreprocessorsContainer<DataType>(allocator, ppParams);

return {indexCalculator, preprocessors};
Expand Down
51 changes: 27 additions & 24 deletions src/VecSim/index_factories/hnsw_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,14 @@

static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
const HNSWParams *hnswParams = &params->algoParams.hnswParams;

size_t dataSize =
VecSimParams_GetDataSize(hnswParams->type, hnswParams->dim, hnswParams->metric);
AbstractIndexInitParams abstractInitParams = {.allocator =
VecSimAllocator::newVecsimAllocator(),
.dim = hnswParams->dim,
.vecType = hnswParams->type,
.dataSize = dataSize,
.metric = hnswParams->metric,
.blockSize = hnswParams->blockSize,
.multi = hnswParams->multi,
Expand All @@ -48,36 +52,32 @@
const HNSWParams *hnswParams = &params->algoParams.hnswParams;
AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params);

// If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
// and query blobs.
VecSimMetric metric;
if (is_normalized && hnswParams->metric == VecSimMetric_Cosine) {
metric = VecSimMetric_IP;
} else {
metric = hnswParams->metric;
}

if (hnswParams->type == VecSimType_FLOAT32) {
IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
abstractInitParams.allocator, metric, hnswParams->dim);
abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<float>(hnswParams, abstractInitParams, indexComponents);

} else if (hnswParams->type == VecSimType_FLOAT64) {
IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
abstractInitParams.allocator, metric, hnswParams->dim);
abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<double>(hnswParams, abstractInitParams,
indexComponents);

} else if (hnswParams->type == VecSimType_BFLOAT16) {
IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
abstractInitParams.allocator, metric, hnswParams->dim);
abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<bfloat16, float>(hnswParams, abstractInitParams,
indexComponents);
} else if (hnswParams->type == VecSimType_FLOAT16) {
IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
abstractInitParams.allocator, metric, hnswParams->dim);
abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<float16, float>(hnswParams, abstractInitParams,
indexComponents);
} else if (hnswParams->type == VecSimType_INT8) {
IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
abstractInitParams.allocator, hnswParams->metric, hnswParams->dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<int8_t, float>(hnswParams, abstractInitParams,
indexComponents);
}

// If we got here something is wrong.
Expand Down Expand Up @@ -114,6 +114,11 @@
} else if (params->type == VecSimType_FLOAT16) {
est += EstimateComponentsMemory<float16, float>(params->metric, is_normalized);
est += EstimateInitialSize_ChooseMultiOrSingle<float16, float>(params->multi);
} else if (params->type == VecSimType_INT8) {
est += EstimateComponentsMemory<int8_t, float>(params->metric, is_normalized);
est += EstimateInitialSize_ChooseMultiOrSingle<int8_t, float>(params->multi);
} else {
throw std::invalid_argument("Invalid params->type");

Check warning on line 121 in src/VecSim/index_factories/hnsw_factory.cpp

View check run for this annotation

Codecov / codecov/patch

src/VecSim/index_factories/hnsw_factory.cpp#L121

Added line #L121 was not covered by tests
}
est += sizeof(DataBlocksContainer) + allocations_overhead;

Expand Down Expand Up @@ -205,34 +210,32 @@
VecSimParams vecsimParams = {.algo = VecSimAlgo_HNSWLIB,
.algoParams = {.hnswParams = HNSWParams{params}}};

VecSimMetric metric;
if (is_normalized && params.metric == VecSimMetric_Cosine) {
metric = VecSimMetric_IP;
} else {
metric = params.metric;
}

AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(&vecsimParams);
if (params.type == VecSimType_FLOAT32) {
IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
abstractInitParams.allocator, metric, abstractInitParams.dim);
abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<float>(input, &params, abstractInitParams,
indexComponents, version);
} else if (params.type == VecSimType_FLOAT64) {
IndexComponents<double, double> indexComponents = CreateIndexComponents<double, double>(
abstractInitParams.allocator, metric, abstractInitParams.dim);
abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<double>(input, &params, abstractInitParams,
indexComponents, version);
} else if (params.type == VecSimType_BFLOAT16) {
IndexComponents<bfloat16, float> indexComponents = CreateIndexComponents<bfloat16, float>(
abstractInitParams.allocator, metric, abstractInitParams.dim);
abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<bfloat16, float>(input, &params, abstractInitParams,
indexComponents, version);
} else if (params.type == VecSimType_FLOAT16) {
IndexComponents<float16, float> indexComponents = CreateIndexComponents<float16, float>(
abstractInitParams.allocator, metric, abstractInitParams.dim);
abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<float16, float>(input, &params, abstractInitParams,
indexComponents, version);
} else if (params.type == VecSimType_INT8) {
IndexComponents<int8_t, float> indexComponents = CreateIndexComponents<int8_t, float>(
abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
return NewIndex_ChooseMultiOrSingle<int8_t, float>(input, &params, abstractInitParams,
indexComponents, version);
} else {
auto bad_name = VecSimType_ToString(params.type);
if (bad_name == nullptr) {
Expand Down
9 changes: 9 additions & 0 deletions src/VecSim/index_factories/tiered_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,12 @@
BFParams bf_params = NewBFParams(params);

std::shared_ptr<VecSimAllocator> flat_allocator = VecSimAllocator::newVecsimAllocator();
size_t dataSize = VecSimParams_GetDataSize(bf_params.type, bf_params.dim, bf_params.metric);

AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator,
.dim = bf_params.dim,
.vecType = bf_params.type,
.dataSize = dataSize,
.metric = bf_params.metric,
.blockSize = bf_params.blockSize,
.multi = bf_params.multi,
Expand Down Expand Up @@ -80,6 +83,10 @@
est += sizeof(TieredHNSWIndex<bfloat16, float>);
} else if (hnsw_params.type == VecSimType_FLOAT16) {
est += sizeof(TieredHNSWIndex<float16, float>);
} else if (hnsw_params.type == VecSimType_INT8) {
est += sizeof(TieredHNSWIndex<int8_t, float>);
} else {
throw std::invalid_argument("Invalid hnsw_params.type");

Check warning on line 89 in src/VecSim/index_factories/tiered_factory.cpp

View check run for this annotation

Codecov / codecov/patch

src/VecSim/index_factories/tiered_factory.cpp#L89

Added line #L89 was not covered by tests
}

return est;
Expand All @@ -96,6 +103,8 @@
return TieredHNSWFactory::NewIndex<bfloat16, float>(params);
} else if (type == VecSimType_FLOAT16) {
return TieredHNSWFactory::NewIndex<float16, float>(params);
} else if (type == VecSimType_INT8) {
return TieredHNSWFactory::NewIndex<int8_t, float>(params);
}
return nullptr; // Invalid type.
}
Expand Down
6 changes: 6 additions & 0 deletions src/VecSim/spaces/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
list(APPEND OPTIMIZATIONS functions/AVX512F.cpp)
endif()

# When all four AVX512 flag checks passed, apply the matching -mavx512* flags to
# functions/AVX512F_BW_VL_VNNI.cpp only (per-source COMPILE_FLAGS) and append that
# file to the OPTIMIZATIONS source list.
if(CXX_AVX512F AND CXX_AVX512BW AND CXX_AVX512VL AND CXX_AVX512VNNI)
message("Building with AVX512F, AVX512BW, AVX512VL and AVX512VNNI")
set_source_files_properties(functions/AVX512F_BW_VL_VNNI.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512bw -mavx512vl -mavx512vnni")
list(APPEND OPTIMIZATIONS functions/AVX512F_BW_VL_VNNI.cpp)
endif()

if(CXX_AVX2)
message("Building with AVX2")
set_source_files_properties(functions/AVX2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
Expand Down
24 changes: 24 additions & 0 deletions src/VecSim/spaces/IP/IP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,27 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension
}
return 1.0f - res;
}

// Computes the raw integer inner product of two int8 vectors.
//
// pVect1v, pVect2v: blobs whose first `dimension` bytes are read as int8_t components.
// dimension:        number of components to multiply and accumulate.
// Returns the sum of the element-wise products, accumulated in an `int`.
//
// Each product has magnitude at most 128 * 128 = 16384, so with a 32-bit `int` the
// accumulator cannot overflow as long as dimension < 2^17.
static inline int INT8_InnerProductImp(const void *pVect1v, const void *pVect2v, size_t dimension) {
    // The inputs are read-only: keep the const qualifier instead of casting it away.
    const int8_t *pVect1 = static_cast<const int8_t *>(pVect1v);
    const int8_t *pVect2 = static_cast<const int8_t *>(pVect2v);

    int res = 0;
    for (size_t i = 0; i < dimension; i++) {
        // Both int8_t operands are promoted to int before the multiplication.
        res += pVect1[i] * pVect2[i];
    }
    return res;
}

// Inner-product distance between two int8 vectors: one minus their raw integer
// inner product over the first `dimension` components.
float INT8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
    const int inner_product = INT8_InnerProductImp(pVect1v, pVect2v, dimension);
    // The subtraction happens in integer arithmetic; only the final value is
    // converted to float.
    return static_cast<float>(1 - inner_product);
}

// Cosine distance between two int8 vectors whose norms are precomputed.
//
// pVect1v, pVect2v: blobs holding `dimension` int8_t components immediately followed
//                   by the vector's norm stored as a float.
// dimension:        number of int8_t components (the norm is not counted).
// Returns 1 - <v1, v2> / (norm_v1 * norm_v2).
float INT8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
    // We expect the vectors' norm to be stored at the end of the vector.
    // The norm sits at byte offset `dimension`, which is not necessarily aligned for
    // float, so copy its bytes out instead of dereferencing a reinterpret_cast'ed
    // float pointer (misaligned access and a type-aliasing violation).
    const auto load_norm = [dimension](const void *blob) {
        const unsigned char *src = static_cast<const unsigned char *>(blob) + dimension;
        float norm = 0.0f;
        unsigned char *dst = reinterpret_cast<unsigned char *>(&norm);
        for (size_t i = 0; i < sizeof(norm); i++) {
            dst[i] = src[i];
        }
        return norm;
    };

    const float norm_v1 = load_norm(pVect1v);
    const float norm_v2 = load_norm(pVect2v);
    return 1.0f - float(INT8_InnerProductImp(pVect1v, pVect2v, dimension)) / (norm_v1 * norm_v2);
}
3 changes: 3 additions & 0 deletions src/VecSim/spaces/IP/IP.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ float FP16_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension

float BF16_InnerProduct_LittleEndian(const void *pVect1v, const void *pVect2v, size_t dimension);
float BF16_InnerProduct_BigEndian(const void *pVect1v, const void *pVect2v, size_t dimension);

// Inner-product distance for int8 vectors: 1 minus the integer inner product of the
// first `dimension` components of each blob.
float INT8_InnerProduct(const void *pVect1, const void *pVect2, size_t dimension);
// Cosine distance for int8 vectors. Each blob is expected to hold `dimension` int8
// components followed by the vector's norm stored as a float.
float INT8_Cosine(const void *pVect1, const void *pVect2, size_t dimension);
Loading
Loading