Skip to content

Draft: Real vectors for tests #980

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/congruence_tests/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
init_local,
init_remote,
)

from tests.congruence_tests.test_common import text_vector_size

COLLECTION_NAME = "test_collection"

Expand Down Expand Up @@ -101,7 +101,7 @@ def test_collection_exists():


def test_init_from():
vector_size = 2
vector_size = text_vector_size

remote_client = init_remote()
local_client = init_local()
Expand Down
8 changes: 5 additions & 3 deletions tests/congruence_tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
from tests.congruence_tests.settings import TIMEOUT
from tests.fixtures.points import generate_points

from tests.fixtures.points import text_vector_size

COLLECTION_NAME = "congruence_test_collection"

# dense vectors sizes
text_vector_size = 50
image_vector_size = 100
code_vector_size = 80
text_vector_size = text_vector_size # todo 384
image_vector_size = text_vector_size # todo 384
code_vector_size = text_vector_size # todo 384

# sparse vectors sizes
sparse_text_vector_size = 100
Expand Down
3 changes: 2 additions & 1 deletion tests/congruence_tests/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
init_remote,
)
from tests.fixtures.filters import one_random_filter_please
from tests.fixtures.points import sample_queries

secondary_collection_name = "congruence_secondary_collection"


def random_vector(dims: int) -> list[float]:
return np.random.random(dims).round(3).tolist()
return sample_queries(1)[0]


@pytest.fixture(scope="module")
Expand Down
18 changes: 10 additions & 8 deletions tests/congruence_tests/test_group_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
text_vector_size,
)
from tests.fixtures.filters import one_random_filter_please
from tests.fixtures.points import sample_queries

LOOKUP_COLLECTION_NAME = "lookup_collection"

Expand All @@ -26,9 +27,10 @@ class TestGroupSearcher:
__test__ = False

def __init__(self):
self.query_text = np.random.random(text_vector_size).tolist()
self.query_image = np.random.random(image_vector_size).tolist()
self.query_code = np.random.random(code_vector_size).tolist()
queries = sample_queries(3)
self.query_text = queries[0]
self.query_image = queries[1]
self.query_code = queries[2]
self.group_by = "rand_digit"
self.group_size = 1
self.limit = 10
Expand Down Expand Up @@ -217,8 +219,8 @@ def group_by_keys():


def test_group_search_types():
fixture_points = generate_fixtures(vectors_sizes=50)
vectors_config = models.VectorParams(size=50, distance=models.Distance.EUCLID)
fixture_points = generate_fixtures(vectors_sizes=text_vector_size)
vectors_config = models.VectorParams(size=text_vector_size, distance=models.Distance.EUCLID)

searcher = TestGroupSearcher()

Expand All @@ -228,17 +230,17 @@ def test_group_search_types():
remote_client = init_remote()
init_client(remote_client, fixture_points, vectors_config=vectors_config)

query_vector_np = np.random.random(text_vector_size)
query_vector_list = sample_queries(1)[0]
query_vector_np = np.array(query_vector_list)
compare_client_results(
local_client,
remote_client,
searcher.group_search,
query_vector=query_vector_np,
)

query_vector_list = query_vector_np.tolist()
compare_client_results(
local_client, remote_client, searcher.group_search, query_vector=query_vector_list
local_client, remote_client, searcher.group_search, query_vector=query_vector_np
)

delete_fixture_collection(local_client)
Expand Down
16 changes: 6 additions & 10 deletions tests/congruence_tests/test_multivector_discovery_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
init_remote,
multi_vector_config,
)
from tests.fixtures.points import generate_random_multivector
from tests.fixtures.points import sample_random_multivector

secondary_collection_name = "congruence_secondary_collection"

Expand Down Expand Up @@ -126,10 +126,10 @@ def test_context_many_pairs(
http_client,
grpc_client,
):
random_image_multivector_1 = generate_random_multivector(
random_image_multivector_1 = sample_random_multivector(
image_vector_size, random.randint(2, 30)
)
random_image_multivector_2 = generate_random_multivector(
random_image_multivector_2 = sample_random_multivector(
image_vector_size, random.randint(2, 30)
)

Expand Down Expand Up @@ -227,9 +227,7 @@ def test_discover_raw_target(
http_client,
grpc_client,
):
random_image_multivector = generate_random_multivector(
image_vector_size, random.randint(2, 30)
)
random_image_multivector = sample_random_multivector(image_vector_size, random.randint(2, 30))

def f(client: QdrantBase, **kwargs: dict[str, Any]) -> list[models.ScoredPoint]:
return client.query_points(
Expand All @@ -253,9 +251,7 @@ def test_context_raw_positive(
http_client,
grpc_client,
):
random_image_multivector = generate_random_multivector(
image_vector_size, random.randint(2, 30)
)
random_image_multivector = sample_random_multivector(image_vector_size, random.randint(2, 30))

def f(client: QdrantBase, **kwargs: dict[str, Any]) -> list[models.ScoredPoint]:
return client.query_points(
Expand Down Expand Up @@ -416,7 +412,7 @@ def f(client: QdrantBase, **kwargs: dict[str, Any]) -> list[list[models.ScoredPo

def test_query_with_nan():
fixture_points = generate_multivector_fixtures(20)
vector = generate_random_multivector(image_vector_size, random.randint(2, 30))
vector = sample_random_multivector(image_vector_size, random.randint(2, 30))
vector[0][1] = np.nan
using = "multi-image"

Expand Down
18 changes: 12 additions & 6 deletions tests/congruence_tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
generate_random_multivector,
)
from tests.utils import read_version
from tests.fixtures.points import sample_queries

SECONDARY_COLLECTION_NAME = "congruence_secondary_collection"

Expand All @@ -46,12 +47,15 @@ def __init__(self):
self.group_size = 3
self.limit = 2 # number of groups

sampled_queries = sample_queries(4)
self.query_image = sampled_queries[0]

# dense query vectors
self.dense_vector_query_text = np.random.random(text_vector_size).tolist()
self.dense_vector_query_text_bis = self.dense_vector_query_text
self.dense_vector_query_text = sampled_queries[1]
self.dense_vector_query_text_bis = sampled_queries[1]
self.dense_vector_query_text_bis[0] += 42.0 # slightly different vector
self.dense_vector_query_image = np.random.random(image_vector_size).tolist()
self.dense_vector_query_code = np.random.random(code_vector_size).tolist()
self.dense_vector_query_image = sampled_queries[2]
self.dense_vector_query_code = sampled_queries[3]

# sparse query vectors
self.sparse_vector_query_text = generate_random_sparse_vector(
Expand Down Expand Up @@ -1458,9 +1462,11 @@ def test_original_input_persistence():
# the reason was that we were replacing point id with a sparse vector, and then, when we needed a dense vector
# from the same point id, we already had point id replaced with a sparse vector
num_points = 50
vectors_config = {"text": models.VectorParams(size=50, distance=models.Distance.COSINE)}
vectors_config = {
"text": models.VectorParams(size=text_vector_size, distance=models.Distance.COSINE)
}
sparse_vectors_config = {"sparse-text": models.SparseVectorParams()}
fixture_points = generate_fixtures(vectors_sizes={"text": 50}, num=num_points)
fixture_points = generate_fixtures(vectors_sizes={"text": text_vector_size}, num=num_points)
sparse_fixture_points = generate_sparse_fixtures(num=num_points)
points = [
models.PointStruct(
Expand Down
26 changes: 13 additions & 13 deletions tests/congruence_tests/test_query_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
generate_multivector_fixtures,
multi_vector_config,
)
from tests.fixtures.points import generate_random_sparse_vector, generate_random_multivector
from tests.fixtures.points import (
generate_random_sparse_vector,
generate_random_multivector,
sample_queries,
)


class TestQueryBatchSearcher:
Expand All @@ -39,28 +43,27 @@ def __init__(self):
self.multivector_query_batch_code = []

for _ in range(4):
vecs = sample_queries(4)
self.dense_vector_query_batch_text.append(
models.QueryRequest(
query=np.random.random(text_vector_size).tolist(),
prefetch=models.Prefetch(
query=np.random.random(text_vector_size).tolist(), limit=5, using="text"
),
query=vecs[0],
prefetch=models.Prefetch(query=vecs[1], limit=5, using="text"),
limit=5,
using="text",
with_payload=True,
)
)
self.dense_vector_query_batch_image.append(
models.QueryRequest(
query=np.random.random(image_vector_size).tolist(),
query=vecs[2],
limit=5,
using="image",
with_payload=True,
)
)
self.dense_vector_query_batch_code.append(
models.QueryRequest(
query=np.random.random(code_vector_size).tolist(),
query=vecs[3],
limit=5,
using="code",
with_payload=True,
Expand Down Expand Up @@ -101,16 +104,13 @@ def __init__(self):
)
)

vecs = sample_queries(2)
self.dense_vector_query_batch_text_dbsf = [
models.QueryRequest(
query=models.FusionQuery(fusion=models.Fusion.DBSF),
prefetch=[
models.Prefetch(
query=np.random.random(text_vector_size).tolist(), using="text"
),
models.Prefetch(
query=np.random.random(text_vector_size).tolist(), using="text"
),
models.Prefetch(query=vecs[0], using="text"),
models.Prefetch(query=vecs[1], using="text"),
],
with_payload=True,
)
Expand Down
4 changes: 2 additions & 2 deletions tests/congruence_tests/test_recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
init_remote,
)
from tests.fixtures.filters import one_random_filter_please
from tests.fixtures.points import sample_queries

secondary_collection_name = "congruence_secondary_collection"

Expand All @@ -24,7 +25,7 @@ class TestSimpleRecommendation:
__test__ = False

def __init__(self):
self.query_image = np.random.random(image_vector_size).tolist()
self.query_image = sample_queries(1)[0]

@classmethod
def simple_recommend_image(cls, client: QdrantBase) -> list[models.ScoredPoint]:
Expand Down Expand Up @@ -291,7 +292,6 @@ def test_recommend_from_another_collection():

def test_simple_recommend() -> None:
fixture_points = generate_fixtures()

secondary_collection_points = generate_fixtures(100)

searcher = TestSimpleRecommendation()
Expand Down
10 changes: 7 additions & 3 deletions tests/congruence_tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,19 @@
text_vector_size,
)
from tests.fixtures.filters import one_random_filter_please
from tests.fixtures.points import generate_vectors


class TestSimpleSearcher:
__test__ = False

def __init__(self):
self.query_text = np.random.random(text_vector_size).tolist()
self.query_image = np.random.random(image_vector_size).tolist()
self.query_code = np.random.random(code_vector_size).tolist()
_text_vectors = generate_vectors()
sampled_vectors = np.random.choice(len(_text_vectors), size=3, replace=False)

self.query_text = _text_vectors[sampled_vectors[0]]
self.query_image = _text_vectors[sampled_vectors[1]]
self.query_code = _text_vectors[sampled_vectors[2]]
Comment on lines +26 to +31
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Improve test reliability by handling potential missing data files

The test now depends on external data files, which might not be available in all environments. Consider handling the FileNotFoundError that generate_vectors() raises when those files are missing.

This dependency on external data files could make the tests fail in environments where the data files aren't present. Make sure this is compatible with the test strategy for the project.


🏁 Script executed:

#!/bin/bash
# Check if the test is trying to access the data files
grep -r "data/text.npy" --include="*.py" .
grep -r "data/queries.npy" --include="*.py" .

# Check if these files exist in the repository
fd "text.npy" .
fd "queries.npy" .

Length of output: 349


Handle missing data files in tests/fixtures/points.py

The helper in tests/fixtures/points.py loads

np.load("data/text.npy", …)
np.load("data/queries.npy", …)

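but neither file is bundled in the repo. Because the paths are relative, they are also resolved against whatever directory pytest is launched from. In environments without those .npy files, generate_vectors() will raise FileNotFoundError and break the tests. To improve reliability, please choose one of the following: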

  • Wrap each np.load in a try/except that catches FileNotFoundError and calls pytest.skip(…) with a clear message.
  • Add sample .npy files under tests/fixtures/data/ (and update the paths accordingly).
  • Mock or patch np.load in your test suite so that generate_vectors() doesn’t depend on external files.
🤖 Prompt for AI Agents
In tests/congruence_tests/test_search.py around lines 26 to 31, the call to
generate_vectors() depends on external .npy data files that may be missing in
some environments, causing the test to fail. To fix this, add error handling
around generate_vectors() to catch FileNotFoundError and call pytest.skip() with
a clear message explaining the missing data files. This will gracefully skip the
test when the required files are not present, improving test reliability.


def simple_search_text(self, client: QdrantBase) -> list[models.ScoredPoint]:
return client.search(
Expand Down
4 changes: 2 additions & 2 deletions tests/embed_tests/test_local_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
LateInteractionTextEmbedding,
ImageEmbedding,
)

from tests.congruence_tests.test_common import text_vector_size

COLLECTION_NAME = "inference_collection"
DENSE_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
DENSE_DIM = 384
DENSE_DIM = text_vector_size
SPARSE_MODEL_NAME = "Qdrant/bm42-all-minilm-l6-v2-attentions"
COLBERT_MODEL_NAME = "answerdotai/answerai-colbert-small-v1"
COLBERT_DIM = 96
Expand Down
Loading
Loading