Skip to content

Draft: Real vectors for tests #980

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions tests/congruence_tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
COLLECTION_NAME = "congruence_test_collection"

# dense vectors sizes
# The text, image and code vectors all use one common dimensionality.
# NOTE(review): presumably 384 is the row width of the stored .npy test vectors - confirm.
text_vector_size = image_vector_size = code_vector_size = 384

# sparse vectors sizes
sparse_text_vector_size = 100
Expand Down
6 changes: 5 additions & 1 deletion tests/congruence_tests/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@


def random_vector(dims: int) -> list[float]:
    """Return one vector picked at random from the stored query vectors.

    The vectors are read from ``data/queries.npy`` (path relative to the
    current working directory), cast to float32 and de-duplicated row-wise
    before one row is drawn.

    Args:
        dims: Accepted for call-site compatibility only; it is not used. The
            length of the returned vector is whatever the file stores.
            NOTE(review): callers presumably pass the stored dimensionality -
            confirm against the collection configuration.

    Returns:
        The selected row as a list of Python floats.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code; keep it only for trusted, repo-owned files.
    stored = np.load("data/queries.npy", allow_pickle=True).astype(np.float32)
    distinct = np.unique(stored, axis=0)

    # np.random.choice yields a row *index*; the vector is the row it points at.
    row = np.random.choice(len(distinct))
    return distinct[row].tolist()


@pytest.fixture(scope="module")
Expand Down
6 changes: 5 additions & 1 deletion tests/congruence_tests/test_recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ class TestSimpleRecommendation:
__test__ = False

def __init__(self):
    """Choose one stored query vector at random as the image query.

    Reads ``data/queries.npy`` (path relative to the current working
    directory), casts it to float32, drops duplicate rows and stores one
    randomly selected row in ``self.query_image`` as a list of floats.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code; keep it only for trusted, repo-owned files.
    stored = np.load("data/queries.npy", allow_pickle=True).astype(np.float32)
    distinct = np.unique(stored, axis=0)

    # np.random.choice yields a row *index*; the query is the row it points at.
    row = np.random.choice(len(distinct))
    self.query_image = distinct[row].tolist()

@classmethod
def simple_recommend_image(cls, client: QdrantBase) -> list[models.ScoredPoint]:
Expand Down
11 changes: 8 additions & 3 deletions tests/congruence_tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@ class TestSimpleSearcher:
__test__ = False

def __init__(self):
    """Choose three different stored query vectors for text, image and code.

    Reads ``data/queries.npy`` (path relative to the current working
    directory), casts it to float32, drops duplicate rows and assigns three
    distinct rows to ``self.query_text``, ``self.query_image`` and
    ``self.query_code`` as lists of floats.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code; keep it only for trusted, repo-owned files.
    stored = np.load("data/queries.npy", allow_pickle=True).astype(np.float32)
    distinct = np.unique(stored, axis=0)

    # replace=False guarantees the three row indices differ from one another.
    text_row, image_row, code_row = np.random.choice(len(distinct), size=3, replace=False)

    self.query_text = distinct[text_row].tolist()
    self.query_image = distinct[image_row].tolist()
    self.query_code = distinct[code_row].tolist()

def simple_search_text(self, client: QdrantBase) -> list[models.ScoredPoint]:
return client.search(
Expand Down
22 changes: 16 additions & 6 deletions tests/fixtures/points.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,24 @@
from qdrant_client.local.sparse import validate_sparse_vector
from tests.fixtures.payload import one_random_payload_please

# Pool of stored vectors, read once when this module is imported
# (path is relative to the current working directory).
# Duplicate rows are removed so every entry of the pool is distinct.
_text_vectors_unique = np.unique(np.load("data/text.npy"), axis=0)
# Same rows as nested Python lists, addressable by integer position.
_text_vectors = _text_vectors_unique.tolist()

def random_vectors(
    vector_sizes: Union[dict[str, int], int],
    idx: Union[int, None] = None,
) -> models.VectorStruct:
    """Build one vector, or one vector per name, for a test point.

    Args:
        vector_sizes: Either a single size (returns one vector) or a mapping of
            vector name to size (returns a dict with one vector per name).
        idx: Optional position in the module-level ``_text_vectors`` pool. When
            given, the stored row at that position is used for every requested
            vector and the requested sizes are not consulted. When ``None``,
            vectors are generated with ``np.random.random`` and rounded to
            three decimals.

    Returns:
        A list of floats for an int ``vector_sizes``, or a dict mapping each
        name to a list of floats for a dict ``vector_sizes``.

    Raises:
        ValueError: If ``vector_sizes`` is neither an int nor a dict.
    """

    def _one(size: int) -> list[float]:
        # Compare against None explicitly: position 0 is a valid pool index and
        # must not fall through to the random branch.
        if idx is not None:
            # Copy the row so callers never share (or mutate) the pool's lists.
            return list(_text_vectors[idx])
        return np.random.random(size).round(3).tolist()

    if isinstance(vector_sizes, int):
        return _one(vector_sizes)
    if isinstance(vector_sizes, dict):
        return {name: _one(size) for name, size in vector_sizes.items()}
    raise ValueError("vector_sizes must be int or dict")
Expand Down Expand Up @@ -100,7 +108,9 @@ def generate_points(
if skip_vectors and isinstance(vector_sizes, int):
raise ValueError("skip_vectors is not supported for single vector")

sampled_vectors = np.random.choice(len(_text_vectors), size=num_points, replace=False)
points = []

for i in range(num_points):
payload = None
if with_payload:
Expand All @@ -115,7 +125,7 @@ def generate_points(
elif multivector:
vectors = random_multivectors(vector_sizes)
else:
vectors = random_vectors(vector_sizes)
vectors = random_vectors(vector_sizes, sampled_vectors[i])

if skip_vectors:
if random.random() > 0.8:
Expand Down