Skip to content

Commit 4782522

Browse files
committed
Groundtruth computing
1 parent d2ac0f0 commit 4782522

File tree

4 files changed

+131
-37
lines changed

4 files changed

+131
-37
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ build
44
data/gist.tar.gz
55
data/sift
66
test.txt
7-
test.vig
7+
test.vig
8+
data/Dummy/dummy-groundtruth.bin

include/Algorithms/GreedySearch.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,6 @@ struct EuclideanDistanceOrder {
115115
*
116116
* @return Pair of sets: the first set contains the k nearest nodes, and the second set contains all visited nodes
117117
*/
118-
119-
/*
120-
*/
121118
template <typename graph_t>
122119
std::pair<std::set<graph_t>, std::set<graph_t>>
123120
GreedySearch(const Graph<graph_t>& G, const GraphNode<graph_t>& s, const graph_t& xq, unsigned int k, unsigned int L) {

main2.cpp

Lines changed: 129 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,53 +4,149 @@
44
#include "include/DataStructures/DataVector/BQDataVectors.h"
55
#include "include/read_data.h"
66
#include "include/distance.h"
7+
#include <fstream>
78

9+
/**
10+
* @brief Compute the groundtruth for a set of base and query vectors.
11+
*
12+
* This function computes the groundtruth for a set of base and query vectors by calculating the Euclidean distance
13+
* between each query vector and all base vectors. The function supports only two query types: 0 and 1. For query type 0,
14+
* the function computes the distances between the query vector and all base vectors. For query type 1, the function computes
15+
* the distances between the query vector and the base vectors with the same C value.
16+
*
17+
* @param base_vectors A vector of BaseDataVector objects representing the base vectors
18+
* @param query_vectors A vector of QueryDataVector objects representing the query vectors
19+
* @param maxDistances The maximum number of distances to compute for each query vector
20+
*
21+
* @return A 2D vector containing the computed distances for each query vector
22+
*/
23+
std::vector<std::vector<float>> computeGroundtruth(
24+
const std::vector<BaseDataVector<float>> base_vectors, const std::vector<QueryDataVector<float>> query_vectors, const unsigned int maxDistances) {
825

9-
int main(int argc, char* argv[]) {
26+
// Allocate memory for the distance vector
27+
std::vector<std::vector<float>> distances(query_vectors.size());
1028

11-
std::vector<BaseDataVector<float>> base_vectors = ReadFilteredBaseVectorFile("data/Dummy/dummy-data.bin");
12-
std::vector<QueryDataVector<float>> query_vectors = ReadFilteredQueryVectorFile("data/Dummy/dummy-queries.bin");
29+
// Compute the distances between the query vectors and the base vectors (with the same filter)
30+
// If no filter provided then compute to the whole graph
31+
for (auto query : query_vectors) {
1332

14-
// Allocate memory for the distances between the query and base vectors
15-
double** distances = new double*[query_vectors.size()];
16-
unsigned int** baseVectorIndeces = new unsigned int*[query_vectors.size()];
17-
for (unsigned int i = 0; i < query_vectors.size(); i++) {
18-
distances[i] = new double[base_vectors.size()];
19-
baseVectorIndeces[i] = new unsigned int[base_vectors.size()];
20-
}
33+
// Compute the distances between the query vector and all base vectors
34+
if (query.getQueryType() == 0) {
35+
for (auto base : base_vectors) {
36+
distances[query.getIndex()].push_back(euclideanDistance(base, query));
37+
}
38+
}
2139

22-
for (unsigned int i = 0; i < query_vectors.size(); i++) {
23-
for (unsigned int j = 0; j < base_vectors.size(); j++) {
24-
float distance = euclideanDistance(query_vectors[i], base_vectors[j]);
25-
distances[i][j] = distance;
26-
baseVectorIndeces[i][j] = j;
40+
// If the filter type is C_EQUALS_v then compute the distances between the query vector
41+
// and the base vectors with the same C value
42+
else if (query.getQueryType() == 1) {
43+
for (auto base : base_vectors) {
44+
if (base.getC() == query.getV()) {
45+
distances[query.getIndex()].push_back(euclideanDistance(base, query));
46+
}
47+
}
2748
}
49+
// IMPORTANT: Queries with filter type 2 and 3 are not supported in this version of the application
2850
}
2951

30-
// Sort the distances and keep the indeces of the base vectors
52+
// Sort the distances for each query vector
3153
for (unsigned int i = 0; i < query_vectors.size(); i++) {
32-
std::sort(baseVectorIndeces[i], baseVectorIndeces[i] + base_vectors.size(),
33-
[&distances, i](unsigned int a, unsigned int b) {
34-
return distances[i][a] < distances[i][b];
35-
}
36-
);
54+
std::sort(distances[i].begin(), distances[i].end());
3755
}
3856

39-
// Print the indeces of the base vectors with the smallest distances to the query vectors
40-
for (unsigned int i = 0; i < query_vectors.size(); i++) {
41-
std::cout << "Query " << i << ":\n";
42-
for (unsigned int j = 0; j < 5; j++) {
43-
std::cout << "Base vector index: " << baseVectorIndeces[i][j] << ", distance: " << distances[i][baseVectorIndeces[i][j]] << std::endl;
44-
}
57+
// Return the first `maxDistances` distances for each query vector
58+
for (auto& query_distances : distances) {
59+
query_distances.resize(maxDistances);
4560
}
4661

47-
// Deallocate memory for the distances
48-
for (unsigned int i = 0; i < query_vectors.size(); i++) {
49-
delete[] distances[i];
50-
delete[] baseVectorIndeces[i];
62+
return distances;
63+
64+
}
65+
66+
/**
 * @brief Save the computed groundtruth distances to a binary file.
 *
 * The file is written in the machine's native byte order and consists of the following:
 *  - The number of query vectors (4 bytes)
 *  - For each query vector:
 *    - The number of distances (4 bytes)
 *    - The computed distances (4 bytes each)
 *
 * Both counts are stored as 32-bit unsigned integers, so larger sizes cannot be represented by this format.
 * Failures are reported on std::cerr: either the file could not be opened, or writing/closing it failed
 * (in which case the file on disk may be incomplete).
 *
 * @param distances A 2D vector containing the computed distances for each query vector
 * @param filename The name of the output file to save the distances
 */
void saveGroundtruthToFile(const std::vector<std::vector<float>>& distances, const std::string& filename) {

    std::ofstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cerr << "Error opening file: " << filename << std::endl;
        return;
    }

    // Header: the number of query vectors
    const uint32_t num_queries = static_cast<uint32_t>(distances.size());
    file.write(reinterpret_cast<const char*>(&num_queries), sizeof(num_queries));

    // One record per query vector: the count followed by the raw float values
    for (const auto& query_distances : distances) {
        const uint32_t num_distances = static_cast<uint32_t>(query_distances.size());
        file.write(reinterpret_cast<const char*>(&num_distances), sizeof(num_distances));

        // Nothing to write for an empty list (data() may be null in that case)
        if (num_distances > 0) {
            file.write(reinterpret_cast<const char*>(query_distances.data()),
                       static_cast<std::streamsize>(num_distances * sizeof(float)));
        }
    }

    // A failed write leaves the stream in an error state and a failed close() sets failbit,
    // so a single check after closing covers every I/O problem (e.g. a full disk)
    file.close();
    if (file.fail()) {
        std::cerr << "Error writing file: " << filename << std::endl;
    }
}
99+
100+
/**
 * @brief Read the groundtruth distances from a binary file.
 *
 * The expected file format (native byte order) consists of the following:
 *  - The number of query vectors (4 bytes)
 *  - For each query vector:
 *    - The number of distances (4 bytes)
 *    - The computed distances (4 bytes each)
 *
 * Every count read from the file is validated against the number of bytes that are still available before
 * memory is allocated for it, so an empty, truncated or corrupted file is rejected instead of producing
 * uninitialized values or a huge allocation. The file has to be seekable for this check.
 *
 * @param filename The name of the input file to read the distances from
 *
 * @return A 2D vector containing the read distances for each query vector, or an empty vector if the
 *         file cannot be opened or is malformed (an error message is printed to std::cerr)
 */
std::vector<std::vector<float>> readGroundtruthFromFile(const std::string& filename) {

    std::ifstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cerr << "Error opening file: " << filename << std::endl;
        return {};
    }

    // Common exit for every malformed-file case
    const auto fail = [&filename]() {
        std::cerr << "Error reading file: " << filename << std::endl;
        return std::vector<std::vector<float>>{};
    };

    // Find out how many bytes the file holds, to validate the counts stored inside it
    file.seekg(0, std::ios::end);
    const std::streamoff file_size = file.tellg();
    file.seekg(0, std::ios::beg);
    if (!file || file_size < 0) {
        return fail();
    }
    uint64_t remaining = static_cast<uint64_t>(file_size);

    // Read the number of query vectors from the file
    uint32_t num_queries = 0;
    if (remaining < sizeof(num_queries) || !file.read(reinterpret_cast<char*>(&num_queries), sizeof(num_queries))) {
        return fail();
    }
    remaining -= sizeof(num_queries);

    // Every query record occupies at least its own 4-byte count
    if (num_queries > remaining / sizeof(uint32_t)) {
        return fail();
    }

    std::vector<std::vector<float>> distances(num_queries);

    // Read the distances for each query vector
    for (auto& query_distances : distances) {
        uint32_t num_distances = 0;
        if (remaining < sizeof(num_distances) || !file.read(reinterpret_cast<char*>(&num_distances), sizeof(num_distances))) {
            return fail();
        }
        remaining -= sizeof(num_distances);

        // The announced payload must fit in what is left of the file
        if (num_distances > remaining / sizeof(float)) {
            return fail();
        }

        query_distances.resize(num_distances);
        if (num_distances > 0) {
            const uint64_t payload = static_cast<uint64_t>(num_distances) * sizeof(float);
            if (!file.read(reinterpret_cast<char*>(query_distances.data()), static_cast<std::streamsize>(payload))) {
                return fail();
            }
            remaining -= payload;
        }
    }

    return distances;
}
138+
139+
int main(int argc, char* argv[]) {
140+
141+
std::vector<BaseDataVector<float>> base_vectors = ReadFilteredBaseVectorFile("data/Dummy/dummy-data.bin");
142+
std::vector<QueryDataVector<float>> query_vectors = ReadFilteredQueryVectorFile("data/Dummy/dummy-queries.bin");
143+
144+
// Compute the distance vector, and save the computed distances to a file
145+
std::vector<std::vector<float>> distances = computeGroundtruth(base_vectors, query_vectors, 1000);
146+
saveGroundtruthToFile(distances, "data/Dummy/dummy-groundtruth.bin");
147+
148+
// Example usage of readGroundtruthFromFile
149+
std::vector<std::vector<float>> read_distances = readGroundtruthFromFile("data/Dummy/dummy-groundtruth.bin");
54150

55151
return 0;
56152

sample_vectors.bin

-16 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)