diff --git a/Makefile b/Makefile index 5e287dc..6a52e63 100644 --- a/Makefile +++ b/Makefile @@ -67,13 +67,13 @@ FLAGS += $(DEPFLAGS) # Execution Rules create_simple_via: - ./bin/main --create -index-type 'simple' -base-file 'data/siftsmall/siftsmall_base.fvecs' -L 120 -R 12 -alpha 1.0 -save 'simple_index.bin' + ./bin/main --create -index-type 'simple' -base-file 'data/siftsmall/siftsmall_base.fvecs' -L 120 -R 12 -alpha 1.0 -save 'simple_index.bin' -distance-save 'matrix' -distance-threads 1 create_filtered_via: - ./bin/main --create -index-type 'filtered' -base-file 'data/Dummy/dummy-data.bin' -L 120 -R 12 -alpha 1.0 -save 'filtered_index.bin' + ./bin/main --create -index-type 'filtered' -base-file 'data/Dummy/dummy-data.bin' -L 120 -R 12 -alpha 1.0 -save 'filtered_index.bin' -distance-save 'matrix' -distance-threads 1 create_stiched_via: - ./bin/main --create -index-type 'stiched' -base-file 'data/Dummy/dummy-data.bin' -L-small 150 -R-small 12 -R-stiched 20 -alpha 1.0 -save 'stiched_index.bin' + ./bin/main --create -index-type 'stiched' -base-file 'data/Dummy/dummy-data.bin' -L-small 150 -R-small 12 -R-stiched 20 -alpha 1.0 -save 'stiched_index.bin' -distance-threads 1 -computing-threads 1 compute_groundtruth: ./bin/main --compute-gt -base-file 'data/Dummy/dummy-data.bin' -query-file 'data/Dummy/dummy-queries.bin' -gt-file 'data/Dummy/dummy-groundtruth.bin' diff --git a/app/main.cpp b/app/main.cpp index c362a82..141763f 100644 --- a/app/main.cpp +++ b/app/main.cpp @@ -113,16 +113,21 @@ void Create(std::unordered_map args) { using BaseVectorVector = std::vector>; using BaseVectors = std::vector>; - std::string indexType, baseFile, L, R, alpha, outputFile, connectionMode; + std::string indexType, baseFile, L, R, alpha, outputFile, connectionMode, distanceSaveMethod; std::string L_small, R_small, R_stiched; bool save = false; bool leaveEmpty = false; int distanceThreads = 1; // Default value + int computingThreads = 1; // Default value + + std::vector validArguments = {"-index-type", "-base-file", "-L", "-L-small", "-R", "-R-small", "-R-stiched", "-alpha", "-save", "-random-edges", "-connection-mode", "-distance-threads", "-distance-save"}; + if (args["-index-type"] == "stiched") { + validArguments.push_back("-computing-threads"); + } - std::vector validArguments = {"-index-type", "-base-file", "-L", "-L-small", "-R", "-R-small", "-R-stiched", "-alpha", "-save", "-random-edges", "-connection-mode", "-distance-threads"}; for (auto arg : args) { if (std::find(validArguments.begin(), validArguments.end(), arg.first) == validArguments.end()) { - throw std::invalid_argument("Error: Invalid argument: " + arg.first + ". Valid arguments are: -index-type, -base-file, -L, -L-small, -R, -R-small, -R-stiched, -alpha, -save, -connection-mode, -distance-threads"); + throw std::invalid_argument("Error: Invalid argument: " + arg.first + ". Valid arguments are: -index-type, -base-file, -L, -L-small, -R, -R-small, -R-stiched, -alpha, -save, -connection-mode, -distance-threads, -distance-save"); } } @@ -146,6 +151,8 @@ void Create(std::unordered_map args) { } } else if (indexType == "stiched") { + validArguments.push_back("-computing-threads"); + if (args.find("-L-small") == args.end()) { throw std::invalid_argument("Error: Missing required argument: -L-small"); } else { @@ -163,6 +170,10 @@ void Create(std::unordered_map args) { } else { R_stiched = args["-R-stiched"]; } + + if (args.find("-computing-threads") != args.end()) { + computingThreads = std::stoi(args["-computing-threads"]); + } } else { throw std::invalid_argument("Error: Invalid index type: " + indexType + ". Supported index types are: simple, filtered, stiched"); } @@ -196,7 +207,19 @@ void Create(std::unordered_map args) { } } + if (args.find("-distance-save") != args.end()) { + distanceSaveMethod = args["-distance-save"]; + if (distanceSaveMethod != "none" && distanceSaveMethod != "matrix") { + throw std::invalid_argument("Error: Invalid value for -distance-save. Valid values are: none, matrix"); + } + } else { + distanceSaveMethod = "none"; // Default value + } + if (args.find("-distance-threads") != args.end()) { + if (distanceSaveMethod != "matrix") { + throw std::invalid_argument("Error: -distance-threads can only be used if -distance-save is set to 'matrix'"); + } distanceThreads = std::stoi(args["-distance-threads"]); } @@ -206,9 +229,16 @@ void Create(std::unordered_map args) { std::cerr << "Error reading base file" << std::endl; return; } + + DISTANCE_SAVE_METHOD distanceSaveMethodEnum = NONE; + if (distanceSaveMethod == "none") { + distanceSaveMethodEnum = NONE; + } else if (distanceSaveMethod == "matrix") { + distanceSaveMethodEnum = MATRIX; + } VamanaIndex> vamanaIndex = VamanaIndex>(); - vamanaIndex.createGraph(base_vectors, std::stof(alpha), std::stoi(L), std::stoi(R), distanceThreads, true); + vamanaIndex.createGraph(base_vectors, std::stof(alpha), std::stoi(L), std::stoi(R), distanceSaveMethodEnum, distanceThreads, true); if (save) { if (!vamanaIndex.saveGraph(outputFile)) { @@ -226,9 +256,16 @@ void Create(std::unordered_map args) { filters.insert(filter); } + DISTANCE_SAVE_METHOD distanceSaveMethodEnum = NONE; + if (distanceSaveMethod == "none") { + distanceSaveMethodEnum = NONE; + } else if (distanceSaveMethod == "matrix") { + distanceSaveMethodEnum = MATRIX; + } + if (indexType == "filtered") { FilteredVamanaIndex> index(filters); - index.createGraph(base_vectors, std::stoi(alpha), std::stoi(L), std::stoi(R), distanceThreads, true, leaveEmpty); + index.createGraph(base_vectors, std::stoi(alpha), std::stoi(L), std::stoi(R), distanceSaveMethodEnum, distanceThreads, true, leaveEmpty); if (save) { index.saveGraph(outputFile); @@ -236,7 +273,7 @@ void Create(std::unordered_map args) { } } else if (indexType == "stiched") { StichedVamanaIndex> index(filters); - index.createGraph(base_vectors, std::stof(alpha), std::stoi(L_small), std::stoi(R_small), std::stoi(R_stiched), distanceThreads, true, leaveEmpty); + index.createGraph(base_vectors, std::stof(alpha), std::stoi(L_small), std::stoi(R_small), std::stoi(R_stiched), distanceSaveMethodEnum, distanceThreads, computingThreads, true, leaveEmpty); if (save) { index.saveGraph(outputFile); @@ -277,7 +314,7 @@ void TestSimple(std::unordered_map args) { GraphNode> s = vamanaIndex.findMedoid(vamanaIndex.getGraph(), 1000); auto start = std::chrono::high_resolution_clock::now(); - SimpleGreedyResult greedyResult = GreedySearch(vamanaIndex, s, query_vectors.at(std::stoi(queryNumber)), std::stoi(k), std::stoi(L), TEST); + SimpleGreedyResult greedyResult = GreedySearch(vamanaIndex, s, query_vectors.at(std::stoi(queryNumber)), std::stoi(k), std::stoi(L), NONE); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed = end - start; @@ -363,7 +400,7 @@ void TestFilteredOrStiched(std::unordered_map args) { } auto start = std::chrono::high_resolution_clock::now(); - FilteredGreedyResult greedyResult = FilteredGreedySearch(index, start_nodes, xq, std::stoi(k), std::stoi(L), Fx, TEST); + FilteredGreedyResult greedyResult = FilteredGreedySearch(index, start_nodes, xq, std::stoi(k), std::stoi(L), Fx, NONE); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed = end - start; diff --git a/include/FilteredVamanaIndex.h b/include/FilteredVamanaIndex.h index c813f68..eddf9da 100644 --- a/include/FilteredVamanaIndex.h +++ b/include/FilteredVamanaIndex.h @@ -60,7 +60,16 @@ template class FilteredVamanaIndex : public VamanaIndex& P, const float& alpha, const unsigned int L, const unsigned int R, unsigned int distance_threads = 1, bool visualized = true, bool empty = true); + void createGraph( + const std::vector& P, + const float& alpha, + const unsigned int L, + const unsigned int R, + const DISTANCE_SAVE_METHOD distanceSaveMethod = NONE, + unsigned int distance_threads = 1, + bool visualized = true, + bool empty = true + ); /** * @brief Load a graph from a file. Specifically this method is used to receive the contents of a Vamana Index Graph diff --git a/include/GreedySearch.h b/include/GreedySearch.h index b6e41c5..e41e179 100644 --- a/include/GreedySearch.h +++ b/include/GreedySearch.h @@ -28,6 +28,7 @@ enum EXEC_MODE { TEST = 1 }; + template class VamanaIndex; template class FilteredVamanaIndex; @@ -52,7 +53,7 @@ template std::pair, std:: const query_t& xq, unsigned int k, unsigned int L, - const EXEC_MODE execMode = CREATE + const DISTANCE_SAVE_METHOD distanceSaveMethod = NONE ); /** @@ -82,7 +83,7 @@ template std::pair, std:: const unsigned int k, const unsigned int L, const std::vector& queryFilters, - const EXEC_MODE execMode = CREATE + const DISTANCE_SAVE_METHOD distanceSaveMethod = NONE ); diff --git a/include/RobustPrune.h b/include/RobustPrune.h index b37f6eb..cf33591 100644 --- a/include/RobustPrune.h +++ b/include/RobustPrune.h @@ -3,6 +3,7 @@ #include "DataVector.h" #include "BQDataVectors.h" #include "VamanaIndex.h" +#include "distance.h" template class VamanaIndex; template class FilteredVamanaIndex; @@ -27,8 +28,14 @@ template class FilteredVamanaIndex; * 4. Removes nodes from `V` that do not satisfy the distance threshold defined by `alpha`. * 5. Stops when the number of neighbors of `p_node` reaches `R` or `V` is empty. */ -template -void RobustPrune(VamanaIndex& index, GraphNode& p_node, std::set& V, float alpha, int R); +template void RobustPrune( + VamanaIndex& index, + GraphNode& p_node, + std::set& V, + float alpha, + int R, + const DISTANCE_SAVE_METHOD distanceSaveMethod +); /** * @brief Prunes the neighbors of a given node in a graph based on a robust pruning algorithm with filtering. @@ -45,4 +52,11 @@ void RobustPrune(VamanaIndex& index, GraphNode& p_node, std::s * @param R An integer specifying the maximum number of neighbors to retain. */ template -void FilteredRobustPrune(FilteredVamanaIndex& index, GraphNode& p_node,std::set& V, float alpha,int R); +void FilteredRobustPrune( + FilteredVamanaIndex& index, + GraphNode& p_node, + std::set& V, + float alpha, + int R, + const DISTANCE_SAVE_METHOD distanceSaveMethod +); diff --git a/include/StichedVamanaIndex.h b/include/StichedVamanaIndex.h index 343c0a7..5be1853 100644 --- a/include/StichedVamanaIndex.h +++ b/include/StichedVamanaIndex.h @@ -13,14 +13,16 @@ template class StichedVamanaIndex : public FilteredVamanaInd /** * @brief Default constructor for the StichedVamanaIndex class. */ - StichedVamanaIndex() : FilteredVamanaIndex() {} + StichedVamanaIndex() + : FilteredVamanaIndex() {} /** * @brief Constructor for the StichedVamanaIndex class with filters. * * @param filters A set of CategoricalAttributeFilter to initialize the index with. */ - StichedVamanaIndex(std::set filters) : FilteredVamanaIndex(filters) {} + StichedVamanaIndex(std::set filters) + : FilteredVamanaIndex(filters) {} /** * @brief Create the graph with the given parameters. @@ -30,8 +32,18 @@ template class StichedVamanaIndex : public FilteredVamanaInd * @param L An unsigned int parameter. * @param R An unsigned int parameter. */ - void createGraph(const std::vector& P, const float& alpha, const unsigned int L_small, - const unsigned int R_small, const unsigned int R_stiched, unsigned int distance_threads, bool visualized = true, bool empty = true); + void createGraph( + const std::vector& P, + const float& alpha, + const unsigned int L_small, + const unsigned int R_small, + const unsigned int R_stiched, + const DISTANCE_SAVE_METHOD distanceSaveMethod, + unsigned int distance_threads, + unsigned int compute_threads = 500, + bool visualized = true, + bool empty = true + ); }; diff --git a/include/VamanaIndex.h b/include/VamanaIndex.h index df3493f..7ef4cdd 100644 --- a/include/VamanaIndex.h +++ b/include/VamanaIndex.h @@ -103,7 +103,16 @@ template class VamanaIndex { * @param R the parameter R * */ - void createGraph(const std::vector& P, const float& alpha, const unsigned int L, const unsigned int& R, unsigned int distance_threads = 1, bool visualize = true, double** distanceMatrix = nullptr); + void createGraph( + const std::vector& P, + const float& alpha, + const unsigned int L, + const unsigned int& R, + const DISTANCE_SAVE_METHOD distanceSaveMethod = NONE, + unsigned int distance_threads = 1, + bool visualize = true, + double** distanceMatrix = nullptr + ); /** * @brief Saves a specific graph into a file. Specifically this method is used to save the contents of a Vamana diff --git a/include/distance.h b/include/distance.h index 155094e..a452ad0 100644 --- a/include/distance.h +++ b/include/distance.h @@ -7,6 +7,11 @@ #include #include "DataVector.h" +enum DISTANCE_SAVE_METHOD { + NONE = 0, + MATRIX = 1, +}; + /** * @brief Comparator structure for ordering elements by Euclidean distance. diff --git a/src/Graphics/ProgressBar.cpp b/src/Graphics/ProgressBar.cpp index 4645325..bb40b26 100644 --- a/src/Graphics/ProgressBar.cpp +++ b/src/Graphics/ProgressBar.cpp @@ -15,7 +15,7 @@ bool isUtf8Supported() { } /** - * @brief Function to display a progress bar with a percentage. + * @brief Function to display a progress bar with a percentage and a loading animation. * * Loading Symbol: => * @@ -30,19 +30,22 @@ void displayProgressBar( const int current, const int total, const std::string& message, const std::chrono::steady_clock::time_point& startTime, const unsigned int barWidth) { static bool utf8Supported = isUtf8Supported(); + static const char loadingSymbols[] = {'-', '\\', '|', '/'}; + static int loadingIndex = 0; + static int callCounter = 0; // Counter to slow down the animation const std::string horizontalLineSymbol = "\u2500"; const std::string verticalLineSymbol = "\u2502"; const std::string crossSymbol = "\u253C"; if (firstTime) { - std::cout << brightMagenta << "Action" << std::setw(22) << reset << " " << verticalLineSymbol << " "; + std::cout << brightMagenta << "Action" << std::setw(24) << reset << " " << verticalLineSymbol << " "; std::cout << brightMagenta << "Progress" << std::setw(36) << reset << " " << verticalLineSymbol << " "; std::cout << brightMagenta << "Time Remaining" << reset << " | "; std::cout << brightMagenta << "Time Elapsed" << reset << std::endl; - for (unsigned int i = 0; i < 25; i++) { std::cout << horizontalLineSymbol; } std::cout << crossSymbol; + for (unsigned int i = 0; i < 27; i++) { std::cout << horizontalLineSymbol; } std::cout << crossSymbol; for (unsigned int i = 0; i < 42; i++) { std::cout << horizontalLineSymbol; } std::cout << crossSymbol; for (unsigned int i = 0; i < 16; i++) { std::cout << horizontalLineSymbol; } std::cout << crossSymbol; for (unsigned int i = 0; i < 15; i++) { std::cout << horizontalLineSymbol; } @@ -67,8 +70,18 @@ void displayProgressBar( int minutes = remainingSeconds / 60; int seconds = remainingSeconds % 60; - // Display action message - std::cout << brightYellow << std::setw(24) << std::setfill(' ') << std::left << message << reset; + // Display action message with loading animation + if (current > 0 && current < total) { + std::cout << brightYellow << std::setw(24) << std::setfill(' ') << std::left << message; + if (callCounter % 8 == 0) { // Update loading symbol every 8 calls + loadingIndex++; + } + std::cout << " " << loadingSymbols[loadingIndex % 4] << reset; + } + else if (current == total) { + std::cout << brightGreen << std::setw(24) << std::setfill(' ') << std::left << message; + std::cout << " " << tickSymbol << reset; + } // Display progress bar std::cout << " " << verticalLineSymbol << " "; @@ -104,7 +117,7 @@ void displayProgressBar( } else if (current == total) { std::cout << " " << verticalLineSymbol << " " << yellow; - std::cout << brightGreen << "Done " << tickSymbol << std::setw(12) << std::setfill(' ') << reset; + std::cout << brightGreen << "Done" << std::setw(14) << std::setfill(' ') << reset; } // Display elapsed time @@ -119,6 +132,7 @@ void displayProgressBar( std::cout << "\r"; // Return the cursor to the start of the line std::cout.flush(); + callCounter++; } /** diff --git a/src/VIA/Algorithms/FilteredVamanaIndex.cpp b/src/VIA/Algorithms/FilteredVamanaIndex.cpp index 1e1d405..4bc3f3e 100644 --- a/src/VIA/Algorithms/FilteredVamanaIndex.cpp +++ b/src/VIA/Algorithms/FilteredVamanaIndex.cpp @@ -79,7 +79,8 @@ FilteredVamanaIndex::getNodesWithCategoricalValueFilter(const Categori */ template void FilteredVamanaIndex::createGraph( - const std::vector& P, const float& alpha, const unsigned int L, const unsigned int R, unsigned int distance_threads, bool visualized, bool empty) { + const std::vector& P, const float& alpha, const unsigned int L, const unsigned int R, const DISTANCE_SAVE_METHOD distanceSaveMethod, + unsigned int distance_threads, bool visualized, bool empty) { using Filter = CategoricalAttributeFilter; using GreedyResult = std::pair, std::set>; @@ -87,14 +88,18 @@ void FilteredVamanaIndex::createGraph( // Initialize graph memory unsigned int n = P.size(); this->P = P; - this->distanceMatrix = new double*[n]; - for (unsigned int i = 0; i < n; i++) { - this->distanceMatrix[i] = new double[n]; + + // Compute the distances between the points if it is specified to save the distances in a matrix + if (distanceSaveMethod == MATRIX) { + this->distanceMatrix = new double*[n]; + for (unsigned int i = 0; i < n; i++) { + this->distanceMatrix[i] = new double[n]; + } + this->computeDistances(true, distance_threads); } - this->computeDistances(true, distance_threads); - this->G.setNodesCount(n); // Initialize G to an empty graph and get the medoid node + this->G.setNodesCount(n); this->fillGraphNodes(); // Fill graph with random edges if required @@ -129,7 +134,7 @@ void FilteredVamanaIndex::createGraph( std::vector queryFilters; queryFilters.push_back(F_x_sigma_i); - GreedyResult greedyResult = FilteredGreedySearch(*this, S_F_x_sigma_i, this->P[sigma[i]], 0, L, queryFilters); + GreedyResult greedyResult = FilteredGreedySearch(*this, S_F_x_sigma_i, this->P[sigma[i]], 0, L, queryFilters, distanceSaveMethod); // Construct the V_F_x_sigma[i] based on the second greedy result item std::set V_F_x_sigma_i = greedyResult.second; @@ -138,7 +143,7 @@ void FilteredVamanaIndex::createGraph( // Run Filtered Robust Prune to update out-neighbors of sigma[i] GraphNode* sigma_i = this->G.getNode(this->P[sigma[i]].getIndex()); - FilteredRobustPrune(*this, *sigma_i, V_F_x_sigma_i, alpha, R); + FilteredRobustPrune(*this, *sigma_i, V_F_x_sigma_i, alpha, R, distanceSaveMethod); // Receive neighbors of sigma_i std::vector* neighbors = sigma_i->getNeighborsVector(); @@ -151,7 +156,7 @@ void FilteredVamanaIndex::createGraph( // Checking if the neighbors of j is greater than R. If so run Filtered Robust Prune std::set j_neighbors = j_node->getNeighborsSet(); if (j_neighbors.size() > R) { - FilteredRobustPrune(*this, *j_node, j_neighbors, alpha, R); + FilteredRobustPrune(*this, *j_node, j_neighbors, alpha, R, distanceSaveMethod); } } @@ -159,10 +164,12 @@ void FilteredVamanaIndex::createGraph( }); // Free up the memory allocated for the distance matrix - for (unsigned int i = 0; i < n; i++) { - delete[] this->distanceMatrix[i]; + if (distanceSaveMethod == MATRIX) { + for (unsigned int i = 0; i < n; i++) { + delete[] this->distanceMatrix[i]; + } + delete[] this->distanceMatrix; } - delete[] this->distanceMatrix; } diff --git a/src/VIA/Algorithms/GreedySearch.cpp b/src/VIA/Algorithms/GreedySearch.cpp index 4cd210d..c548da4 100644 --- a/src/VIA/Algorithms/GreedySearch.cpp +++ b/src/VIA/Algorithms/GreedySearch.cpp @@ -70,7 +70,7 @@ static set_t getSetItemAtIndex(const unsigned int& index, const std::set& */ template std::pair, std::set> -GreedySearch(const VamanaIndex& index, const GraphNode& s, const query_t& xq, unsigned int k, unsigned int L, const EXEC_MODE execMode) { +GreedySearch(const VamanaIndex& index, const GraphNode& s, const query_t& xq, unsigned int k, unsigned int L, const DISTANCE_SAVE_METHOD distanceSaveMethod) { std::set candidates = {s.getData()}; std::set visited = {}; @@ -78,7 +78,7 @@ GreedySearch(const VamanaIndex& index, const GraphNode& s, con // Calculate initial difference between candidates and visited sets std::set candidates_minus_visited = getSetDifference(candidates, visited); unsigned int cnt = 0; - float p_star_distance, currentDistance; + float p_star_distance = 0, currentDistance = 0; // Main search loop: continue until there are no unvisited candidates while (!candidates_minus_visited.empty()) { @@ -86,18 +86,18 @@ GreedySearch(const VamanaIndex& index, const GraphNode& s, con // Select the closest candidate to the query vector xq graph_t p_star = getSetItemAtIndex(0, candidates_minus_visited); - if (execMode == TEST) { + if (distanceSaveMethod == NONE) { p_star_distance = euclideanDistance(p_star, xq); - } else { + } else if (distanceSaveMethod == MATRIX) { p_star_distance = index.getDistanceMatrix()[p_star.getIndex()][xq.getIndex()]; } // Compare each unvisited candidate's distance to find the nearest for (auto xp : candidates_minus_visited) { - if (execMode == TEST) { + if (distanceSaveMethod == NONE) { currentDistance = euclideanDistance(xp, xq); - } else { + } else if (distanceSaveMethod == MATRIX) { currentDistance = index.getDistanceMatrix()[xp.getIndex()][xq.getIndex()]; } @@ -119,7 +119,7 @@ GreedySearch(const VamanaIndex& index, const GraphNode& s, con // Limit the size of candidates to L by keeping the closest L elements to the query if (candidates.size() > static_cast(L)) { std::set> newCandidates{ - EuclideanDistanceOrder(xq, index.getDistanceMatrix(), execMode==CREATE) + EuclideanDistanceOrder(xq, index.getDistanceMatrix(), distanceSaveMethod==MATRIX) }; for (auto candidate : candidates) { @@ -141,7 +141,7 @@ GreedySearch(const VamanaIndex& index, const GraphNode& s, con // Final selection of k closest candidates after main loop std::set> newCandidates{ - EuclideanDistanceOrder(xq, index.getDistanceMatrix(), execMode==CREATE) + EuclideanDistanceOrder(xq, index.getDistanceMatrix(), distanceSaveMethod==MATRIX) }; for (auto candidate : candidates) { @@ -182,9 +182,9 @@ GreedySearch(const VamanaIndex& index, const GraphNode& s, con template std::pair, std::set> FilteredGreedySearch( const FilteredVamanaIndex& index, const std::vector>& S, const query_t& xq, - const unsigned int k, const unsigned int L, const std::vector& queryFilters, const EXEC_MODE mode) { + const unsigned int k, const unsigned int L, const std::vector& queryFilters, const DISTANCE_SAVE_METHOD distanceSaveMethod) { - float p_star_distance, currentDistance; + float p_star_distance = 0, currentDistance = 0; std::set candidates = {}; std::set visited = {}; @@ -218,16 +218,16 @@ std::pair, std::set> FilteredGreedySearch( // Select the closest candidate to the query vector xq graph_t p_star = getSetItemAtIndex(0, candidates_minus_visited); - if (mode == TEST) { + if (distanceSaveMethod == NONE) { p_star_distance = euclideanDistance(p_star, xq); - } else { + } else if (distanceSaveMethod == MATRIX) { p_star_distance = index.getDistanceMatrix()[p_star.getIndex()][xq.getIndex()]; } // Compare each unvisited candidate's distance to find the nearest for (auto xp : candidates_minus_visited) { - if (mode == TEST) { + if (distanceSaveMethod == NONE) { currentDistance = euclideanDistance(xp, xq); } else { currentDistance = index.getDistanceMatrix()[xp.getIndex()][xq.getIndex()]; @@ -268,7 +268,7 @@ std::pair, std::set> FilteredGreedySearch( if (candidates.size() > static_cast(L)) { std::set> newCandidates{ - EuclideanDistanceOrder(xq, index.getDistanceMatrix(), mode==CREATE) + EuclideanDistanceOrder(xq, index.getDistanceMatrix(), distanceSaveMethod==MATRIX) }; for (auto candidate : candidates) { @@ -291,7 +291,7 @@ std::pair, std::set> FilteredGreedySearch( // Final selection of k closest candidates after main loop std::set> newCandidates{ - EuclideanDistanceOrder(xq, index.getDistanceMatrix(), mode==CREATE) + EuclideanDistanceOrder(xq, index.getDistanceMatrix(), distanceSaveMethod==MATRIX) }; for (auto candidate : candidates) { @@ -315,7 +315,7 @@ template std::pair>, std::set>> Gre const DataVector& xq, unsigned int k, unsigned int L, - const EXEC_MODE mode + const DISTANCE_SAVE_METHOD distanceSaveMethod ); template std::pair>, std::set>> GreedySearch( @@ -324,7 +324,7 @@ template std::pair>, std::set& xq, unsigned int k, unsigned int L, - const EXEC_MODE mode + const DISTANCE_SAVE_METHOD distanceSaveMethod ); template std::pair>, std::set>> GreedySearch( @@ -333,7 +333,7 @@ template std::pair>, std::set& xq, unsigned int k, unsigned int L, - const EXEC_MODE mode + const DISTANCE_SAVE_METHOD distanceSaveMethod ); // Filtered Greedy Search @@ -344,7 +344,7 @@ template std::pair>, std::set& queryFilters, - const EXEC_MODE mode + const DISTANCE_SAVE_METHOD distanceSaveMethod ); template std::pair>, std::set>> FilteredGreedySearch( @@ -354,5 +354,5 @@ template std::pair>, std::set& queryFilters, - const EXEC_MODE mode + const DISTANCE_SAVE_METHOD distanceSaveMethod ); diff --git a/src/VIA/Algorithms/RobustPrune.cpp b/src/VIA/Algorithms/RobustPrune.cpp index 8bb86be..f2cf859 100644 --- a/src/VIA/Algorithms/RobustPrune.cpp +++ b/src/VIA/Algorithms/RobustPrune.cpp @@ -50,8 +50,11 @@ static set_t getSetItemAtIndex(const unsigned int& index, const std::set& * 5. Stops when the number of neighbors of `p_node` reaches `R` or `V` is empty. */ template -void RobustPrune(VamanaIndex& index, GraphNode& p_node, std::set& V, float alpha, int R) { +void RobustPrune(VamanaIndex& index, GraphNode& p_node, std::set& V, float alpha, int R, const DISTANCE_SAVE_METHOD distanceSaveMethod) { + float p_star_distance = 0, currentDistance = 0; + float distance1 = 0, distance2 = 0; + // Get the data of the node p_node graph_t p = p_node.getData(); @@ -70,11 +73,24 @@ void RobustPrune(VamanaIndex& index, GraphNode& p_node, std::s // Find the closest neighbor to p_node in V, and initialize the distance to p_star graph_t p_star = getSetItemAtIndex(0, V); - float p_star_distance = index.getDistanceMatrix()[p.getIndex()][p_star.getIndex()]; + + if (distanceSaveMethod == NONE) { + p_star_distance = euclideanDistance(p, p_star); + } + else if (distanceSaveMethod == MATRIX) { + p_star_distance = index.getDistanceMatrix()[p.getIndex()][p_star.getIndex()]; + } + // Update p_star if a closer neighbor is found for (auto p_tone : V) { - float currentDistance = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; + + if (distanceSaveMethod == NONE) { + currentDistance = euclideanDistance(p, p_tone); + } + else if (distanceSaveMethod == MATRIX) { + currentDistance = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; + } if (currentDistance < p_star_distance) { p_star_distance = currentDistance; @@ -95,8 +111,14 @@ void RobustPrune(VamanaIndex& index, GraphNode& p_node, std::s for (auto p_tone : V_copy) { // Remove neighbors that are too far from p_star based on alpha and euclideanDistance - double distance1 = index.getDistanceMatrix()[p_star.getIndex()][p_tone.getIndex()]; - double distance2 = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; + if (distanceSaveMethod == NONE) { + distance1 = euclideanDistance(p_star, p_tone); + distance2 = euclideanDistance(p, p_tone); + } + else if (distanceSaveMethod == MATRIX) { + distance1 = index.getDistanceMatrix()[p_star.getIndex()][p_tone.getIndex()]; + distance2 = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; + } if ((alpha * distance1) <= distance2) { V.erase(p_tone); @@ -122,8 +144,11 @@ void RobustPrune(VamanaIndex& index, GraphNode& p_node, std::s * @param R An integer specifying the maximum number of neighbors to retain. */ template -void FilteredRobustPrune(FilteredVamanaIndex& index, GraphNode& p_node, std::set& V, float alpha, int R) { +void FilteredRobustPrune(FilteredVamanaIndex& index, GraphNode& p_node, std::set& V, float alpha, int R, const DISTANCE_SAVE_METHOD distanceSaveMethod) { + float p_star_distance = 0, currentDistance = 0; + float distance1 = 0, distance2 = 0; + // Get the data of the node p_node graph_t p = p_node.getData(); @@ -142,11 +167,23 @@ void FilteredRobustPrune(FilteredVamanaIndex& index, GraphNode // Find the closest neighbor to p_node in V graph_t p_star = getSetItemAtIndex(0, V); - float p_star_distance = index.getDistanceMatrix()[p.getIndex()][p_star.getIndex()]; + + if (distanceSaveMethod == NONE) { + p_star_distance = euclideanDistance(p, p_star); + } + else if (distanceSaveMethod == MATRIX) { + p_star_distance = index.getDistanceMatrix()[p.getIndex()][p_star.getIndex()]; + } // Update p_star if a closer neighbor is found for (auto p_tone : V) { - float currentDistance = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; + if (distanceSaveMethod == NONE) { + currentDistance = euclideanDistance(p, p_tone); + } + else if (distanceSaveMethod == MATRIX) { + currentDistance = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; + } + if (currentDistance < p_star_distance) { p_star_distance = currentDistance; p_star = p_tone; @@ -178,9 +215,15 @@ void FilteredRobustPrune(FilteredVamanaIndex& index, GraphNode } // Remove neighbors that are too far from p_star based on alpha and euclideanDistance - double distance1 = index.getDistanceMatrix()[p_star.getIndex()][p_tone.getIndex()]; - double distance2 = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; - + if (distanceSaveMethod == NONE) { + distance1 = euclideanDistance(p_star, p_tone); + distance2 = euclideanDistance(p, p_tone); + } + else if (distanceSaveMethod == MATRIX) { + distance1 = index.getDistanceMatrix()[p_star.getIndex()][p_tone.getIndex()]; + distance2 = index.getDistanceMatrix()[p.getIndex()][p_tone.getIndex()]; + } + if ((alpha * distance1) <= distance2) { V.erase(p_tone); } @@ -200,7 +243,8 @@ template void RobustPrune>( GraphNode>& p_node, std::set>& V, float alpha, - int R + int R, + const DISTANCE_SAVE_METHOD distanceSaveMethod ); // Explicit instantiation for FilteredRobustPrune with float data type and DataVector query type @@ -209,7 +253,8 @@ template void RobustPrune>( GraphNode>& p_node, std::set>& V, float alpha, - int R + int R, + const DISTANCE_SAVE_METHOD distanceSaveMethod ); // Explicit instantiation for FilteredRobustPrune with float data type and DataVector query type @@ -218,5 +263,6 @@ template void FilteredRobustPrune>( GraphNode>& p_node, std::set>& V, float alpha, - int R + int R, + const DISTANCE_SAVE_METHOD distanceSaveMethod ); diff --git a/src/VIA/Algorithms/StichedVamanaIndex.cpp b/src/VIA/Algorithms/StichedVamanaIndex.cpp index f4c9bf0..720d900 100644 --- a/src/VIA/Algorithms/StichedVamanaIndex.cpp +++ b/src/VIA/Algorithms/StichedVamanaIndex.cpp @@ -1,4 +1,3 @@ -#include #include "../../../include/StichedVamanaIndex.h" #include "../../../include/Filter.h" #include "../../../include/VamanaIndex.h" @@ -6,6 +5,14 @@ #include "../../../include/RobustPrune.h" #include "../../../include/graphics.h" +#include +#include +#include +#include +#include + +std::mutex computingMutex; + /** * @brief Create the graph with the given parameters. @@ -17,18 +24,22 @@ */ template void StichedVamanaIndex::createGraph(const std::vector& P, const float& alpha, const unsigned int L_small, - const unsigned int R_small, const unsigned int R_stiched, unsigned int distance_threads, bool visualized, bool empty) { + const unsigned int R_small, const unsigned int R_stiched, const DISTANCE_SAVE_METHOD distanceSaveMethod, unsigned int distance_threads, unsigned int compute_threads, bool visualized, bool empty) { using Filter = CategoricalAttributeFilter; // Initialize graph memory unsigned int n = P.size(); this->P = P; - this->distanceMatrix = new double*[n]; - for (unsigned int i = 0; i < n; i++) { - this->distanceMatrix[i] = new double[n]; + + // Compute the distances between the points if it is specified to save the distances in a matrix + if (distanceSaveMethod == MATRIX) { + this->distanceMatrix = new double*[n]; + for (unsigned int i = 0; i < n; i++) { + this->distanceMatrix[i] = new double[n]; + } + this->computeDistances(true, distance_threads); } - this->computeDistances(true, distance_threads); // Initialize G = (V, E) to an empty graph this->G.setNodesCount(n); @@ -52,46 +63,90 @@ void StichedVamanaIndex::createGraph(const std::vector& P, c Pf[filter] = points; } - // Foreach f in F do - withProgress(0, this->F.size(), "Creating Stiched Vamana", [&](int i) { - - // Get the current filter - std::set::iterator it = this->F.begin(); - std::advance(it, i); - Filter filter = *it; - - std::vector currentVector = Pf[filter]; - - // Keep the indexes of the current filter points in P, to connect them later. The idea here is to create a new - // sub-index of type VamanaIndex that will contain the points of the current filter, which are going to have a different - // index inside the new sub-graph. So we need to keep track of the indexes of the points in the original graph. - std::map indexes; - for (unsigned int i = 0; i < currentVector.size(); i++) { - vamana_t currentData = currentVector[i]; - indexes[i] = currentData.getIndex(); - } + std::atomic progress(0); + auto startTime = std::chrono::steady_clock::now(); + + auto compute = [&](int start, int end) { + for (int i = start; i < end; i++) { + // Get the current filter + std::set::iterator it = this->F.begin(); + std::advance(it, i); + Filter filter = *it; + std::vector currentVector = Pf[filter]; + + // Keep the indexes of the current filter points in P, to connect them later. The idea here is to create a new + // sub-index of type VamanaIndex that will contain the points of the current filter, which are going to have a different + // index inside the new sub-graph. So we need to keep track of the indexes of the points in the original graph. + std::map indexes; + for (unsigned int i = 0; i < currentVector.size(); i++) { + vamana_t currentData = currentVector[i]; + indexes[i] = currentData.getIndex(); + } + + // Initialize the sub-index for the current filter and create its graph + VamanaIndex subIndex; + subIndex.createGraph(Pf[filter], alpha, R_small, L_small, distanceSaveMethod, 1, false, this->distanceMatrix); + + for (unsigned int i = 0; i < subIndex.getGraph().getNodesCount(); i++) { + + // Get the current node from the sub-index and its index in the sub-graph + GraphNode* node = subIndex.getGraph().getNode(i); + unsigned int nodeIndex = node->getData().getIndex(); + + // Receive all the neighbors of the current node and connect them in the main graph, using the indexes map above + std::vector* neighbors = node->getNeighborsVector(); + + for (auto neighbor : *neighbors) { + + unsigned int currentNeighborIndex = neighbor.getIndex(); + this->G.connectNodesByIndex( + indexes[nodeIndex], + indexes[currentNeighborIndex] + ); + + } + } - // Initialize the sub-index for the current filter and create its graph - VamanaIndex subIndex; - subIndex.createGraph(Pf[filter], alpha, R_small, L_small, 1, false, this->distanceMatrix); - - for (unsigned int i = 0; i < subIndex.getGraph().getNodesCount(); i++) { - - // Get the current node from the sub-index and its index in the sub-graph - GraphNode* node = subIndex.getGraph().getNode(i); - unsigned int nodeIndex = node->getData().getIndex(); - - // Receive all the neighbors of the current node and connect them in the main graph, using the indexes map above - std::vector* neighbors = node->getNeighborsVector(); - for (auto neighbor : *neighbors) { - unsigned int currentNeighborIndex = neighbor.getIndex(); - this->G.connectNodesByIndex(indexes[nodeIndex], indexes[currentNeighborIndex]); + progress++; + if (visualized && progress % 100 == 0) { + std::lock_guard lock(computingMutex); + displayProgressBar(progress, this->F.size(), "Creating Stiched Vamana", startTime, 30); } - } - }); + }; + + if (compute_threads > 1) { + compute_threads = std::min(compute_threads, (unsigned int)this->F.size()); + std::vector threads; + int threadFiltersChunk = this->F.size() / compute_threads; + for (unsigned int t = 0; t < compute_threads; ++t) { + int start = t * threadFiltersChunk; + int end = (t == compute_threads - 1) ? this->F.size() : start + threadFiltersChunk; + threads.emplace_back(compute, start, end); + } + + if (visualized) { + displayProgressBar(progress, this->F.size(), "Creating Stiched Vamana", startTime, 30); + } + + for (auto& thread : threads) { + thread.join(); + } + + if (visualized) { + displayProgressBar(this->F.size(), this->F.size(), "Creating Stiched Vamana", startTime, 30); + std::cout << std::endl; + } + } + else { + if (visualized) { + withProgress(0, this->F.size(), "Creating Stiched Vamana", [&](int i) { compute(i, i + 1); }); + } else { + compute(0, this->F.size()); + } + } // NOTE: Without the Filtered Robust we get better results... @@ -109,10 +164,12 @@ void StichedVamanaIndex::createGraph(const std::vector& P, c // } // Free up the memory allocated for the distance matrix - for (unsigned int i = 0; i < n; i++) { - delete[] this->distanceMatrix[i]; + if (distanceSaveMethod == MATRIX) { + for (unsigned int i = 0; i < n; i++) { + delete[] this->distanceMatrix[i]; + } + delete[] this->distanceMatrix; } - delete[] this->distanceMatrix; } diff --git a/src/VIA/Algorithms/VamanaIndex.cpp b/src/VIA/Algorithms/VamanaIndex.cpp index 8bce7c8..7dd2b28 100644 --- a/src/VIA/Algorithms/VamanaIndex.cpp +++ b/src/VIA/Algorithms/VamanaIndex.cpp @@ -162,7 +162,8 @@ void VamanaIndex::computeDistances(const bool visualize, const unsigne */ template void VamanaIndex::createGraph( - const std::vector& P, const float& alpha, const unsigned int L, const unsigned int& R, unsigned int distance_threads, bool visualize, double** distanceMatrix) { + const std::vector& P, const float& alpha, const unsigned int L, const unsigned int& R, const DISTANCE_SAVE_METHOD distanceSaveMethod, + unsigned int distance_threads, bool visualize, double** distanceMatrix) { using GreedyResult = std::pair, std::set>; GreedyResult greedyResult; @@ -174,18 +175,21 @@ void VamanaIndex::createGraph( unsigned int n = P.size(); this->P = P; - // If the distance matrix is provided, use it, otherwise compute the distances - if (distanceMatrix != nullptr) { - this->distanceMatrix = distanceMatrix; - } else { - this->distanceMatrix = new double*[n]; - for (unsigned int i = 0; i < n; i++) { - this->distanceMatrix[i] = new double[n]; + if (distanceSaveMethod == MATRIX) { + + // If the distance matrix is provided, use it, otherwise compute the distances + if (distanceMatrix != nullptr) { + this->distanceMatrix = distanceMatrix; + } else { + this->distanceMatrix = new double*[n]; + for (unsigned int i = 0; i < n; i++) { + this->distanceMatrix[i] = new double[n]; + } + this->computeDistances(visualize, distance_threads); } - this->computeDistances(visualize, distance_threads); + } - // this->computeDistances(false); this->G.setNodesCount(n); // Set the number of nodes in the graph, fill the nodes with the dataset points, and create random edges for the nodes @@ -202,8 +206,8 @@ void VamanaIndex::createGraph( GraphNode* sigma_i_node = this->G.getNode(sigma.at(i)); vamana_t sigma_i = sigma_i_node->getData(); - greedyResult = GreedySearch(*this, s, this->P.at(sigma.at(i)), 1, L); - RobustPrune(*this, *sigma_i_node, greedyResult.second, alpha, R); + greedyResult = GreedySearch(*this, s, this->P.at(sigma.at(i)), 1, L, distanceSaveMethod); + RobustPrune(*this, *sigma_i_node, greedyResult.second, alpha, R, distanceSaveMethod); std::vector* sigma_i_neighbors = sigma_i_node->getNeighborsVector(); for (auto j : *sigma_i_neighbors) { @@ -216,7 +220,7 @@ void VamanaIndex::createGraph( outgoing.insert(sigma_i); if (outgoing.size() > (long unsigned int)R) { - RobustPrune(*this, *j_node, outgoing, alpha, R); + RobustPrune(*this, *j_node, outgoing, alpha, R, distanceSaveMethod); } else { j_node->addNeighbor(sigma_i); } @@ -233,7 +237,7 @@ void VamanaIndex::createGraph( } // Free up the memory allocated for the distance matrix, if it was computed - if (distanceMatrix == nullptr) { + if (distanceSaveMethod == MATRIX && distanceMatrix == nullptr) { for (unsigned int i = 0; i < n; i++) { delete[] this->distanceMatrix[i]; }