|
13 | 13 | #include <utility>
|
14 | 14 |
|
15 | 15 | #include <util/feefrac.h>
|
| 16 | +#include <util/vecdeque.h> |
16 | 17 |
|
17 | 18 | namespace cluster_linearize {
|
18 | 19 |
|
@@ -415,7 +416,8 @@ class SearchCandidateFinder
|
415 | 416 | };
|
416 | 417 |
|
417 | 418 | /** The queue of work items. */
|
418 |
| - std::vector<WorkItem> queue; |
| 419 | + VecDeque<WorkItem> queue; |
| 420 | + queue.reserve(std::max<size_t>(256, 2 * m_todo.Count())); |
419 | 421 |
|
420 | 422 | // Create an initial entry with m_todo as undecided. Also use it as best if not provided,
|
421 | 423 | // so that during the work processing loop below, and during the add_fn/split_fn calls, we
|
@@ -445,7 +447,10 @@ class SearchCandidateFinder
|
445 | 447 | // Make sure there are undecided transactions left to split on.
|
446 | 448 | if (und.None()) return;
|
447 | 449 |
|
448 |
| - // Actually construct a new work item on the queue. |
| 450 | + // Actually construct a new work item on the queue. Due to the switch to DFS when queue |
| 451 | + // space runs out (see below), we know that no reallocation of the queue should ever |
| 452 | + // occur. |
| 453 | + Assume(queue.size() < queue.capacity()); |
449 | 454 | queue.emplace_back(std::move(inc), std::move(und));
|
450 | 455 | };
|
451 | 456 |
|
@@ -479,10 +484,33 @@ class SearchCandidateFinder
|
479 | 484 | };
|
480 | 485 |
|
481 | 486 | // Work processing loop.
|
| 487 | + // |
| 488 | + // New work items are always added at the back of the queue, but items to process use a |
| 489 | + // hybrid approach where they can be taken from the front or the back. |
| 490 | + // |
| 491 | + // Depth-first search (DFS) corresponds to always taking from the back of the queue. This |
| 492 | + // is very memory-efficient (linear in the number of transactions). Breadth-first search |
| 493 | + // (BFS) corresponds to always taking from the front, which potentially uses more memory |
| 494 | + // (up to exponential in the transaction count), but seems to work better in practice. |
| 495 | + // |
| 496 | + // The approach here combines the two: use BFS until the queue grows too large, at which |
| 497 | + // point we temporarily switch to DFS until the size shrinks again. |
482 | 498 | while (!queue.empty()) {
|
| 499 | + // Processing the first queue item, and then using DFS for everything it gives rise to, |
| 500 | + // may increase the queue size by the number of undecided elements in that item, minus 1 |
| 501 | + // for the item itself being removed. Thus, only when that would push the queue over its |
| 502 | + // capacity must we avoid processing from the front (BFS) and use DFS instead. |
| 503 | + while (queue.size() - 1 + queue.front().und.Count() > queue.capacity()) { |
| 504 | + if (!iterations_left) break; |
| 505 | + auto elem = queue.back(); |
| 506 | + queue.pop_back(); |
| 507 | + split_fn(std::move(elem)); |
| 508 | + } |
| 509 | + |
| 510 | + // Process one entry from the front of the queue (BFS exploration) |
483 | 511 | if (!iterations_left) break;
|
484 |
| - auto elem = queue.back(); |
485 |
| - queue.pop_back(); |
| 512 | + auto elem = queue.front(); |
| 513 | + queue.pop_front(); |
486 | 514 | split_fn(std::move(elem));
|
487 | 515 | }
|
488 | 516 |
|
|
0 commit comments