Skip to content

Commit 46aad9b

Browse files
committed
clusterlin: add Linearize function
This adds a first version of the overall linearization interface, which, given a DepGraph, constructs a good linearization by incrementally including good candidate sets (found using AncestorCandidateFinder and SearchCandidateFinder).
1 parent ee0ddfe commit 46aad9b

File tree

3 files changed

+173
-0
lines changed

3 files changed

+173
-0
lines changed

src/cluster_linearize.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,22 @@ class DepGraph
167167
for (auto pos : elems) ret += entries[pos].feerate;
168168
return ret;
169169
}
170+
171+
/** Append the entries of select to list in a topologically valid order.
172+
*
173+
* Complexity: O(select.Count() * log(select.Count())).
174+
*/
175+
void AppendTopo(std::vector<ClusterIndex>& list, const SetType& select) const noexcept
176+
{
177+
ClusterIndex old_len = list.size();
178+
for (auto i : select) list.push_back(i);
179+
std::sort(list.begin() + old_len, list.end(), [&](ClusterIndex a, ClusterIndex b) noexcept {
180+
const auto a_anc_count = entries[a].ancestors.Count();
181+
const auto b_anc_count = entries[b].ancestors.Count();
182+
if (a_anc_count != b_anc_count) return a_anc_count < b_anc_count;
183+
return a < b;
184+
});
185+
}
170186
};
171187

172188
/** A set of transactions together with their aggregate feerate. */
@@ -486,6 +502,57 @@ class SearchCandidateFinder
486502
}
487503
};
488504

505+
/** Find a linearization for a cluster.
506+
*
507+
* @param[in] depgraph Dependency graph of the cluster to be linearized.
508+
* @param[in] max_iterations Upper bound on the number of optimization steps that will be done.
509+
* @return A pair of:
510+
* - The resulting linearization.
511+
* - A boolean indicating whether the result is guaranteed to be
512+
* optimal.
513+
*
514+
* Complexity: O(N * min(max_iterations + N, 2^N)) where N=depgraph.TxCount().
515+
*/
516+
template<typename SetType>
517+
std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations) noexcept
518+
{
519+
if (depgraph.TxCount() == 0) return {{}, true};
520+
521+
uint64_t iterations_left = max_iterations;
522+
std::vector<ClusterIndex> linearization;
523+
524+
AncestorCandidateFinder anc_finder(depgraph);
525+
SearchCandidateFinder src_finder(depgraph);
526+
linearization.reserve(depgraph.TxCount());
527+
bool optimal = true;
528+
529+
while (true) {
530+
// Initialize best as the best remaining ancestor set.
531+
auto best = anc_finder.FindCandidateSet();
532+
533+
// Invoke bounded search to update best, with up to half of our remaining iterations as
534+
// limit.
535+
uint64_t max_iterations_now = (iterations_left + 1) / 2;
536+
uint64_t iterations_done_now = 0;
537+
std::tie(best, iterations_done_now) = src_finder.FindCandidateSet(max_iterations_now, best);
538+
iterations_left -= iterations_done_now;
539+
540+
if (iterations_done_now == max_iterations_now) {
541+
optimal = false;
542+
}
543+
544+
// Add to output in topological order.
545+
depgraph.AppendTopo(linearization, best.transactions);
546+
547+
// Update state to reflect best is no longer to be linearized.
548+
anc_finder.MarkDone(best.transactions);
549+
if (anc_finder.AllDone()) break;
550+
src_finder.MarkDone(best.transactions);
551+
}
552+
553+
return {std::move(linearization), optimal};
554+
}
555+
489556
} // namespace cluster_linearize
490557

491558
#endif // BITCOIN_CLUSTER_LINEARIZE_H

src/test/fuzz/cluster_linearize.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <util/bitset.h>
1212
#include <util/feefrac.h>
1313

14+
#include <algorithm>
1415
#include <stdint.h>
1516
#include <vector>
1617
#include <utility>
@@ -140,6 +141,29 @@ class ExhaustiveCandidateFinder
140141
}
141142
};
142143

144+
/** A simple linearization algorithm.
145+
*
146+
* This matches Linearize() in interface and behavior, though with fewer optimizations, and using
147+
* just SimpleCandidateFinder rather than AncestorCandidateFinder and SearchCandidateFinder.
148+
*/
149+
template<typename SetType>
150+
std::pair<std::vector<ClusterIndex>, bool> SimpleLinearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations)
151+
{
152+
std::vector<ClusterIndex> linearization;
153+
SimpleCandidateFinder finder(depgraph);
154+
SetType todo = SetType::Fill(depgraph.TxCount());
155+
bool optimal = true;
156+
while (todo.Any()) {
157+
auto [candidate, iterations_done] = finder.FindCandidateSet(max_iterations);
158+
if (iterations_done == max_iterations) optimal = false;
159+
depgraph.AppendTopo(linearization, candidate.transactions);
160+
todo -= candidate.transactions;
161+
finder.MarkDone(candidate.transactions);
162+
max_iterations -= iterations_done;
163+
}
164+
return {std::move(linearization), optimal};
165+
}
166+
143167
/** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */
144168
template<typename SetType>
145169
SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& todo, SpanReader& reader)
@@ -458,3 +482,68 @@ FUZZ_TARGET(clusterlin_search_finder)
458482
assert(exh_finder.AllDone());
459483
assert(anc_finder.AllDone());
460484
}
485+
486+
FUZZ_TARGET(clusterlin_linearize)
{
    // Exercise Linearize(): check that its output is a valid linearization and, whenever it
    // claims optimality, that nothing better can be found.

    // Decode an iteration count followed by a depgraph from the fuzz input. A deserialization
    // failure is deliberately ignored; the test proceeds with whatever was decoded.
    SpanReader reader(buffer);
    DepGraph<TestBitSet> depgraph;
    uint64_t iter_count{0};
    try {
        reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph);
    } catch (const std::ios_base::failure&) {}

    // Run Linearize() with the iteration count restricted to its low 19 bits.
    iter_count &= 0x7ffff;
    auto [linearization, optimal] = Linearize(depgraph, iter_count);
    SanityCheck(depgraph, linearization);
    auto chunking = ChunkLinearization(depgraph, linearization);

    // With a sufficiently high iteration count, an optimal linearization must be found.
    // Each linearization step can use up to 2^k iterations, with steps k=1..n. Summed over
    // all steps that is 2 * (2^n - 1).
    const uint64_t n = depgraph.TxCount();
    if (n <= 18 && iter_count > 2U * ((uint64_t{1} << n) - 1U)) {
        assert(optimal);
    }

    // The quality checks below only apply when Linearize claims an optimal result.
    if (optimal) {
        // The result must be at least as good as what SimpleLinearize produces.
        auto [simple_linearization, simple_optimal] = SimpleLinearize(depgraph, MAX_SIMPLE_ITERATIONS);
        SanityCheck(depgraph, simple_linearization);
        auto simple_chunking = ChunkLinearization(depgraph, simple_linearization);
        const auto versus_simple = CompareChunks(chunking, simple_chunking);
        assert(versus_simple >= 0);
        // When SimpleLinearize also reports optimality the two must be equally good (if not,
        // SimpleLinearize is broken).
        assert(!simple_optimal || versus_simple == 0);

        // Only for very small clusters, test every topologically-valid permutation.
        if (depgraph.TxCount() <= 7) {
            // Reports whether every transaction in order appears no earlier than all of its
            // ancestors.
            const auto is_topological = [&depgraph](const std::vector<ClusterIndex>& order) {
                TestBitSet placed;
                for (auto idx : order) {
                    placed.Set(idx);
                    if (!depgraph.Ancestors(idx).IsSubsetOf(placed)) return false;
                }
                return true;
            };

            // Start from the identity permutation, which is the lexicographically smallest
            // one, so that std::next_permutation visits every permutation.
            std::vector<ClusterIndex> perm_linearization;
            perm_linearization.reserve(depgraph.TxCount());
            for (ClusterIndex pos = 0; pos < depgraph.TxCount(); ++pos) perm_linearization.push_back(pos);

            do {
                if (is_topological(perm_linearization)) {
                    // The obtained linearization must be at least as good as this permutation.
                    auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
                    const auto versus_perm = CompareChunks(chunking, perm_chunking);
                    assert(versus_perm >= 0);
                }
            } while (std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
        }
    }
}

src/test/util/cluster_linearize.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <cluster_linearize.h>
99
#include <serialize.h>
10+
#include <span.h>
1011
#include <streams.h>
1112
#include <util/bitset.h>
1213
#include <util/feefrac.h>
@@ -331,6 +332,22 @@ void VerifyDepGraphFromCluster(const Cluster<SetType>& cluster, const DepGraph<S
331332
}
332333
}
333334

335+
/** Perform a sanity check on a linearization. */
336+
template<typename SetType>
337+
void SanityCheck(const DepGraph<SetType>& depgraph, Span<const ClusterIndex> linearization)
338+
{
339+
// Check completeness.
340+
assert(linearization.size() == depgraph.TxCount());
341+
TestBitSet done;
342+
for (auto i : linearization) {
343+
// Check transaction position is in range.
344+
assert(i < depgraph.TxCount());
345+
// Check topology and lack of duplicates.
346+
assert((depgraph.Ancestors(i) - done) == TestBitSet::Singleton(i));
347+
done.Set(i);
348+
}
349+
}
350+
334351
} // namespace
335352

336353
#endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H

0 commit comments

Comments
 (0)