Skip to content

Commit 2835216

Browse files
committed
txgraph: make GroupClusters use partition numbers directly (optimization)
1 parent c72c8d5 commit 2835216

File tree

1 file changed

+50
-59
lines changed

1 file changed

+50
-59
lines changed

src/txgraph.cpp

Lines changed: 50 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,47 +1103,46 @@ void TxGraphImpl::GroupClusters(int level) noexcept
11031103
// with inefficient and/or oversized Clusters which just end up being split again anyway.
11041104
SplitAll(level);
11051105

1106-
/** Annotated clusters: an entry for each Cluster, together with the representative for the
1107-
* partition it is in if known, or with nullptr if not yet known. */
1108-
std::vector<std::pair<Cluster*, Cluster*>> an_clusters;
1106+
/** Annotated clusters: an entry for each Cluster, together with the sequence number for the
1107+
* representative for the partition it is in (initially its own, later that of the
1108+
* to-be-merged group). */
1109+
std::vector<std::pair<Cluster*, uint64_t>> an_clusters;
11091110
/** Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply
1110-
* to removed transactions), together with the representative root of the partition of
1111-
* Clusters it applies to. */
1112-
std::vector<std::pair<std::pair<GraphIndex, GraphIndex>, Cluster*>> an_deps;
1113-
1114-
// Construct a an_clusters entry for every parent and child in the to-be-applied dependencies.
1111+
* to removed transactions), together with the sequence number of the representative root of
1112+
* Clusters it applies to (initially that of the child Cluster, later that of the
1113+
* to-be-merged group). */
1114+
std::vector<std::pair<std::pair<GraphIndex, GraphIndex>, uint64_t>> an_deps;
1115+
1116+
// Construct an an_clusters entry for every parent and child in the to-be-applied dependencies,
1117+
// and an an_deps entry for each dependency to be applied.
1118+
an_deps.reserve(clusterset.m_deps_to_add.size());
11151119
for (const auto& [par, chl] : clusterset.m_deps_to_add) {
11161120
auto par_cluster = FindCluster(par, level);
11171121
auto chl_cluster = FindCluster(chl, level);
11181122
// Skip dependencies for which the parent or child transaction is removed.
11191123
if (par_cluster == nullptr || chl_cluster == nullptr) continue;
1120-
an_clusters.emplace_back(par_cluster, nullptr);
1124+
an_clusters.emplace_back(par_cluster, par_cluster->m_sequence);
11211125
// Do not include a duplicate when parent and child are identical, as it'll be removed
11221126
// below anyway.
1123-
if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, nullptr);
1127+
if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, chl_cluster->m_sequence);
1128+
// Add entry to an_deps, using the child sequence number.
1129+
an_deps.emplace_back(std::pair{par, chl}, chl_cluster->m_sequence);
11241130
}
11251131
// Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters
11261132
// to which dependencies apply.
1127-
std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.first, b.first) < 0; });
1133+
std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
11281134
an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end());
1129-
1130-
// Sort the dependencies by child Cluster::m_sequence.
1131-
std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept {
1132-
auto [_a_par, a_chl] = a;
1133-
auto [_b_par, b_chl] = b;
1134-
auto a_chl_cluster = FindCluster(a_chl, level);
1135-
auto b_chl_cluster = FindCluster(b_chl, level);
1136-
return CompareClusters(a_chl_cluster, b_chl_cluster) < 0;
1137-
});
1135+
// Sort an_deps by applying the same order to the involved child cluster.
1136+
std::sort(an_deps.begin(), an_deps.end(), [&](auto& a, auto& b) noexcept { return a.second < b.second; });
11381137

11391138
// Run the union-find algorithm to find partitions of the input Clusters which need to be
11401139
// grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
11411140
{
11421141
/** Each PartitionData entry contains information about a single input Cluster. */
11431142
struct PartitionData
11441143
{
1145-
/** The cluster this holds information for. */
1146-
Cluster* cluster;
1144+
/** The sequence number of the cluster this holds information for. */
1145+
uint64_t sequence;
11471146
/** All PartitionData entries belonging to the same partition are organized in a tree.
11481147
* Each element points to its parent, or to itself if it is the root. The root is then
11491148
* a representative for the entire tree, and can be found by walking upwards from any
@@ -1157,11 +1156,11 @@ void TxGraphImpl::GroupClusters(int level) noexcept
11571156
std::vector<PartitionData> partition_data;
11581157

11591158
/** Given a Cluster, find its corresponding PartitionData. */
1160-
auto locate_fn = [&](Cluster* arg) noexcept -> PartitionData* {
1161-
auto it = std::lower_bound(partition_data.begin(), partition_data.end(), arg,
1162-
[](auto& a, Cluster* ptr) noexcept { return CompareClusters(a.cluster, ptr) < 0; });
1159+
auto locate_fn = [&](uint64_t sequence) noexcept -> PartitionData* {
1160+
auto it = std::lower_bound(partition_data.begin(), partition_data.end(), sequence,
1161+
[](auto& a, uint64_t seq) noexcept { return a.sequence < seq; });
11631162
Assume(it != partition_data.end());
1164-
Assume(it->cluster == arg);
1163+
Assume(it->sequence == sequence);
11651164
return &*it;
11661165
};
11671166

@@ -1196,67 +1195,59 @@ void TxGraphImpl::GroupClusters(int level) noexcept
11961195
// Start by initializing every Cluster as its own singleton partition.
11971196
partition_data.resize(an_clusters.size());
11981197
for (size_t i = 0; i < an_clusters.size(); ++i) {
1199-
partition_data[i].cluster = an_clusters[i].first;
1198+
partition_data[i].sequence = an_clusters[i].first->m_sequence;
12001199
partition_data[i].parent = &partition_data[i];
12011200
partition_data[i].rank = 0;
12021201
}
12031202

1204-
// Run through all parent/child pairs in m_deps_to_add, and union the
1205-
// the partitions their Clusters are in.
1203+
// Run through all parent/child pairs in an_deps, and union the partitions their Clusters
1204+
// are in.
12061205
Cluster* last_chl_cluster{nullptr};
12071206
PartitionData* last_partition{nullptr};
1208-
for (const auto& [par, chl] : clusterset.m_deps_to_add) {
1207+
for (const auto& [dep, _] : an_deps) {
1208+
auto [par, chl] = dep;
12091209
auto par_cluster = FindCluster(par, level);
12101210
auto chl_cluster = FindCluster(chl, level);
1211+
Assume(chl_cluster != nullptr && par_cluster != nullptr);
12111212
// Nothing to do if parent and child are in the same Cluster.
12121213
if (par_cluster == chl_cluster) continue;
1213-
// Nothing to do if either parent or child transaction is removed already.
1214-
if (par_cluster == nullptr || chl_cluster == nullptr) continue;
12151214
Assume(par != chl);
12161215
if (chl_cluster == last_chl_cluster) {
12171216
// If the child Cluster is the same as the previous iteration, union with the
1218-
// tree they were in, avoiding the need for another lookup. Note that m_deps_to_add
1217+
// tree they were in, avoiding the need for another lookup. Note that an_deps
12191218
// is sorted by child Cluster, so batches with the same child are expected.
1220-
last_partition = union_fn(locate_fn(par_cluster), last_partition);
1219+
last_partition = union_fn(locate_fn(par_cluster->m_sequence), last_partition);
12211220
} else {
12221221
last_chl_cluster = chl_cluster;
1223-
last_partition = union_fn(locate_fn(par_cluster), locate_fn(chl_cluster));
1222+
last_partition = union_fn(locate_fn(par_cluster->m_sequence), locate_fn(chl_cluster->m_sequence));
12241223
}
12251224
}
12261225

1227-
// Populate the an_clusters and an_deps data structures with the list of input Clusters,
1228-
// and the input dependencies, annotated with the representative of the Cluster partition
1229-
// it applies to.
1230-
an_deps.reserve(clusterset.m_deps_to_add.size());
1231-
auto deps_it = clusterset.m_deps_to_add.begin();
1226+
// Update the sequence numbers in an_clusters and an_deps to be those of the partition
1227+
// representative.
1228+
auto deps_it = an_deps.begin();
12321229
for (size_t i = 0; i < partition_data.size(); ++i) {
12331230
auto& data = partition_data[i];
1234-
// Find the representative of the partition Cluster i is in, and store it with the
1235-
// Cluster.
1236-
auto rep = find_root_fn(&data)->cluster;
1237-
Assume(an_clusters[i].second == nullptr);
1238-
an_clusters[i].second = rep;
1231+
// Find the sequence of the representative of the partition Cluster i is in, and store
1232+
// it with the Cluster.
1233+
auto rep_seq = find_root_fn(&data)->sequence;
1234+
an_clusters[i].second = rep_seq;
12391235
// Find all dependencies whose child Cluster is Cluster i, and annotate them with rep_seq.
1240-
while (deps_it != clusterset.m_deps_to_add.end()) {
1241-
auto [par, chl] = *deps_it;
1236+
while (deps_it != an_deps.end()) {
1237+
auto [par, chl] = deps_it->first;
12421238
auto chl_cluster = FindCluster(chl, level);
1243-
if (CompareClusters(chl_cluster, data.cluster) > 0) break;
1244-
// Skip dependencies that apply to earlier Clusters (those necessary are for
1245-
// deleted transactions, as otherwise we'd have processed them already).
1246-
if (chl_cluster == data.cluster) {
1247-
auto par_cluster = FindCluster(par, level);
1248-
// Also filter out dependencies applying to a removed parent.
1249-
if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep);
1250-
}
1239+
Assume(chl_cluster != nullptr);
1240+
if (chl_cluster->m_sequence > data.sequence) break;
1241+
deps_it->second = rep_seq;
12511242
++deps_it;
12521243
}
12531244
}
12541245
}
12551246

1256-
// Sort both an_clusters and an_deps by representative of the partition they are in, grouping
1257-
// all those applying to the same partition together.
1258-
std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.second, b.second) < 0; });
1259-
std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.second, b.second) < 0; });
1247+
// Sort both an_clusters and an_deps by sequence number of the representative of the
1248+
// partition they are in, grouping all those applying to the same partition together.
1249+
std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
1250+
std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
12601251

12611252
// Translate the resulting cluster groups to the m_group_data structure, and the dependencies
12621253
// back to m_deps_to_add.

0 commit comments

Comments
 (0)