@@ -1103,47 +1103,46 @@ void TxGraphImpl::GroupClusters(int level) noexcept
1103
1103
// with inefficient and/or oversized Clusters which just end up being split again anyway.
1104
1104
SplitAll (level);
1105
1105
1106
- /* * Annotated clusters: an entry for each Cluster, together with the representative for the
1107
- * partition it is in if known, or with nullptr if not yet known. */
1108
- std::vector<std::pair<Cluster*, Cluster*>> an_clusters;
1106
+ /* * Annotated clusters: an entry for each Cluster, together with the sequence number for the
1107
+ * representative for the partition it is in (initially its own, later that of the
1108
+ * to-be-merged group). */
1109
+ std::vector<std::pair<Cluster*, uint64_t >> an_clusters;
1109
1110
/* * Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply
1110
- * to removed transactions), together with the representative root of the partition of
1111
- * Clusters it applies to. */
1112
- std::vector<std::pair<std::pair<GraphIndex, GraphIndex>, Cluster*>> an_deps;
1113
-
1114
- // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies.
1111
+ * to removed transactions), together with the sequence number of the representative root of
1112
+ * Clusters it applies to (initially that of the child Cluster, later that of the
1113
+ * to-be-merged group). */
1114
+ std::vector<std::pair<std::pair<GraphIndex, GraphIndex>, uint64_t >> an_deps;
1115
+
1116
+ // Construct an an_clusters entry for every parent and child in the to-be-applied dependencies,
1117
+ // and an an_deps entry for each dependency to be applied.
1118
+ an_deps.reserve (clusterset.m_deps_to_add .size ());
1115
1119
for (const auto & [par, chl] : clusterset.m_deps_to_add ) {
1116
1120
auto par_cluster = FindCluster (par, level);
1117
1121
auto chl_cluster = FindCluster (chl, level);
1118
1122
// Skip dependencies for which the parent or child transaction is removed.
1119
1123
if (par_cluster == nullptr || chl_cluster == nullptr ) continue ;
1120
- an_clusters.emplace_back (par_cluster, nullptr );
1124
+ an_clusters.emplace_back (par_cluster, par_cluster-> m_sequence );
1121
1125
// Do not include a duplicate when parent and child are identical, as it'll be removed
1122
1126
// below anyway.
1123
- if (chl_cluster != par_cluster) an_clusters.emplace_back (chl_cluster, nullptr );
1127
+ if (chl_cluster != par_cluster) an_clusters.emplace_back (chl_cluster, chl_cluster->m_sequence );
1128
+ // Add entry to an_deps, using the child sequence number.
1129
+ an_deps.emplace_back (std::pair{par, chl}, chl_cluster->m_sequence );
1124
1130
}
1125
1131
// Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters
1126
1132
// to which dependencies apply.
1127
- std::sort (an_clusters.begin (), an_clusters.end (), [](auto & a, auto & b) noexcept { return CompareClusters (a. first , b. first ) < 0 ; });
1133
+ std::sort (an_clusters.begin (), an_clusters.end (), [](auto & a, auto & b) noexcept { return a. second < b. second ; });
1128
1134
an_clusters.erase (std::unique (an_clusters.begin (), an_clusters.end ()), an_clusters.end ());
1129
-
1130
- // Sort the dependencies by child Cluster::m_sequence.
1131
- std::sort (clusterset.m_deps_to_add .begin (), clusterset.m_deps_to_add .end (), [&](auto & a, auto & b) noexcept {
1132
- auto [_a_par, a_chl] = a;
1133
- auto [_b_par, b_chl] = b;
1134
- auto a_chl_cluster = FindCluster (a_chl, level);
1135
- auto b_chl_cluster = FindCluster (b_chl, level);
1136
- return CompareClusters (a_chl_cluster, b_chl_cluster) < 0 ;
1137
- });
1135
+ // Sort an_deps by the sequence number of the child Cluster each dependency applies to.
1136
+ std::sort (an_deps.begin (), an_deps.end (), [&](auto & a, auto & b) noexcept { return a.second < b.second ; });
1138
1137
1139
1138
// Run the union-find algorithm to find partitions of the input Clusters which need to be
1140
1139
// grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
1141
1140
{
1142
1141
/* * Each PartitionData entry contains information about a single input Cluster. */
1143
1142
struct PartitionData
1144
1143
{
1145
- /* * The cluster this holds information for. */
1146
- Cluster* cluster ;
1144
+ /* * The sequence number of the cluster this holds information for. */
1145
+ uint64_t sequence ;
1147
1146
/* * All PartitionData entries belonging to the same partition are organized in a tree.
1148
1147
* Each element points to its parent, or to itself if it is the root. The root is then
1149
1148
* a representative for the entire tree, and can be found by walking upwards from any
@@ -1157,11 +1156,11 @@ void TxGraphImpl::GroupClusters(int level) noexcept
1157
1156
std::vector<PartitionData> partition_data;
1158
1157
1159
1158
/* * Given a Cluster, find its corresponding PartitionData. */
1160
- auto locate_fn = [&](Cluster* arg ) noexcept -> PartitionData* {
1161
- auto it = std::lower_bound (partition_data.begin (), partition_data.end (), arg ,
1162
- [](auto & a, Cluster* ptr ) noexcept { return CompareClusters (a. cluster , ptr) < 0 ; });
1159
+ auto locate_fn = [&](uint64_t sequence ) noexcept -> PartitionData* {
1160
+ auto it = std::lower_bound (partition_data.begin (), partition_data.end (), sequence ,
1161
+ [](auto & a, uint64_t seq ) noexcept { return a. sequence < seq ; });
1163
1162
Assume (it != partition_data.end ());
1164
- Assume (it->cluster == arg );
1163
+ Assume (it->sequence == sequence );
1165
1164
return &*it;
1166
1165
};
1167
1166
@@ -1196,67 +1195,59 @@ void TxGraphImpl::GroupClusters(int level) noexcept
1196
1195
// Start by initializing every Cluster as its own singleton partition.
1197
1196
partition_data.resize (an_clusters.size ());
1198
1197
for (size_t i = 0 ; i < an_clusters.size (); ++i) {
1199
- partition_data[i].cluster = an_clusters[i].first ;
1198
+ partition_data[i].sequence = an_clusters[i].first -> m_sequence ;
1200
1199
partition_data[i].parent = &partition_data[i];
1201
1200
partition_data[i].rank = 0 ;
1202
1201
}
1203
1202
1204
- // Run through all parent/child pairs in m_deps_to_add , and union the
1205
- // the partitions their Clusters are in.
1203
+ // Run through all parent/child pairs in an_deps , and union the partitions their Clusters
1204
+ // are in.
1206
1205
Cluster* last_chl_cluster{nullptr };
1207
1206
PartitionData* last_partition{nullptr };
1208
- for (const auto & [par, chl] : clusterset.m_deps_to_add ) {
1207
+ for (const auto & [dep, _] : an_deps) {
1208
+ auto [par, chl] = dep;
1209
1209
auto par_cluster = FindCluster (par, level);
1210
1210
auto chl_cluster = FindCluster (chl, level);
1211
+ Assume (chl_cluster != nullptr && par_cluster != nullptr );
1211
1212
// Nothing to do if parent and child are in the same Cluster.
1212
1213
if (par_cluster == chl_cluster) continue ;
1213
- // Nothing to do if either parent or child transaction is removed already.
1214
- if (par_cluster == nullptr || chl_cluster == nullptr ) continue ;
1215
1214
Assume (par != chl);
1216
1215
if (chl_cluster == last_chl_cluster) {
1217
1216
// If the child Cluster is the same as in the previous iteration, union with the
1218
- // tree they were in, avoiding the need for another lookup. Note that m_deps_to_add
1217
+ // tree they were in, avoiding the need for another lookup. Note that an_deps
1219
1218
// is sorted by child Cluster, so batches with the same child are expected.
1220
- last_partition = union_fn (locate_fn (par_cluster), last_partition);
1219
+ last_partition = union_fn (locate_fn (par_cluster-> m_sequence ), last_partition);
1221
1220
} else {
1222
1221
last_chl_cluster = chl_cluster;
1223
- last_partition = union_fn (locate_fn (par_cluster), locate_fn (chl_cluster));
1222
+ last_partition = union_fn (locate_fn (par_cluster-> m_sequence ), locate_fn (chl_cluster-> m_sequence ));
1224
1223
}
1225
1224
}
1226
1225
1227
- // Populate the an_clusters and an_deps data structures with the list of input Clusters,
1228
- // and the input dependencies, annotated with the representative of the Cluster partition
1229
- // it applies to.
1230
- an_deps.reserve (clusterset.m_deps_to_add .size ());
1231
- auto deps_it = clusterset.m_deps_to_add .begin ();
1226
+ // Update the sequence numbers in an_clusters and an_deps to be those of the partition
1227
+ // representative.
1228
+ auto deps_it = an_deps.begin ();
1232
1229
for (size_t i = 0 ; i < partition_data.size (); ++i) {
1233
1230
auto & data = partition_data[i];
1234
- // Find the representative of the partition Cluster i is in, and store it with the
1235
- // Cluster.
1236
- auto rep = find_root_fn (&data)->cluster ;
1237
- Assume (an_clusters[i].second == nullptr );
1238
- an_clusters[i].second = rep;
1231
+ // Find the sequence of the representative of the partition Cluster i is in, and store
1232
+ // it with the Cluster.
1233
+ auto rep_seq = find_root_fn (&data)->sequence ;
1234
+ an_clusters[i].second = rep_seq;
1239
1235
// Find all dependencies whose child Cluster is Cluster i, and annotate them with rep_seq.
1240
- while (deps_it != clusterset. m_deps_to_add .end ()) {
1241
- auto [par, chl] = * deps_it;
1236
+ while (deps_it != an_deps .end ()) {
1237
+ auto [par, chl] = deps_it-> first ;
1242
1238
auto chl_cluster = FindCluster (chl, level);
1243
- if (CompareClusters (chl_cluster, data.cluster ) > 0 ) break ;
1244
- // Skip dependencies that apply to earlier Clusters (those necessary are for
1245
- // deleted transactions, as otherwise we'd have processed them already).
1246
- if (chl_cluster == data.cluster ) {
1247
- auto par_cluster = FindCluster (par, level);
1248
- // Also filter out dependencies applying to a removed parent.
1249
- if (par_cluster != nullptr ) an_deps.emplace_back (*deps_it, rep);
1250
- }
1239
+ Assume (chl_cluster != nullptr );
1240
+ if (chl_cluster->m_sequence > data.sequence ) break ;
1241
+ deps_it->second = rep_seq;
1251
1242
++deps_it;
1252
1243
}
1253
1244
}
1254
1245
}
1255
1246
1256
- // Sort both an_clusters and an_deps by representative of the partition they are in, grouping
1257
- // all those applying to the same partition together.
1258
- std::sort (an_deps.begin (), an_deps.end (), [](auto & a, auto & b) noexcept { return CompareClusters ( a.second , b.second ) < 0 ; });
1259
- std::sort (an_clusters.begin (), an_clusters.end (), [](auto & a, auto & b) noexcept { return CompareClusters ( a.second , b.second ) < 0 ; });
1247
+ // Sort both an_clusters and an_deps by sequence number of the representative of the
1248
+ // partition they are in, grouping all those applying to the same partition together.
1249
+ std::sort (an_deps.begin (), an_deps.end (), [](auto & a, auto & b) noexcept { return a.second < b.second ; });
1250
+ std::sort (an_clusters.begin (), an_clusters.end (), [](auto & a, auto & b) noexcept { return a.second < b.second ; });
1260
1251
1261
1252
// Translate the resulting cluster groups to the m_group_data structure, and the dependencies
1262
1253
// back to m_deps_to_add.
0 commit comments