Skip to content

Commit d42db5e

Browse files
[NFC][SYCL][Graph] Avoid unnecessary shared_ptr in duplicateNodes
Changes are mostly local to the routine, but I've updated `MIDCache` to use raw `node_impl *` while on it too because `duplicateNodes` accounts for 40% of the places that need updates after the change of the `MIDCache`.
1 parent dd65259 commit d42db5e

File tree

2 files changed

+51
-52
lines changed

2 files changed

+51
-52
lines changed

sycl/source/detail/graph/graph_impl.cpp

Lines changed: 50 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,40 +1227,38 @@ exec_graph_impl::enqueue(sycl::detail::queue_impl &Queue,
12271227

12281228
void exec_graph_impl::duplicateNodes() {
12291229
// Map of original modifiable nodes (keys) to new duplicated nodes (values)
1230-
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>> NodesMap;
1230+
std::map<node_impl *, node_impl *> NodesMap;
12311231

1232-
const std::vector<std::shared_ptr<node_impl>> &ModifiableNodes =
1233-
MGraphImpl->MNodeStorage;
1232+
nodes_range ModifiableNodes{MGraphImpl->MNodeStorage};
12341233
std::deque<std::shared_ptr<node_impl>> NewNodes;
12351234

1236-
for (size_t i = 0; i < ModifiableNodes.size(); i++) {
1237-
auto OriginalNode = ModifiableNodes[i];
1238-
std::shared_ptr<node_impl> NodeCopy =
1239-
std::make_shared<node_impl>(*OriginalNode);
1235+
for (node_impl &OriginalNode : ModifiableNodes) {
1236+
NewNodes.push_back(std::make_shared<node_impl>(OriginalNode));
1237+
node_impl &NodeCopy = *NewNodes.back();
12401238

12411239
// Associate the ID of the original node with the node copy for later quick
12421240
// access
1243-
MIDCache.insert(std::make_pair(OriginalNode->MID, NodeCopy));
1241+
MIDCache.insert(std::make_pair(OriginalNode.MID, &NodeCopy));
12441242

12451243
// Clear edges between nodes so that we can replace with new ones
1246-
NodeCopy->MSuccessors.clear();
1247-
NodeCopy->MPredecessors.clear();
1244+
NodeCopy.MSuccessors.clear();
1245+
NodeCopy.MPredecessors.clear();
12481246
// Push the new node to the front of the stack
1249-
NewNodes.push_back(NodeCopy);
12501247
// Associate the new node with the old one for updating edges
1251-
NodesMap.insert({OriginalNode, NodeCopy});
1248+
NodesMap.insert({&OriginalNode, &NodeCopy});
12521249
}
12531250

12541251
// Now that all nodes have been copied rebuild edges on new nodes. This must
12551252
// be done as a separate step since successors may be out of order.
1256-
for (size_t i = 0; i < ModifiableNodes.size(); i++) {
1257-
auto OriginalNode = ModifiableNodes[i];
1258-
auto NodeCopy = NewNodes[i];
1253+
auto OrigIt = ModifiableNodes.begin(), OrigEnd = ModifiableNodes.end();
1254+
for (auto NewIt = NewNodes.begin(); OrigIt != OrigEnd; ++OrigIt, ++NewIt) {
1255+
node_impl &OriginalNode = *OrigIt;
1256+
node_impl &NodeCopy = **NewIt;
12591257
// Look through all the original node successors, find their copies and
12601258
// register those as successors with the current copied node
1261-
for (node_impl &NextNode : OriginalNode->successors()) {
1262-
node_impl &Successor = *NodesMap.at(NextNode.shared_from_this());
1263-
NodeCopy->registerSuccessor(Successor);
1259+
for (node_impl &NextNode : OriginalNode.successors()) {
1260+
node_impl &Successor = *NodesMap.at(&NextNode);
1261+
NodeCopy.registerSuccessor(Successor);
12641262
}
12651263
}
12661264

@@ -1273,49 +1271,47 @@ void exec_graph_impl::duplicateNodes() {
12731271
if (NewNode->MNodeType != node_type::subgraph) {
12741272
continue;
12751273
}
1276-
const std::vector<std::shared_ptr<node_impl>> &SubgraphNodes =
1277-
NewNode->MSubGraphImpl->MNodeStorage;
1274+
nodes_range SubgraphNodes{NewNode->MSubGraphImpl->MNodeStorage};
12781275
std::deque<std::shared_ptr<node_impl>> NewSubgraphNodes{};
12791276

12801277
// Map of original subgraph nodes (keys) to new duplicated nodes (values)
1281-
std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>>
1282-
SubgraphNodesMap;
1278+
std::map<node_impl *, node_impl *> SubgraphNodesMap;
12831279

12841280
// Copy subgraph nodes
1285-
for (size_t i = 0; i < SubgraphNodes.size(); i++) {
1286-
auto SubgraphNode = SubgraphNodes[i];
1287-
auto NodeCopy = std::make_shared<node_impl>(*SubgraphNode);
1281+
for (node_impl &SubgraphNode : SubgraphNodes) {
1282+
NewSubgraphNodes.push_back(std::make_shared<node_impl>(SubgraphNode));
1283+
node_impl &NodeCopy = *NewSubgraphNodes.back();
12881284
// Associate the ID of the original subgraph node with all extracted node
12891285
// copies for future quick access.
1290-
MIDCache.insert(std::make_pair(SubgraphNode->MID, NodeCopy));
1286+
MIDCache.insert(std::make_pair(SubgraphNode.MID, &NodeCopy));
12911287

1292-
NewSubgraphNodes.push_back(NodeCopy);
1293-
SubgraphNodesMap.insert({SubgraphNode, NodeCopy});
1294-
NodeCopy->MSuccessors.clear();
1295-
NodeCopy->MPredecessors.clear();
1288+
SubgraphNodesMap.insert({&SubgraphNode, &NodeCopy});
1289+
NodeCopy.MSuccessors.clear();
1290+
NodeCopy.MPredecessors.clear();
12961291
}
12971292

12981293
// Rebuild edges for new subgraph nodes
1299-
for (size_t i = 0; i < SubgraphNodes.size(); i++) {
1300-
auto SubgraphNode = SubgraphNodes[i];
1301-
auto NodeCopy = NewSubgraphNodes[i];
1302-
1303-
for (node_impl &NextNode : SubgraphNode->successors()) {
1304-
node_impl &Successor =
1305-
*SubgraphNodesMap.at(NextNode.shared_from_this());
1306-
NodeCopy->registerSuccessor(Successor);
1294+
auto OrigIt = SubgraphNodes.begin(), OrigEnd = SubgraphNodes.end();
1295+
for (auto NewIt = NewSubgraphNodes.begin(); OrigIt != OrigEnd;
1296+
++OrigIt, ++NewIt) {
1297+
node_impl &SubgraphNode = *OrigIt;
1298+
node_impl &NodeCopy = **NewIt;
1299+
1300+
for (node_impl &NextNode : SubgraphNode.successors()) {
1301+
node_impl &Successor = *SubgraphNodesMap.at(&NextNode);
1302+
NodeCopy.registerSuccessor(Successor);
13071303
}
13081304
}
13091305

13101306
// Collect input and output nodes for the subgraph
1311-
std::vector<std::shared_ptr<node_impl>> Inputs;
1312-
std::vector<std::shared_ptr<node_impl>> Outputs;
1313-
for (auto &NodeImpl : NewSubgraphNodes) {
1307+
std::vector<node_impl *> Inputs;
1308+
std::vector<node_impl *> Outputs;
1309+
for (std::shared_ptr<node_impl> &NodeImpl : NewSubgraphNodes) {
13141310
if (NodeImpl->MPredecessors.size() == 0) {
1315-
Inputs.push_back(NodeImpl);
1311+
Inputs.push_back(&*NodeImpl);
13161312
}
13171313
if (NodeImpl->MSuccessors.size() == 0) {
1318-
Outputs.push_back(NodeImpl);
1314+
Outputs.push_back(&*NodeImpl);
13191315
}
13201316
}
13211317

@@ -1335,7 +1331,7 @@ void exec_graph_impl::duplicateNodes() {
13351331

13361332
// Add all input nodes from the subgraph as successors for this node
13371333
// instead
1338-
for (auto &Input : Inputs) {
1334+
for (node_impl *Input : Inputs) {
13391335
PredNode.registerSuccessor(*Input);
13401336
}
13411337
}
@@ -1354,7 +1350,7 @@ void exec_graph_impl::duplicateNodes() {
13541350

13551351
// Add all Output nodes from the subgraph as predecessors for this node
13561352
// instead
1357-
for (auto &Output : Outputs) {
1353+
for (node_impl *Output : Outputs) {
13581354
Output->registerSuccessor(SuccNode);
13591355
}
13601356
}
@@ -1365,15 +1361,18 @@ void exec_graph_impl::duplicateNodes() {
13651361
NewNodes.erase(std::find(NewNodes.begin(), NewNodes.end(), NewNode));
13661362
// Also set the iterator to the newly added nodes so we can continue
13671363
// iterating over all remaining nodes
1368-
auto InsertIt = NewNodes.insert(OldPositionIt, NewSubgraphNodes.begin(),
1369-
NewSubgraphNodes.end());
1364+
auto InsertIt = NewNodes.insert(
1365+
OldPositionIt, std::make_move_iterator(NewSubgraphNodes.begin()),
1366+
std::make_move_iterator(NewSubgraphNodes.end()));
13701367
// Since the new reverse_iterator will be at i - 1 we need to advance it
13711368
// when constructing
13721369
NewNodeIt = std::make_reverse_iterator(std::next(InsertIt));
13731370
}
13741371

13751372
// Store all the new nodes locally
1376-
MNodeStorage.insert(MNodeStorage.begin(), NewNodes.begin(), NewNodes.end());
1373+
MNodeStorage.insert(MNodeStorage.begin(),
1374+
std::make_move_iterator(NewNodes.begin()),
1375+
std::make_move_iterator(NewNodes.end()));
13771376
}
13781377

13791378
void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
@@ -1438,7 +1437,7 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
14381437

14391438
for (uint32_t i = 0; i < MNodeStorage.size(); ++i) {
14401439
MIDCache.insert(
1441-
std::make_pair(GraphImpl->MNodeStorage[i]->MID, MNodeStorage[i]));
1440+
std::make_pair(GraphImpl->MNodeStorage[i]->MID, MNodeStorage[i].get()));
14421441
}
14431442

14441443
update(GraphImpl->MNodeStorage);
@@ -1711,7 +1710,7 @@ void exec_graph_impl::populateURKernelUpdateStructs(
17111710
auto ExecNode = MIDCache.find(Node->MID);
17121711
assert(ExecNode != MIDCache.end() && "Node ID was not found in ID cache");
17131712

1714-
auto Command = MCommandMap.find(ExecNode->second.get());
1713+
auto Command = MCommandMap.find(ExecNode->second);
17151714
assert(Command != MCommandMap.end());
17161715
UpdateDesc.hCommand = Command->second;
17171716

@@ -1741,7 +1740,7 @@ exec_graph_impl::getURUpdatableNodes(
17411740

17421741
auto ExecNode = MIDCache.find(Node->MID);
17431742
assert(ExecNode != MIDCache.end() && "Node ID was not found in ID cache");
1744-
auto PartitionIndex = MPartitionNodes.find(ExecNode->second.get());
1743+
auto PartitionIndex = MPartitionNodes.find(ExecNode->second);
17451744
assert(PartitionIndex != MPartitionNodes.end());
17461745
PartitionedNodes[PartitionIndex->second].push_back(Node);
17471746
}

sycl/source/detail/graph/graph_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -917,7 +917,7 @@ class exec_graph_impl {
917917

918918
// Stores a cache of node ids from modifiable graph nodes to the companion
919919
// node(s) in this graph. Used for quick access when updating this graph.
920-
std::multimap<node_impl::id_type, std::shared_ptr<node_impl>> MIDCache;
920+
std::multimap<node_impl::id_type, node_impl *> MIDCache;
921921

922922
unsigned long long MID;
923923
// Used for std::hash in order to create a unique hash for the instance.

0 commit comments

Comments
 (0)