@@ -409,10 +409,8 @@ void graph_impl::markCGMemObjs(
409
409
}
410
410
}
411
411
412
- std::shared_ptr<node_impl> graph_impl::add (nodes_range Deps) {
413
- const std::shared_ptr<node_impl> &NodeImpl = std::make_shared<node_impl>();
414
-
415
- MNodeStorage.push_back (NodeImpl);
412
+ node_impl &graph_impl::add (nodes_range Deps) {
413
+ node_impl &NodeImpl = createNode ();
416
414
417
415
addDepsToNode (NodeImpl, Deps);
418
416
// Add an event associated with this explicit node for mixed usage
@@ -421,10 +419,9 @@ std::shared_ptr<node_impl> graph_impl::add(nodes_range Deps) {
421
419
return NodeImpl;
422
420
}
423
421
424
- std::shared_ptr<node_impl>
425
- graph_impl::add (std::function<void (handler &)> CGF,
426
- const std::vector<sycl::detail::ArgDesc> &Args,
427
- std::vector<std::shared_ptr<node_impl>> &Deps) {
422
+ node_impl &graph_impl::add (std::function<void (handler &)> CGF,
423
+ const std::vector<sycl::detail::ArgDesc> &Args,
424
+ nodes_range Deps) {
428
425
(void )Args;
429
426
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
430
427
detail::handler_impl HandlerImpl{*this };
@@ -435,7 +432,9 @@ graph_impl::add(std::function<void(handler &)> CGF,
435
432
436
433
// Pass the node deps to the handler so they are available when processing the
437
434
// CGF, need for async_malloc nodes.
438
- Handler.impl ->MNodeDeps = Deps;
435
+ Handler.impl ->MNodeDeps .clear ();
436
+ for (node_impl &N : Deps)
437
+ Handler.impl ->MNodeDeps .push_back (N.shared_from_this ());
439
438
440
439
#if XPTI_ENABLE_INSTRUMENTATION
441
440
// Save code location if one was set in TLS.
@@ -471,7 +470,7 @@ graph_impl::add(std::function<void(handler &)> CGF,
471
470
: ext::oneapi::experimental::detail::getNodeTypeFromCG (
472
471
Handler.getType ());
473
472
474
- auto NodeImpl =
473
+ node_impl & NodeImpl =
475
474
this ->add (NodeType, std::move (Handler.impl ->MGraphNodeCG ), Deps);
476
475
477
476
// Add an event associated with this explicit node for mixed usage
@@ -489,44 +488,40 @@ graph_impl::add(std::function<void(handler &)> CGF,
489
488
}
490
489
491
490
for (auto &[DynamicParam, ArgIndex] : DynamicParams) {
492
- DynamicParam->registerNode (NodeImpl, ArgIndex);
491
+ DynamicParam->registerNode (NodeImpl. shared_from_this () , ArgIndex);
493
492
}
494
493
495
494
return NodeImpl;
496
495
}
497
496
498
- std::shared_ptr<node_impl>
499
- graph_impl::add (node_type NodeType,
500
- std::shared_ptr<sycl::detail::CG> CommandGroup,
501
- nodes_range Deps) {
497
+ node_impl &graph_impl::add (node_type NodeType,
498
+ std::shared_ptr<sycl::detail::CG> CommandGroup,
499
+ nodes_range Deps) {
502
500
503
501
// A unique set of dependencies obtained by checking requirements and events
504
502
std::set<std::shared_ptr<node_impl>> UniqueDeps = getCGEdges (CommandGroup);
505
503
506
504
// Track and mark the memory objects being used by the graph.
507
505
markCGMemObjs (CommandGroup);
508
506
509
- const std::shared_ptr<node_impl> &NodeImpl =
510
- std::make_shared<node_impl>(NodeType, std::move (CommandGroup));
511
- MNodeStorage.push_back (NodeImpl);
507
+ node_impl &NodeImpl = createNode (NodeType, std::move (CommandGroup));
512
508
513
509
// Add any deps determined from requirements and events into the dependency
514
510
// list
515
511
addDepsToNode (NodeImpl, Deps);
516
512
addDepsToNode (NodeImpl, UniqueDeps);
517
513
518
514
if (NodeType == node_type::async_free) {
519
- auto AsyncFreeCG =
520
- static_cast <CGAsyncFree *>(NodeImpl->MCommandGroup .get ());
515
+ auto AsyncFreeCG = static_cast <CGAsyncFree *>(NodeImpl.MCommandGroup .get ());
521
516
// If this is an async free node mark that it is now available for reuse,
522
517
// and pass the async free node for tracking.
523
- MGraphMemPool.markAllocationAsAvailable (AsyncFreeCG->getPtr (), * NodeImpl);
518
+ MGraphMemPool.markAllocationAsAvailable (AsyncFreeCG->getPtr (), NodeImpl);
524
519
}
525
520
526
521
return NodeImpl;
527
522
}
528
523
529
- std::shared_ptr< node_impl>
524
+ node_impl &
530
525
graph_impl::add (std::shared_ptr<dynamic_command_group_impl> &DynCGImpl,
531
526
nodes_range Deps) {
532
527
// Set of Dependent nodes based on CG event and accessor dependencies.
@@ -551,15 +546,14 @@ graph_impl::add(std::shared_ptr<dynamic_command_group_impl> &DynCGImpl,
551
546
const auto &ActiveKernel = DynCGImpl->getActiveCG ();
552
547
node_type NodeType =
553
548
ext::oneapi::experimental::detail::getNodeTypeFromCG (DynCGImpl->MCGType );
554
- std::shared_ptr<detail::node_impl> NodeImpl =
555
- add (NodeType, ActiveKernel, Deps);
549
+ detail::node_impl &NodeImpl = add (NodeType, ActiveKernel, Deps);
556
550
557
551
// Add an event associated with this explicit node for mixed usage
558
552
addEventForNode (sycl::detail::event_impl::create_completed_host_event (),
559
553
NodeImpl);
560
554
561
555
// Track the dynamic command-group used inside the node object
562
- DynCGImpl->MNodes .push_back (NodeImpl);
556
+ DynCGImpl->MNodes .push_back (NodeImpl. shared_from_this () );
563
557
564
558
return NodeImpl;
565
559
}
@@ -652,7 +646,7 @@ void graph_impl::makeEdge(std::shared_ptr<node_impl> Src,
652
646
bool DestWasGraphRoot = Dest->MPredecessors .size () == 0 ;
653
647
654
648
// We need to add the edges first before checking for cycles
655
- Src->registerSuccessor (Dest);
649
+ Src->registerSuccessor (* Dest);
656
650
657
651
bool DestLostRootStatus = DestWasGraphRoot && Dest->MPredecessors .size () == 1 ;
658
652
if (DestLostRootStatus) {
@@ -1233,40 +1227,38 @@ exec_graph_impl::enqueue(sycl::detail::queue_impl &Queue,
1233
1227
1234
1228
void exec_graph_impl::duplicateNodes () {
1235
1229
// Map of original modifiable nodes (keys) to new duplicated nodes (values)
1236
- std::map<std::shared_ptr< node_impl>, std::shared_ptr< node_impl> > NodesMap;
1230
+ std::map<node_impl *, node_impl * > NodesMap;
1237
1231
1238
- const std::vector<std::shared_ptr<node_impl>> &ModifiableNodes =
1239
- MGraphImpl->MNodeStorage ;
1232
+ nodes_range ModifiableNodes{MGraphImpl->MNodeStorage };
1240
1233
std::deque<std::shared_ptr<node_impl>> NewNodes;
1241
1234
1242
- for (size_t i = 0 ; i < ModifiableNodes.size (); i++) {
1243
- auto OriginalNode = ModifiableNodes[i];
1244
- std::shared_ptr<node_impl> NodeCopy =
1245
- std::make_shared<node_impl>(*OriginalNode);
1235
+ for (node_impl &OriginalNode : ModifiableNodes) {
1236
+ NewNodes.push_back (std::make_shared<node_impl>(OriginalNode));
1237
+ node_impl &NodeCopy = *NewNodes.back ();
1246
1238
1247
1239
// Associate the ID of the original node with the node copy for later quick
1248
1240
// access
1249
- MIDCache.insert (std::make_pair (OriginalNode-> MID , NodeCopy));
1241
+ MIDCache.insert (std::make_pair (OriginalNode. MID , & NodeCopy));
1250
1242
1251
1243
// Clear edges between nodes so that we can replace with new ones
1252
- NodeCopy-> MSuccessors .clear ();
1253
- NodeCopy-> MPredecessors .clear ();
1244
+ NodeCopy. MSuccessors .clear ();
1245
+ NodeCopy. MPredecessors .clear ();
1254
1246
// Push the new node to the front of the stack
1255
- NewNodes.push_back (NodeCopy);
1256
1247
// Associate the new node with the old one for updating edges
1257
- NodesMap.insert ({OriginalNode, NodeCopy});
1248
+ NodesMap.insert ({& OriginalNode, & NodeCopy});
1258
1249
}
1259
1250
1260
1251
// Now that all nodes have been copied rebuild edges on new nodes. This must
1261
1252
// be done as a separate step since successors may be out of order.
1262
- for (size_t i = 0 ; i < ModifiableNodes.size (); i++) {
1263
- auto OriginalNode = ModifiableNodes[i];
1264
- auto NodeCopy = NewNodes[i];
1253
+ auto OrigIt = ModifiableNodes.begin (), OrigEnd = ModifiableNodes.end ();
1254
+ for (auto NewIt = NewNodes.begin (); OrigIt != OrigEnd; ++OrigIt, ++NewIt) {
1255
+ node_impl &OriginalNode = *OrigIt;
1256
+ node_impl &NodeCopy = **NewIt;
1265
1257
// Look through all the original node successors, find their copies and
1266
1258
// register those as successors with the current copied node
1267
- for (node_impl &NextNode : OriginalNode-> successors ()) {
1268
- auto Successor = NodesMap.at (NextNode. shared_from_this () );
1269
- NodeCopy-> registerSuccessor (Successor);
1259
+ for (node_impl &NextNode : OriginalNode. successors ()) {
1260
+ node_impl & Successor = * NodesMap.at (& NextNode);
1261
+ NodeCopy. registerSuccessor (Successor);
1270
1262
}
1271
1263
}
1272
1264
@@ -1279,48 +1271,47 @@ void exec_graph_impl::duplicateNodes() {
1279
1271
if (NewNode->MNodeType != node_type::subgraph) {
1280
1272
continue ;
1281
1273
}
1282
- const std::vector<std::shared_ptr<node_impl>> &SubgraphNodes =
1283
- NewNode->MSubGraphImpl ->MNodeStorage ;
1274
+ nodes_range SubgraphNodes{NewNode->MSubGraphImpl ->MNodeStorage };
1284
1275
std::deque<std::shared_ptr<node_impl>> NewSubgraphNodes{};
1285
1276
1286
1277
// Map of original subgraph nodes (keys) to new duplicated nodes (values)
1287
- std::map<std::shared_ptr<node_impl>, std::shared_ptr<node_impl>>
1288
- SubgraphNodesMap;
1278
+ std::map<node_impl *, node_impl *> SubgraphNodesMap;
1289
1279
1290
1280
// Copy subgraph nodes
1291
- for (size_t i = 0 ; i < SubgraphNodes. size (); i++ ) {
1292
- auto SubgraphNode = SubgraphNodes[i] ;
1293
- auto NodeCopy = std::make_shared<node_impl>(*SubgraphNode );
1281
+ for (node_impl &SubgraphNode : SubgraphNodes) {
1282
+ NewSubgraphNodes. push_back (std::make_shared<node_impl>( SubgraphNode)) ;
1283
+ node_impl & NodeCopy = *NewSubgraphNodes. back ( );
1294
1284
// Associate the ID of the original subgraph node with all extracted node
1295
1285
// copies for future quick access.
1296
- MIDCache.insert (std::make_pair (SubgraphNode-> MID , NodeCopy));
1286
+ MIDCache.insert (std::make_pair (SubgraphNode. MID , & NodeCopy));
1297
1287
1298
- NewSubgraphNodes.push_back (NodeCopy);
1299
- SubgraphNodesMap.insert ({SubgraphNode, NodeCopy});
1300
- NodeCopy->MSuccessors .clear ();
1301
- NodeCopy->MPredecessors .clear ();
1288
+ SubgraphNodesMap.insert ({&SubgraphNode, &NodeCopy});
1289
+ NodeCopy.MSuccessors .clear ();
1290
+ NodeCopy.MPredecessors .clear ();
1302
1291
}
1303
1292
1304
1293
// Rebuild edges for new subgraph nodes
1305
- for (size_t i = 0 ; i < SubgraphNodes.size (); i++) {
1306
- auto SubgraphNode = SubgraphNodes[i];
1307
- auto NodeCopy = NewSubgraphNodes[i];
1308
-
1309
- for (node_impl &NextNode : SubgraphNode->successors ()) {
1310
- auto Successor = SubgraphNodesMap.at (NextNode.shared_from_this ());
1311
- NodeCopy->registerSuccessor (Successor);
1294
+ auto OrigIt = SubgraphNodes.begin (), OrigEnd = SubgraphNodes.end ();
1295
+ for (auto NewIt = NewSubgraphNodes.begin (); OrigIt != OrigEnd;
1296
+ ++OrigIt, ++NewIt) {
1297
+ node_impl &SubgraphNode = *OrigIt;
1298
+ node_impl &NodeCopy = **NewIt;
1299
+
1300
+ for (node_impl &NextNode : SubgraphNode.successors ()) {
1301
+ node_impl &Successor = *SubgraphNodesMap.at (&NextNode);
1302
+ NodeCopy.registerSuccessor (Successor);
1312
1303
}
1313
1304
}
1314
1305
1315
1306
// Collect input and output nodes for the subgraph
1316
- std::vector<std::shared_ptr< node_impl> > Inputs;
1317
- std::vector<std::shared_ptr< node_impl> > Outputs;
1318
- for (auto &NodeImpl : NewSubgraphNodes) {
1307
+ std::vector<node_impl * > Inputs;
1308
+ std::vector<node_impl * > Outputs;
1309
+ for (std::shared_ptr<node_impl> &NodeImpl : NewSubgraphNodes) {
1319
1310
if (NodeImpl->MPredecessors .size () == 0 ) {
1320
- Inputs.push_back (NodeImpl);
1311
+ Inputs.push_back (&* NodeImpl);
1321
1312
}
1322
1313
if (NodeImpl->MSuccessors .size () == 0 ) {
1323
- Outputs.push_back (NodeImpl);
1314
+ Outputs.push_back (&* NodeImpl);
1324
1315
}
1325
1316
}
1326
1317
@@ -1340,8 +1331,8 @@ void exec_graph_impl::duplicateNodes() {
1340
1331
1341
1332
// Add all input nodes from the subgraph as successors for this node
1342
1333
// instead
1343
- for (auto & Input : Inputs) {
1344
- PredNode.registerSuccessor (Input);
1334
+ for (node_impl * Input : Inputs) {
1335
+ PredNode.registerSuccessor (* Input);
1345
1336
}
1346
1337
}
1347
1338
@@ -1359,8 +1350,8 @@ void exec_graph_impl::duplicateNodes() {
1359
1350
1360
1351
// Add all Output nodes from the subgraph as predecessors for this node
1361
1352
// instead
1362
- for (auto & Output : Outputs) {
1363
- Output->registerSuccessor (SuccNode. shared_from_this () );
1353
+ for (node_impl * Output : Outputs) {
1354
+ Output->registerSuccessor (SuccNode);
1364
1355
}
1365
1356
}
1366
1357
@@ -1370,15 +1361,18 @@ void exec_graph_impl::duplicateNodes() {
1370
1361
NewNodes.erase (std::find (NewNodes.begin (), NewNodes.end (), NewNode));
1371
1362
// Also set the iterator to the newly added nodes so we can continue
1372
1363
// iterating over all remaining nodes
1373
- auto InsertIt = NewNodes.insert (OldPositionIt, NewSubgraphNodes.begin (),
1374
- NewSubgraphNodes.end ());
1364
+ auto InsertIt = NewNodes.insert (
1365
+ OldPositionIt, std::make_move_iterator (NewSubgraphNodes.begin ()),
1366
+ std::make_move_iterator (NewSubgraphNodes.end ()));
1375
1367
// Since the new reverse_iterator will be at i - 1 we need to advance it
1376
1368
// when constructing
1377
1369
NewNodeIt = std::make_reverse_iterator (std::next (InsertIt));
1378
1370
}
1379
1371
1380
1372
// Store all the new nodes locally
1381
- MNodeStorage.insert (MNodeStorage.begin (), NewNodes.begin (), NewNodes.end ());
1373
+ MNodeStorage.insert (MNodeStorage.begin (),
1374
+ std::make_move_iterator (NewNodes.begin ()),
1375
+ std::make_move_iterator (NewNodes.end ()));
1382
1376
}
1383
1377
1384
1378
void exec_graph_impl::update (std::shared_ptr<graph_impl> GraphImpl) {
@@ -1443,7 +1437,7 @@ void exec_graph_impl::update(std::shared_ptr<graph_impl> GraphImpl) {
1443
1437
1444
1438
for (uint32_t i = 0 ; i < MNodeStorage.size (); ++i) {
1445
1439
MIDCache.insert (
1446
- std::make_pair (GraphImpl->MNodeStorage [i]->MID , MNodeStorage[i]));
1440
+ std::make_pair (GraphImpl->MNodeStorage [i]->MID , MNodeStorage[i]. get () ));
1447
1441
}
1448
1442
1449
1443
update (GraphImpl->MNodeStorage );
@@ -1716,7 +1710,7 @@ void exec_graph_impl::populateURKernelUpdateStructs(
1716
1710
auto ExecNode = MIDCache.find (Node->MID );
1717
1711
assert (ExecNode != MIDCache.end () && " Node ID was not found in ID cache" );
1718
1712
1719
- auto Command = MCommandMap.find (ExecNode->second . get () );
1713
+ auto Command = MCommandMap.find (ExecNode->second );
1720
1714
assert (Command != MCommandMap.end ());
1721
1715
UpdateDesc.hCommand = Command->second ;
1722
1716
@@ -1746,7 +1740,7 @@ exec_graph_impl::getURUpdatableNodes(
1746
1740
1747
1741
auto ExecNode = MIDCache.find (Node->MID );
1748
1742
assert (ExecNode != MIDCache.end () && " Node ID was not found in ID cache" );
1749
- auto PartitionIndex = MPartitionNodes.find (ExecNode->second . get () );
1743
+ auto PartitionIndex = MPartitionNodes.find (ExecNode->second );
1750
1744
assert (PartitionIndex != MPartitionNodes.end ());
1751
1745
PartitionedNodes[PartitionIndex->second ].push_back (Node);
1752
1746
}
@@ -1843,38 +1837,25 @@ node modifiable_command_graph::addImpl(dynamic_command_group &DynCGF,
1843
1837
" dynamic command-group." );
1844
1838
}
1845
1839
1846
- std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1847
- for (auto &D : Deps) {
1848
- DepImpls.push_back (sycl::detail::getSyclObjImpl (D));
1849
- }
1850
-
1851
1840
graph_impl::WriteLock Lock (impl->MMutex );
1852
- std::shared_ptr< detail::node_impl> NodeImpl = impl->add (DynCGFImpl, DepImpls );
1853
- return sycl::detail::createSyclObjFromImpl<node>(std::move ( NodeImpl) );
1841
+ detail::node_impl & NodeImpl = impl->add (DynCGFImpl, Deps );
1842
+ return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1854
1843
}
1855
1844
1856
1845
node modifiable_command_graph::addImpl (const std::vector<node> &Deps) {
1857
1846
impl->throwIfGraphRecordingQueue (" Explicit API \" Add()\" function" );
1858
- std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1859
- for (auto &D : Deps) {
1860
- DepImpls.push_back (sycl::detail::getSyclObjImpl (D));
1861
- }
1862
1847
1863
1848
graph_impl::WriteLock Lock (impl->MMutex );
1864
- std::shared_ptr< detail::node_impl> NodeImpl = impl->add (DepImpls );
1865
- return sycl::detail::createSyclObjFromImpl<node>(std::move ( NodeImpl) );
1849
+ detail::node_impl & NodeImpl = impl->add (Deps );
1850
+ return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1866
1851
}
1867
1852
1868
1853
node modifiable_command_graph::addImpl (std::function<void (handler &)> CGF,
1869
1854
const std::vector<node> &Deps) {
1870
1855
impl->throwIfGraphRecordingQueue (" Explicit API \" Add()\" function" );
1871
- std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1872
- for (auto &D : Deps) {
1873
- DepImpls.push_back (sycl::detail::getSyclObjImpl (D));
1874
- }
1875
1856
1876
- std::shared_ptr< detail::node_impl> NodeImpl = impl->add (CGF, {}, DepImpls );
1877
- return sycl::detail::createSyclObjFromImpl<node>(std::move ( NodeImpl) );
1857
+ detail::node_impl & NodeImpl = impl->add (CGF, {}, Deps );
1858
+ return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1878
1859
}
1879
1860
1880
1861
void modifiable_command_graph::addGraphLeafDependencies (node Node) {
0 commit comments