@@ -435,9 +435,300 @@ ur_result_t urEnqueueEventsWaitWithBarrierExt(
435
435
*OutEvent // /< [in,out][optional] return an event object that identifies
436
436
// /< this particular command instance.
437
437
) {
438
- return ur::level_zero::urEnqueueEventsWaitWithBarrier (
439
- Queue, NumEventsInWaitList, EventWaitList, OutEvent);
438
+ bool InterruptBased =
439
+ EnqueueExtProp &&
440
+ (EnqueueExtProp->flags & UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS);
441
+ if (!InterruptBased) {
442
+ return ur::level_zero::urEnqueueEventsWaitWithBarrier (
443
+ Queue, NumEventsInWaitList, EventWaitList, OutEvent);
444
+ }
445
+ // Lock automatically releases when this goes out of scope.
446
+ std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
447
+
448
+ // Helper function for appending a barrier to a command list.
449
+ auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList,
450
+ _ur_ze_event_list_t &EventWaitList,
451
+ ur_event_handle_t &Event,
452
+ bool IsInternal) {
453
+ UR_CALL (createEventAndAssociateQueue (
454
+ Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, IsInternal,
455
+ false , std::nullopt, true ));
456
+ Event->WaitList = EventWaitList;
457
+
458
+ // For in-order queue we don't need a real barrier, just wait for
459
+ // requested events in potentially different queues and add a "barrier"
460
+ // event signal because it is already guaranteed that previous commands
461
+ // in this queue are completed when the signal is started.
462
+ //
463
+ // Only consideration here is that when profiling is used, signalEvent
464
+ // cannot be used if EventWaitList.Length == 0. In those cases, we need
465
+ // to fall back directly to a barrier to have correct timestamps. See here:
466
+ // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t
467
+ //
468
+ // TODO: this and other special handling of in-order queues to be
469
+ // updated when/if Level Zero adds native support for in-order queues.
470
+ //
471
+ if (Queue->isInOrderQueue () && InOrderBarrierBySignal &&
472
+ !Queue->isProfilingEnabled ()) {
473
+ if (EventWaitList.Length ) {
474
+ if (CmdList->second .IsInOrderList ) {
475
+ for (unsigned i = EventWaitList.Length ; i-- > 0 ;) {
476
+ // If the event is a multidevice event, then with driver in-order
477
+ // lists we cannot include it in the wait event list due to
478
+ // driver limitations.
479
+ if (EventWaitList.UrEventList [i]->IsMultiDevice ) {
480
+ EventWaitList.Length --;
481
+ if (EventWaitList.Length != i) {
482
+ std::swap (EventWaitList.UrEventList [i],
483
+ EventWaitList.UrEventList [EventWaitList.Length ]);
484
+ std::swap (EventWaitList.ZeEventList [i],
485
+ EventWaitList.ZeEventList [EventWaitList.Length ]);
486
+ }
487
+ }
488
+ }
489
+ }
490
+ ZE2UR_CALL (
491
+ zeCommandListAppendWaitOnEvents,
492
+ (CmdList->first , EventWaitList.Length , EventWaitList.ZeEventList ));
493
+ }
494
+ ZE2UR_CALL (zeCommandListAppendSignalEvent,
495
+ (CmdList->first , Event->ZeEvent ));
496
+ } else {
497
+ ZE2UR_CALL (zeCommandListAppendBarrier,
498
+ (CmdList->first , Event->ZeEvent , EventWaitList.Length ,
499
+ EventWaitList.ZeEventList ));
500
+ }
501
+
502
+ return UR_RESULT_SUCCESS;
503
+ };
504
+
505
+ // If the queue is in-order then each command in it effectively acts as a
506
+ // barrier, so we don't need to do anything unless a "barrier" event was
507
+ // requested to be created, or we need to wait for events in
508
+ // potentially different queues.
509
+ //
510
+ if (Queue->isInOrderQueue () && NumEventsInWaitList == 0 && !OutEvent) {
511
+ return UR_RESULT_SUCCESS;
512
+ }
513
+
514
+ ur_event_handle_t ResultEvent = nullptr ;
515
+ bool IsInternal = OutEvent == nullptr ;
516
+ // For in-order queue and wait-list which is empty or has events from
517
+ // the same queue just use the last command event as the barrier event.
518
+ // This optimization is disabled when profiling is enabled to ensure
519
+ // accurate profiling values & the overhead that profiling incurs.
520
+ if (Queue->isInOrderQueue () && !Queue->isProfilingEnabled () &&
521
+ WaitListEmptyOrAllEventsFromSameQueue (Queue, NumEventsInWaitList,
522
+ EventWaitList) &&
523
+ Queue->LastCommandEvent && !Queue->LastCommandEvent ->IsDiscarded ) {
524
+ UR_CALL (ur::level_zero::urEventRetain (Queue->LastCommandEvent ));
525
+ ResultEvent = Queue->LastCommandEvent ;
526
+ if (OutEvent) {
527
+ *OutEvent = ResultEvent;
528
+ }
529
+ return UR_RESULT_SUCCESS;
530
+ }
531
+
532
+ // Indicator for whether batching is allowed. This may be changed later in
533
+ // this function, but allow it by default.
534
+ bool OkToBatch = true ;
535
+
536
+ // If we have a list of events to make the barrier from, then we can create a
537
+ // barrier on these and use the resulting event as our future barrier.
538
+ // We use the same approach if
539
+ // UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a
540
+ // positive value.
541
+ // We use the same approach if we have in-order queue because every command
542
+ // depends on previous one, so we don't need to insert barrier to multiple
543
+ // command lists.
544
+ if (NumEventsInWaitList || !UseMultipleCmdlistBarriers ||
545
+ Queue->isInOrderQueue ()) {
546
+ // Retain the events as they will be owned by the result event.
547
+ _ur_ze_event_list_t TmpWaitList;
548
+ UR_CALL (TmpWaitList.createAndRetainUrZeEventList (
549
+ NumEventsInWaitList, EventWaitList, Queue, false /* UseCopyEngine=*/ ));
550
+
551
+ // Get an arbitrary command-list in the queue.
552
+ ur_command_list_ptr_t CmdList;
553
+ UR_CALL (Queue->Context ->getAvailableCommandList (
554
+ Queue, CmdList, false /* UseCopyEngine=*/ , NumEventsInWaitList,
555
+ EventWaitList, OkToBatch, nullptr /* ForcedCmdQueue*/ ));
556
+
557
+ // Insert the barrier into the command-list and execute.
558
+ UR_CALL (insertBarrierIntoCmdList (CmdList, TmpWaitList, ResultEvent,
559
+ IsInternal));
560
+
561
+ UR_CALL (
562
+ Queue->executeCommandList (CmdList, false /* IsBlocking*/ , OkToBatch));
563
+
564
+ // Because of the dependency between commands in the in-order queue we don't
565
+ // need to keep track of any active barriers if we have in-order queue.
566
+ if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue ()) {
567
+ auto UREvent = reinterpret_cast <ur_event_handle_t >(ResultEvent);
568
+ Queue->ActiveBarriers .add (UREvent);
569
+ }
570
+
571
+ if (OutEvent) {
572
+ *OutEvent = ResultEvent;
573
+ }
574
+ return UR_RESULT_SUCCESS;
575
+ }
576
+
577
+ // Since there are no events to explicitly create a barrier for, we are
578
+ // inserting a queue-wide barrier.
579
+
580
+ // Command list(s) for putting barriers.
581
+ std::vector<ur_command_list_ptr_t > CmdLists;
582
+
583
+ // There must be at least one L0 queue.
584
+ auto &ComputeGroup = Queue->ComputeQueueGroupsByTID .get ();
585
+ auto &CopyGroup = Queue->CopyQueueGroupsByTID .get ();
586
+ UR_ASSERT (!ComputeGroup.ZeQueues .empty () || !CopyGroup.ZeQueues .empty (),
587
+ UR_RESULT_ERROR_INVALID_QUEUE);
588
+
589
+ size_t NumQueues = 0 ;
590
+ for (auto &QueueMap :
591
+ {Queue->ComputeQueueGroupsByTID , Queue->CopyQueueGroupsByTID })
592
+ for (auto &QueueGroup : QueueMap)
593
+ NumQueues += QueueGroup.second .ZeQueues .size ();
594
+
595
+ OkToBatch = true ;
596
+ // Get an available command list tied to each command queue. We need
597
+ // these so a queue-wide barrier can be inserted into each command
598
+ // queue.
599
+ CmdLists.reserve (NumQueues);
600
+ for (auto &QueueMap :
601
+ {Queue->ComputeQueueGroupsByTID , Queue->CopyQueueGroupsByTID })
602
+ for (auto &QueueGroup : QueueMap) {
603
+ bool UseCopyEngine =
604
+ QueueGroup.second .Type != ur_queue_handle_t_::queue_type::Compute;
605
+ if (Queue->UsingImmCmdLists ) {
606
+ // If immediate command lists are being used, each will act as their own
607
+ // queue, so we must insert a barrier into each.
608
+ for (auto &ImmCmdList : QueueGroup.second .ImmCmdLists )
609
+ if (ImmCmdList != Queue->CommandListMap .end ())
610
+ CmdLists.push_back (ImmCmdList);
611
+ } else {
612
+ for (auto ZeQueue : QueueGroup.second .ZeQueues ) {
613
+ if (ZeQueue) {
614
+ ur_command_list_ptr_t CmdList;
615
+ UR_CALL (Queue->Context ->getAvailableCommandList (
616
+ Queue, CmdList, UseCopyEngine, NumEventsInWaitList,
617
+ EventWaitList, OkToBatch, &ZeQueue));
618
+ CmdLists.push_back (CmdList);
619
+ }
620
+ }
621
+ }
622
+ }
623
+
624
+ // If no activity has occurred on the queue then there will be no cmdlists.
625
+ // We need one for generating an Event, so create one.
626
+ if (CmdLists.size () == 0 ) {
627
+ // Get any available command list.
628
+ ur_command_list_ptr_t CmdList;
629
+ UR_CALL (Queue->Context ->getAvailableCommandList (
630
+ Queue, CmdList, false /* UseCopyEngine=*/ , NumEventsInWaitList,
631
+ EventWaitList, OkToBatch, nullptr /* ForcedCmdQueue*/ ));
632
+ CmdLists.push_back (CmdList);
633
+ }
634
+
635
+ if (CmdLists.size () > 1 ) {
636
+ // Insert a barrier into each unique command queue using the available
637
+ // command-lists.
638
+ std::vector<ur_event_handle_t > EventWaitVector (CmdLists.size ());
639
+ for (size_t I = 0 ; I < CmdLists.size (); ++I) {
640
+ _ur_ze_event_list_t waitlist;
641
+ UR_CALL (insertBarrierIntoCmdList (
642
+ CmdLists[I], waitlist, EventWaitVector[I], true /* IsInternal*/ ));
643
+ }
644
+ // If there were multiple queues we need to create a "convergence" event to
645
+ // be our active barrier. This convergence event is signalled by a barrier
646
+ // on all the events from the barriers we have inserted into each queue.
647
+ // Use the first command list as our convergence command list.
648
+ ur_command_list_ptr_t &ConvergenceCmdList = CmdLists[0 ];
649
+
650
+ // Create an event list. It will take ownership over all relevant events so
651
+ // we relinquish ownership and let it keep all events it needs.
652
+ _ur_ze_event_list_t BaseWaitList;
653
+ UR_CALL (BaseWaitList.createAndRetainUrZeEventList (
654
+ EventWaitVector.size (),
655
+ reinterpret_cast <const ur_event_handle_t *>(EventWaitVector.data ()),
656
+ Queue, ConvergenceCmdList->second .isCopy (Queue)));
657
+
658
+ // Insert a barrier with the events from each command-queue into the
659
+ // convergence command list. The resulting event signals the convergence of
660
+ // all barriers.
661
+ UR_CALL (insertBarrierIntoCmdList (ConvergenceCmdList, BaseWaitList,
662
+ ResultEvent, IsInternal));
663
+ } else {
664
+ // If there is only a single queue then insert a barrier and the single
665
+ // result event can be used as our active barrier and used as the return
666
+ // event. Take into account whether output event is discarded or not.
667
+ _ur_ze_event_list_t waitlist;
668
+ UR_CALL (insertBarrierIntoCmdList (CmdLists[0 ], waitlist, ResultEvent,
669
+ IsInternal));
670
+ }
671
+
672
+ // Execute each command list so the barriers can be encountered.
673
+ for (ur_command_list_ptr_t &CmdList : CmdLists) {
674
+ bool IsCopy =
675
+ CmdList->second .isCopy (reinterpret_cast <ur_queue_handle_t >(Queue));
676
+ const auto &CommandBatch =
677
+ (IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch ;
678
+ // Only batch if the matching CmdList is already open.
679
+ OkToBatch = CommandBatch.OpenCommandList == CmdList;
680
+
681
+ UR_CALL (
682
+ Queue->executeCommandList (CmdList, false /* IsBlocking*/ , OkToBatch));
683
+ }
684
+
685
+ UR_CALL (Queue->ActiveBarriers .clear ());
686
+ Queue->ActiveBarriers .add (ResultEvent);
687
+ if (OutEvent) {
688
+ *OutEvent = ResultEvent;
689
+ }
690
+ return UR_RESULT_SUCCESS;
440
691
}
692
+ /*
693
+ ur_result_t urEnqueueEventsWaitWithBarrierExt(
694
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
695
+ const ur_exp_enqueue_ext_properties_t
696
+ *EnqueueExtProp, ///< [in][optional] pointer to the extended enqueue
697
+ properties uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
698
+ const ur_event_handle_t
699
+ *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
700
+ ///< pointer to a list of events that must be complete
701
+ ///< before this command can be executed. If nullptr,
702
+ ///< the numEventsInWaitList must be 0, indicating that
703
+ ///< all previously enqueued commands must be complete.
704
+ ur_event_handle_t
705
+ *OutEvent ///< [in,out][optional] return an event object that identifies
706
+ ///< this particular command instance.
707
+ ) {
708
+ bool InterruptBased = EnqueueExtProp && (EnqueueExtProp->flags &
709
+ UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS); ur_event_handle_t ResultEvent =
710
+ nullptr;
711
+
712
+ if (InterruptBased) {
713
+ // Create the event with interrupt-based properties
714
+ ur_command_list_ptr_t CmdList;
715
+ UR_CALL(Queue->Context->getAvailableCommandList(Queue, CmdList, false,
716
+ NumEventsInWaitList, EventWaitList, true, nullptr));
717
+ UR_CALL(createEventAndAssociateQueue(Queue, &ResultEvent,
718
+ UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, true, false, std::nullopt,
719
+ InterruptBased));
720
+ }
721
+
722
+ ur_result_t result = ur::level_zero::urEnqueueEventsWaitWithBarrier(
723
+ Queue, NumEventsInWaitList, EventWaitList, OutEvent);
724
+
725
+ if (InterruptBased && OutEvent) {
726
+ *OutEvent = ResultEvent;
727
+ }
728
+ return result;
729
+ }
730
+
731
+ */
441
732
442
733
ur_result_t urEventGetInfo (
443
734
ur_event_handle_t Event, // /< [in] handle of the event object
0 commit comments