diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc
index f0a1e36e0ef34..441bc6a6eb383 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc
@@ -765,10 +765,11 @@ _executable_ by the user invoking `command_graph::finalize()` to create a
 new executable instance of the graph. An executable graph cannot be converted
 to a modifiable graph. After finalizing a graph in the modifiable state, it is
 valid for a user to add additional nodes and finalize again to create subsequent
-executable graphs. The state of a `command_graph` object is made explicit by
-templating on state to make the class strongly typed, with the default template
-argument being `graph_state::modifiable` to reduce code verbosity on
-construction.
+executable graphs. When an executable graph is destroyed, the underlying
+resources will be freed only once any enqueued submissions of the graph have
+completed. The state of a `command_graph` object is made explicit by templating
+on state to make the class strongly typed, with the default template argument
+being `graph_state::modifiable` to reduce code verbosity on construction.
 
 .Graph State Diagram
 [source, mermaid]
diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp
index caf50642c3a2c..1f90250f2c8ac 100644
--- a/sycl/source/detail/graph_impl.cpp
+++ b/sycl/source/detail/graph_impl.cpp
@@ -1002,11 +1002,6 @@ exec_graph_impl::~exec_graph_impl() {
     const sycl::detail::AdapterPtr &Adapter =
         sycl::detail::getSyclObjImpl(MContext)->getAdapter();
     MSchedule.clear();
-    // We need to wait on all command buffer executions before we can release
-    // them.
-    for (auto &Event : MExecutionEvents) {
-      Event->wait(Event);
-    }
 
     // Clean up any graph-owned allocations that were allocated
     MGraphImpl->getMemPool().deallocateAndUnmapAll();
diff --git a/sycl/test-e2e/Graph/Explicit/release_while_executing.cpp b/sycl/test-e2e/Graph/Explicit/release_while_executing.cpp
new file mode 100644
index 0000000000000..5751486d76dea
--- /dev/null
+++ b/sycl/test-e2e/Graph/Explicit/release_while_executing.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/release_while_executing.cpp"
diff --git a/sycl/test-e2e/Graph/Inputs/release_while_executing.cpp b/sycl/test-e2e/Graph/Inputs/release_while_executing.cpp
new file mode 100644
index 0000000000000..ac2fdcd28b95c
--- /dev/null
+++ b/sycl/test-e2e/Graph/Inputs/release_while_executing.cpp
@@ -0,0 +1,47 @@
+// Tests destroying finalized command_graph before it is finished executing,
+// relying on the backends to properly synchronize and wait for the submitted
+// work to finish.
+
+#include "../graph_common.hpp"
+
+int main() {
+  queue Queue;
+
+  using T = int;
+
+  std::vector<T> DataA(Size), ReferenceA(Size);
+
+  std::iota(DataA.begin(), DataA.end(), 1);
+  std::iota(ReferenceA.begin(), ReferenceA.end(), 2);
+
+  T *PtrA = malloc_device<T>(Size, Queue);
+
+  // Create the command_graph in a seperate scope so that it's destroyed before
+  // Queue.wait()
+  {
+    exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+    Queue.copy(DataA.data(), PtrA, Size);
+    Queue.wait_and_throw();
+
+    auto Node = add_node(Graph, Queue, [&](handler &CGH) {
+      CGH.parallel_for(Size, [=](item<1> Item) { PtrA[Item.get_id()] += 1; });
+    });
+
+    auto GraphExec = Graph.finalize();
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
+  }
+
+  Queue.wait_and_throw();
+
+  Queue.copy(PtrA, DataA.data(), Size);
+  Queue.wait_and_throw();
+
+  free(PtrA, Queue);
+
+  for (size_t i = 0; i < Size; i++) {
+    assert(check_value(i, ReferenceA[i], DataA[i], "DataA"));
+  }
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/RecordReplay/release_while_executing.cpp b/sycl/test-e2e/Graph/RecordReplay/release_while_executing.cpp
new file mode 100644
index 0000000000000..cca6b7085a697
--- /dev/null
+++ b/sycl/test-e2e/Graph/RecordReplay/release_while_executing.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/release_while_executing.cpp"
diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h
index 9c967b411131d..484ca4ef94768 100644
--- a/unified-runtime/include/ur_api.h
+++ b/unified-runtime/include/ur_api.h
@@ -11078,7 +11078,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Decrement the command-buffer object's reference count and delete the
-///        command-buffer object if the reference count becomes zero.
+///        command-buffer object if the reference count becomes zero. It is
+///        legal to call the entry-point while `hCommandBuffer` is still
+///        executing, which will block on completion if the reference count of
+///        `hCommandBuffer` becomes zero.
 ///
 /// @returns
 ///     - ::UR_RESULT_SUCCESS
diff --git a/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst b/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst
index 136c7024632ec..2b443c13f6a3d 100644
--- a/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst
+++ b/unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst
@@ -563,3 +563,4 @@ Contributors
 * Maxime France-Pillois `maxime.francepillois@codeplay.com <maxime.francepillois@codeplay.com>`_
 * Aaron Greig `aaron.greig@codeplay.com <aaron.greig@codeplay.com>`_
 * Fábio Mestre `fabio.mestre@codeplay.com <fabio.mestre@codeplay.com>`_
+* Konrad Kusiak `konrad.kusiak@codeplay.com <konrad.kusiak@codeplay.com>`_
diff --git a/unified-runtime/scripts/core/exp-command-buffer.yml b/unified-runtime/scripts/core/exp-command-buffer.yml
index 05e61985cd4b8..e8f2caa15d59d 100644
--- a/unified-runtime/scripts/core/exp-command-buffer.yml
+++ b/unified-runtime/scripts/core/exp-command-buffer.yml
@@ -310,7 +310,7 @@ returns:
     - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
 --- #--------------------------------------------------------------------------
 type: function
-desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero."
+desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero. It is legal to call the entry-point while `hCommandBuffer` is still executing, which will block on completion if the reference count of `hCommandBuffer` becomes zero."
 class: $xCommandBuffer
 name: ReleaseExp
 params:
diff --git a/unified-runtime/source/adapters/level_zero/command_buffer.cpp b/unified-runtime/source/adapters/level_zero/command_buffer.cpp
index 020afb90564ff..6101e3b6d4ea0 100644
--- a/unified-runtime/source/adapters/level_zero/command_buffer.cpp
+++ b/unified-runtime/source/adapters/level_zero/command_buffer.cpp
@@ -671,6 +671,25 @@ ur_result_t createMainCommandList(ur_context_handle_t Context,
   return UR_RESULT_SUCCESS;
 }
 
+/**
+ * Waits for any ongoing executions of the command-buffer to finish
+ * @param CommandBuffer The command-buffer to wait for.
+ * @return UR_RESULT_SUCCESS or an error code on failure
+ */
+ur_result_t
+waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) {
+
+  if (ur_event_handle_t &CurrentSubmissionEvent =
+          CommandBuffer->CurrentSubmissionEvent) {
+    ZE2UR_CALL(zeEventHostSynchronize,
+               (CurrentSubmissionEvent->ZeEvent, UINT64_MAX));
+    UR_CALL(urEventReleaseInternal(CurrentSubmissionEvent));
+    CurrentSubmissionEvent = nullptr;
+  }
+
+  return UR_RESULT_SUCCESS;
+}
+
 /**
  * Checks whether the command-buffer can be constructed using in order
  * command-lists.
@@ -832,6 +851,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) {
   if (!CommandBuffer->RefCount.decrementAndTest())
     return UR_RESULT_SUCCESS;
 
+  UR_CALL(waitForOngoingExecution(CommandBuffer));
   CommandBuffer->cleanupCommandBufferResources();
   delete CommandBuffer;
   return UR_RESULT_SUCCESS;
@@ -1453,25 +1473,6 @@ ur_result_t getZeCommandQueue(ur_queue_handle_t Queue, bool UseCopyEngine,
   return UR_RESULT_SUCCESS;
 }
 
-/**
- * Waits for any ongoing executions of the command-buffer to finish.
- * @param CommandBuffer The command-buffer to wait for.
- * @return UR_RESULT_SUCCESS or an error code on failure
- */
-ur_result_t
-waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) {
-
-  if (ur_event_handle_t &CurrentSubmissionEvent =
-          CommandBuffer->CurrentSubmissionEvent) {
-    ZE2UR_CALL(zeEventHostSynchronize,
-               (CurrentSubmissionEvent->ZeEvent, UINT64_MAX));
-    UR_CALL(urEventReleaseInternal(CurrentSubmissionEvent));
-    CurrentSubmissionEvent = nullptr;
-  }
-
-  return UR_RESULT_SUCCESS;
-}
-
 /**
  * Waits for the all the dependencies of the command-buffer
  * @param[in] CommandBuffer The command-buffer.
diff --git a/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp b/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp
index 4d0b1ef5dc752..e5713074a096a 100644
--- a/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp
+++ b/unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp
@@ -263,6 +263,10 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) try {
   if (!hCommandBuffer->RefCount.decrementAndTest())
     return UR_RESULT_SUCCESS;
 
+  if (auto executionEvent = hCommandBuffer->getExecutionEventUnlocked()) {
+    ZE2UR_CALL(zeEventHostSynchronize,
+               (executionEvent->getZeEvent(), UINT64_MAX));
+  }
   delete hCommandBuffer;
   return UR_RESULT_SUCCESS;
 } catch (...) {
diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp
index ed14b29cd8393..f202f98eaaa21 100644
--- a/unified-runtime/source/loader/ur_libapi.cpp
+++ b/unified-runtime/source/loader/ur_libapi.cpp
@@ -8503,7 +8503,10 @@ ur_result_t UR_APICALL urCommandBufferRetainExp(
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Decrement the command-buffer object's reference count and delete the
-///        command-buffer object if the reference count becomes zero.
+///        command-buffer object if the reference count becomes zero. It is
+///        legal to call the entry-point while `hCommandBuffer` is still
+///        executing, which will block on completion if the reference count of
+///        `hCommandBuffer` becomes zero.
 ///
 /// @returns
 ///     - ::UR_RESULT_SUCCESS
diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp
index 549bd97e87f6b..fc2889cee9a94 100644
--- a/unified-runtime/source/ur_api.cpp
+++ b/unified-runtime/source/ur_api.cpp
@@ -7406,7 +7406,10 @@ ur_result_t UR_APICALL urCommandBufferRetainExp(
 
 ///////////////////////////////////////////////////////////////////////////////
 /// @brief Decrement the command-buffer object's reference count and delete the
-///        command-buffer object if the reference count becomes zero.
+///        command-buffer object if the reference count becomes zero. It is
+///        legal to call the entry-point while `hCommandBuffer` is still
+///        executing, which will block on completion if the reference count of
+///        `hCommandBuffer` becomes zero.
 ///
 /// @returns
 ///     - ::UR_RESULT_SUCCESS
diff --git a/unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp b/unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp
index 4f2a10593c19d..9fab249bf831c 100644
--- a/unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp
+++ b/unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp
@@ -190,3 +190,14 @@ TEST_P(urEnqueueCommandBufferExpTest, SerializeInOrOutOfOrderQueue) {
     ASSERT_EQ(reference, Output[i]);
   }
 }
+
+// Tests releasing command-buffer while it is still executing relying
+// on synchronization during urCommandBufferReleaseExp call.
+TEST_P(urEnqueueCommandBufferExpTest, EnqueueAndRelease) {
+  ASSERT_SUCCESS(urEnqueueCommandBufferExp(
+      in_or_out_of_order_queue, cmd_buf_handle, 0, nullptr, nullptr));
+
+  // Release the command buffer without explicitly waiting beforehand
+  EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
+  cmd_buf_handle = nullptr;
+}