|
| 1 | +// Copyright (C) 2024 Intel Corporation |
| 2 | +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. |
| 3 | +// See LICENSE.TXT |
| 4 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 5 | + |
| 6 | +#include <cuda.h> |
| 7 | +#include <uur/fixtures.h> |
| 8 | +#include <vector> |
| 9 | + |
| 10 | +using T = uint32_t; |
| 11 | + |
| 12 | +struct urCudaEnqueueNativeCommandTest : uur::urQueueTest { |
| 13 | + void SetUp() { |
| 14 | + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); |
| 15 | + |
| 16 | + host_vec = std::vector<T>(global_size, 0); |
| 17 | + ASSERT_EQ(host_vec.size(), global_size); |
| 18 | + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, |
| 19 | + allocation_size, &device_ptr)); |
| 20 | + ASSERT_NE(device_ptr, nullptr); |
| 21 | + } |
| 22 | + static constexpr T val = 42; |
| 23 | + static constexpr uint32_t global_size = 1e7; |
| 24 | + std::vector<T> host_vec; |
| 25 | + void *device_ptr = nullptr; |
| 26 | + static constexpr size_t allocation_size = sizeof(val) * global_size; |
| 27 | +}; |
| 28 | + |
| 29 | +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCudaEnqueueNativeCommandTest); |
| 30 | + |
// Payload handed to interop_func_1 through the opaque `void *data`
// parameter of urEnqueueNativeCommandExp.
struct InteropData1 {
  // Device USM pointer to be filled with the test pattern.
  void *fill_ptr;
};
| 34 | + |
| 35 | +// Fill a device ptr with the pattern val |
| 36 | +void interop_func_1(ur_queue_handle_t hQueue, void *data) { |
| 37 | + CUstream stream; |
| 38 | + ASSERT_SUCCESS( |
| 39 | + urQueueGetNativeHandle(hQueue, nullptr, (ur_native_handle_t *)&stream)); |
| 40 | + InteropData1 *func_data = reinterpret_cast<InteropData1 *>(data); |
| 41 | + |
| 42 | + ASSERT_EQ(cuMemsetD32Async((CUdeviceptr)func_data->fill_ptr, |
| 43 | + urCudaEnqueueNativeCommandTest::val, |
| 44 | + urCudaEnqueueNativeCommandTest::global_size, |
| 45 | + stream), |
| 46 | + CUDA_SUCCESS); |
| 47 | +} |
| 48 | + |
// Payload handed to interop_func_2 through the opaque `void *data`
// parameter of urEnqueueNativeCommandExp.
struct InteropData2 {
  // `from` is a device USM pointer, `to` is a host destination buffer.
  void *from, *to;
};
| 52 | + |
| 53 | +// Read from device ptr to host ptr |
| 54 | +void interop_func_2(ur_queue_handle_t hQueue, void *data) { |
| 55 | + CUstream stream; |
| 56 | + ASSERT_SUCCESS( |
| 57 | + urQueueGetNativeHandle(hQueue, nullptr, (ur_native_handle_t *)&stream)); |
| 58 | + InteropData2 *func_data = reinterpret_cast<InteropData2 *>(data); |
| 59 | + |
| 60 | + ASSERT_EQ(cuMemcpyDtoHAsync(func_data->to, (CUdeviceptr)func_data->from, |
| 61 | + urCudaEnqueueNativeCommandTest::allocation_size, |
| 62 | + stream), |
| 63 | + CUDA_SUCCESS); |
| 64 | +} |
| 65 | + |
| 66 | +TEST_P(urCudaEnqueueNativeCommandTest, Success) { |
| 67 | + InteropData1 data_1{device_ptr}; |
| 68 | + ur_event_handle_t event_1; |
| 69 | + ASSERT_SUCCESS(urEnqueueNativeCommandExp(queue, &interop_func_1, &data_1, |
| 70 | + nullptr, 0, nullptr, &event_1)); |
| 71 | +} |
| 72 | + |
| 73 | +TEST_P(urCudaEnqueueNativeCommandTest, Dependencies) { |
| 74 | + ur_event_handle_t event_1, event_2; |
| 75 | + |
| 76 | + InteropData1 data_1{device_ptr}; |
| 77 | + ASSERT_SUCCESS(urEnqueueNativeCommandExp(queue, &interop_func_1, &data_1, |
| 78 | + nullptr, 0, nullptr, &event_1)); |
| 79 | + |
| 80 | + InteropData2 data_2{device_ptr, host_vec.data()}; |
| 81 | + ASSERT_SUCCESS(urEnqueueNativeCommandExp(queue, &interop_func_2, &data_2, |
| 82 | + nullptr, 1, &event_1, &event_2)); |
| 83 | + urQueueFinish(queue); |
| 84 | + for (auto &i : host_vec) { |
| 85 | + ASSERT_EQ(i, val); |
| 86 | + } |
| 87 | +} |
0 commit comments