[SYCL][E2E] Run InlineAsm/letter_example and InlineAsm/malloc_shared_32 only on pre-intel_gpu_bmg_g21 devices (#17175)

dyniols · web-flow · commit 1d5076f53f45 · 2025-02-28T07:29:26.000+01:00
The purpose of this PR is to limit execution of the
`InlineAsm/letter_example` and `InlineAsm/malloc_shared_32` tests to
devices whose architecture precedes `intel_gpu_bmg_g21`.
diff --git a/sycl/test-e2e/InlineAsm/letter_example.cpp b/sycl/test-e2e/InlineAsm/letter_example.cpp
@@ -1,66 +1,76 @@
 // REQUIRES: sg-16,aspect-usm_shared_allocations
-// UNSUPPORTED: arch-intel_gpu_bmg_g21
-// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16921
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
 
 #include "include/asmhelper.h"
 #include <iostream>
 #include <sycl/usm.hpp>
 
+namespace syclex = sycl::ext::oneapi::experimental;
+
 constexpr size_t problem_size = 16;
 
 class kernel_name;
 
 int main() {
   sycl::queue q;
   sycl::device Device = q.get_device();
+  int Failed = 0;
 
   if (!isInlineASMSupported(Device)) {
     std::cout << "Skipping test\n";
     return 0;
   }
+
+  syclex::architecture CurrentDeviceArch =
+      Device.get_info<syclex::info::device::architecture>();
+  // This check is carried out because the test is not supported on BMG and
+  // subsequent devices.
+  if (CurrentDeviceArch >= syclex::architecture::intel_gpu_bmg_g21) {
+    std::cout << "This test is not supported on BMG and later. Skipping..."
+              << std::endl;
+    return 0;
+  }
+
   auto ctx = q.get_context();
-  int *a =
-      (int *)malloc_shared(sizeof(int) * problem_size, q.get_device(), ctx);
+  int *a = (int *)malloc_shared(sizeof(int) * problem_size, Device, ctx);
+
   for (int i = 0; i < problem_size; i++) {
     a[i] = i;
   }
-  q.submit([&](sycl::handler &cgh) {
-     cgh.parallel_for<kernel_name>(
-         sycl::range<1>(problem_size),
-         [=](sycl::id<1> idx) [[sycl::reqd_sub_group_size(16)]] {
+
+  q.parallel_for<kernel_name>(
+       sycl::range<1>(problem_size),
+       [=](sycl::id<1> idx) [[sycl::reqd_sub_group_size(16)]] {
+         // The use of if_architecture_is_ge is a precaution in case the test is
+         // compiled with the -fsycl-targets flag.
+         syclex::if_architecture_is_ge<syclex::architecture::intel_gpu_bmg_g21>(
+             []() {})
+             .otherwise([&]() {
 #if defined(__SYCL_DEVICE_ONLY__)
-           int i = idx[0];
-           asm volatile("{\n.decl V52 v_type=G type=d num_elts=16 align=GRF\n"
-                        "svm_gather.4.1 (M1, 16) %0.0 V52.0\n"
-                        "add(M1, 16) V52(0, 0)<1> V52(0, 0)<1; 1, 0> 0x1:w\n"
-                        "svm_scatter.4.1 (M1, 16) %0.0 V52.0\n}"
-                        :
-                        : "rw"(&a[i]));
+               int i = idx[0];
+               asm volatile(
+                   "{\n.decl V52 v_type=G type=d num_elts=16 align=GRF\n"
+                   "svm_gather.4.1 (M1, 16) %0.0 V52.0\n"
+                   "add(M1, 16) V52(0, 0)<1> V52(0, 0)<1; 1, 0> 0x1:w\n"
+                   "svm_scatter.4.1 (M1, 16) %0.0 V52.0\n}"
+                   :
+                   : "rw"(&a[i]));
 #else
-           a[idx[0]]++;
+               a[idx[0]]++;
 #endif
-         });
-   }).wait();
+             });
+       })
+      .wait();
 
-  bool currect = true;
   for (int i = 0; i < problem_size; i++) {
     if (a[i] != (i + 1)) {
-      currect = false;
       std::cerr << "error in a[" << i << "]=" << a[i] << "!=" << (i + 1)
                 << std::endl;
-      break;
+      ++Failed;
     }
   }
 
-  if (!currect) {
-    std::cerr << "Error" << std::endl;
-    sycl::free(a, ctx);
-    return 1;
-  }
-
-  std::cerr << "Pass" << std::endl;
   sycl::free(a, ctx);
-  return 0;
+  return Failed;
 }
diff --git a/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp b/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp
@@ -1,6 +1,4 @@
 // REQUIRES: sg-32,aspect-usm_shared_allocations
-// UNSUPPORTED: arch-intel_gpu_bmg_g21
-// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16921
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
 
@@ -12,16 +10,28 @@ constexpr size_t problem_size = 32;
 
 class kernel_name;
 
+namespace syclex = sycl::ext::oneapi::experimental;
+
 int main() {
   sycl::queue q;
-
   sycl::device Device = q.get_device();
+  int Failed = 0;
 
   if (!isInlineASMSupported(Device)) {
     std::cout << "Skipping test\n";
     return 0;
   }
 
+  syclex::architecture CurrentDeviceArch =
+      Device.get_info<syclex::info::device::architecture>();
+  // This check is carried out because the test is not supported on BMG and
+  // subsequent devices.
+  if (CurrentDeviceArch >= syclex::architecture::intel_gpu_bmg_g21) {
+    std::cout << "This test is not supported on BMG and later. Skipping..."
+              << std::endl;
+    return 0;
+  }
+
   auto ctx = q.get_context();
   int *a =
       (int *)malloc_shared(sizeof(int) * problem_size, q.get_device(), ctx);
@@ -35,13 +45,17 @@ int main() {
     c[i] = i;
   }
 
-  q.submit([&](sycl::handler &cgh) {
-     cgh.parallel_for<kernel_name>(
-         sycl::range<1>(problem_size),
-         [=](sycl::id<1> idx) [[sycl::reqd_sub_group_size(32)]] {
-           int i = idx[0];
+  q.parallel_for<kernel_name>(
+       sycl::range<1>(problem_size),
+       [=](sycl::id<1> idx) [[sycl::reqd_sub_group_size(32)]] {
+         int i = idx[0];
+         // The use of if_architecture_is_ge is a precaution in case the test is
+         // compiled with the -fsycl-targets flag.
+         syclex::if_architecture_is_ge<syclex::architecture::intel_gpu_bmg_g21>(
+             []() {})
+             .otherwise([&]() {
 #if defined(__SYCL_DEVICE_ONLY__)
-           asm volatile(R"a(
+               asm volatile(R"a(
     {
         .decl V52 v_type=G type=d num_elts=16 align=GRF
         .decl V53 v_type=G type=d num_elts=16 align=GRF
@@ -59,35 +73,26 @@ int main() {
         svm_scatter.4.1 (M1, 16) %1.0 V53.0
     }
     )a" ::"rw"(&b[i]),
-                        "rw"(&b[i] + 16), "rw"(&a[i]), "rw"(&a[i] + 16),
-                        "rw"(&c[i]), "rw"(&c[i] + 16));
+                            "rw"(&b[i] + 16), "rw"(&a[i]), "rw"(&a[i] + 16),
+                            "rw"(&c[i]), "rw"(&c[i] + 16));
 #else
-           b[i] = a[i] * c[i];
+               b[i] = a[i] * c[i];
 #endif
-         });
-   }).wait();
+             });
+       })
+      .wait();
 
-  bool currect = true;
   for (int i = 0; i < problem_size; i++) {
     if (b[i] != a[i] * c[i]) {
-      currect = false;
       std::cerr << "error in a[" << i << "]=" << b[i] << "!=" << a[i] * c[i]
                 << std::endl;
-      break;
+      ++Failed;
     }
   }
 
-  if (!currect) {
-    std::cerr << "Error" << std::endl;
-    sycl::free(a, ctx);
-    sycl::free(b, ctx);
-    sycl::free(c, ctx);
-    return 1;
-  }
-
-  std::cerr << "Pass" << std::endl;
   sycl::free(a, ctx);
   sycl::free(b, ctx);
   sycl::free(c, ctx);
-  return 0;
+
+  return Failed;
 }