Skip to content

Commit dba8684

Browse files
authored
[SYCLomatic] Turn on sycl extension feature "free function queries" by default. (#2794)
Deprecate option value: use-experimental-features=free-function-queries Add option value: --no-dpcpp-extensions=free-function-queries Signed-off-by: Chen, Sheng S <sheng.s.chen@intel.com>
1 parent b6ca5ce commit dba8684

File tree

129 files changed

+1318
-1347
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

129 files changed

+1318
-1347
lines changed

clang/include/clang/DPCT/DPCTOptions.inc

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,13 @@ DPCT_ENUM_OPTION(
657657
"enqueued_barriers",
658658
int(DPCPPExtensionsDefaultEnabled::ExtDE_EnqueueBarrier),
659659
"Disable the enqueued barriers extension.", false),
660+
DPCT_OPTION_ENUM_VALUE(
661+
"free-function-queries",
662+
int(DPCPPExtensionsDefaultEnabled::ExtDE_FreeQueries),
663+
"Disable the free function query experimental extension that "
664+
"allows getting 'id', 'item', 'nd_item', 'group', and 'sub_group' "
665+
"instances globally.",
666+
false),
660667
DPCT_OPTION_ENUM_VALUE(
661668
"peer_access", int(DPCPPExtensionsDefaultEnabled::ExtDE_PeerAccess),
662669
"Disable the peer access extension.", false),
@@ -723,9 +730,10 @@ DPCT_ENUM_OPTION(
723730
DPCT_OPTION_VALUES(
724731
DPCT_OPTION_ENUM_VALUE(
725732
"free-function-queries", int(ExperimentalFeatures::Exp_FreeQueries),
726-
"Experimental extension that allows getting 'id', "
733+
"DEPRECATED : Experimental extension that allows getting 'id', "
727734
"'item', 'nd_item', 'group', and\n"
728-
"'sub_group' instances globally.",
735+
"'sub_group' instances globally. Deprecate this option as the "
736+
"feature is turned on by default.",
729737
false),
730738
DPCT_OPTION_ENUM_VALUE(
731739
"local-memory-kernel-scope-allocation",

clang/lib/DPCT/AnalysisInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1296,7 +1296,9 @@ class DpctGlobalInfo {
12961296
return getUsingExperimental<ExperimentalFeatures::Exp_RootGroup>();
12971297
}
12981298
static bool useFreeQueries() {
1299-
return getUsingExperimental<ExperimentalFeatures::Exp_FreeQueries>();
1299+
return getUsingExperimental<ExperimentalFeatures::Exp_FreeQueries>() ||
1300+
getUsingExtensionDE(
1301+
DPCPPExtensionsDefaultEnabled::ExtDE_FreeQueries);
13001302
}
13011303
static bool useGroupLocalMemory() {
13021304
return getUsingExperimental<ExperimentalFeatures::Exp_GroupSharedMemory>();

clang/lib/DPCT/CommandOption/ValidateArguments.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ enum class DPCPPExtensionsDefaultEnabled : unsigned int {
6868
ExtDE_DeviceInfo,
6969
ExtDE_BFloat16,
7070
ExtDE_PeerAccess,
71+
ExtDE_FreeQueries,
7172
ExtDE_Assert,
7273
ExtDE_QueueEmpty,
7374
ExtDE_DPCPPExtensionsDefaultEnabledEnumSize,

clang/test/dpct/a_vcxproj_test/a_kernel.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
// UNSUPPORTED: system-linux
22
// RUN: cat %S/DemoCudaProj.vcxproj > %T/DemoCudaProj.vcxproj
33
// RUN: cd %T
4-
// RUN: dpct --format-range=none --vcxprojfile=%T/DemoCudaProj.vcxproj -in-root=%S -out-root=%T %s --cuda-include-path="%cuda-path/include"
4+
// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none --vcxprojfile=%T/DemoCudaProj.vcxproj -in-root=%S -out-root=%T %s --cuda-include-path="%cuda-path/include"
55

66
// RUN: cat %S/check_compilation_ref.txt >%T/check_compilation_db.txt
77
// RUN: cat %T/compile_commands.json >> %T/check_compilation_db.txt
88

99
// RUN: FileCheck --match-full-lines --input-file %T/check_compilation_db.txt %T/check_compilation_db.txt
1010

11-
// RUN: dpct --format-range=none -p=%S -in-root=%S -out-root=%T/2 --process-all --cuda-include-path="%cuda-path/include"
11+
// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none -p=%S -in-root=%S -out-root=%T/2 --process-all --cuda-include-path="%cuda-path/include"
1212

1313
// RUN: FileCheck --input-file %T/2/a_kernel.dp.cpp --match-full-lines %S/a_kernel.cu
1414
// RUN: %if build_lit %{icpx -c -fsycl %T/2/a_kernel.dp.cpp -o %T/2/a_kernel.dp.o %}

clang/test/dpct/anonymous_shared_var_macro.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
// CHECK: float a1d[7]; \
1717
// CHECK: }; \
1818
// CHECK: type_ct2 *atoms = (type_ct2 *)atoms_ct1; \
19-
// CHECK: if (item_ct1.get_local_id(2) < 7) { \
20-
// CHECK: bspline_coeffs.a1d[item_ct1.get_local_id(2)] = 0; \
19+
// CHECK: if (sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2) < 7) { \
20+
// CHECK: bspline_coeffs.a1d[sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2)] = 0; \
2121
// CHECK: };
2222

2323
#define BSPLINE_DEFS \

clang/test/dpct/asm/cvta.cu

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2
22
// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2
3-
// RUN: dpct --format-range=none -out-root %T/cvta %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
3+
// RUN: dpct --format-range=none -out-root %T/cvta %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
44
// RUN: FileCheck %s --match-full-lines --input-file %T/cvta/cvta.dp.cpp
55
// RUN: %if build_lit %{icpx -c -fsycl %T/cvta/cvta.dp.cpp -o %T/cvta/cvta.dp.o %}
66

@@ -9,13 +9,12 @@
99
#include <cuda_runtime.h>
1010

1111

12-
// CHECK: void test_cvta_to_shared_u64(uint64_t* output, const sycl::nd_item<3> &item_ct1,
13-
// CHECK-NEXT: int *shared_data) {
12+
// CHECK: void test_cvta_to_shared_u64(uint64_t* output, int *shared_data) {
1413
// CHECK-NEXT: // Shared memory
1514
// CHECK-NEXT: shared_data[0] = 0;
1615
// CHECK-NEXT: uint64_t shared_addr = 0;
1716
// CHECK-NEXT: shared_addr = (uint64_t)(&shared_data[0]);
18-
// CHECK-NEXT: output[item_ct1.get_local_id(2)] = shared_addr;
17+
// CHECK-NEXT: output[sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2)] = shared_addr;
1918
// CHECK-NEXT:}
2019
__global__ void test_cvta_to_shared_u64(uint64_t* output) {
2120
__shared__ int shared_data[1]; // Shared memory
@@ -27,8 +26,8 @@ __global__ void test_cvta_to_shared_u64(uint64_t* output) {
2726

2827

2928
#define N 128
30-
// CHECK: void testKernel(unsigned int *addr_out, const sycl::nd_item<3> &item_ct1,
31-
// CHECK-NEXT: int *B_shared) {
29+
// CHECK: void testKernel(unsigned int *addr_out, int *B_shared) {
30+
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
3231
// CHECK-NEXT: // Shared memory
3332
// CHECK-NEXT: unsigned int addr1;
3433
// CHECK-NEXT: int k_0_1 = item_ct1.get_group(2);
@@ -55,8 +54,8 @@ __global__ void testKernel(unsigned int *addr_out) {
5554
}
5655

5756

58-
// CHECK: void read_shared_value(int *output, const sycl::nd_item<3> &item_ct1,
59-
// CHECK-NEXT: int *shared_data) {
57+
// CHECK: void read_shared_value(int *output, int *shared_data) {
58+
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
6059
// CHECK-NEXT: // Shared memory allocation
6160
// CHECK-NEXT: if (item_ct1.get_local_id(2) == 0) {
6261
// CHECK-NEXT: shared_data[0] = 42;

clang/test/dpct/asm/red.cu

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,107 +8,96 @@
88
#include <cstdint>
99
#include <cuda_runtime.h>
1010

11-
// CHECK: void atomicAddKernel(int* lock, int val, const sycl::nd_item<3> &item_ct1) {
12-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::plus<>());
11+
// CHECK: void atomicAddKernel(int* lock, int val) {
12+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::plus<>());
1313
// CHECK-NEXT:}
1414
__global__ void atomicAddKernel(int* lock, int val) {
1515
asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n"
1616
::"l"(lock),"r"(val):"memory");
1717
}
1818

19-
// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val,
20-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
21-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_or<>());
19+
// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val) {
20+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_or<>());
2221
// CHECK-NEXT:}
2322
__global__ void atomicOrKernel(uint32_t* lock, uint32_t val) {
2423
asm volatile("red.relaxed.gpu.global.or.b32 [%0], %1;\n"
2524
::"l"(lock),"r"(val):"memory");
2625
}
2726

28-
// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val,
29-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
30-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_xor<>());
27+
// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val) {
28+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_xor<>());
3129
// CHECK-NEXT:}
3230
__global__ void atomicXorKernel(uint32_t* lock, uint32_t val) {
3331
asm volatile("red.relaxed.gpu.global.xor.b32 [%0], %1;\n"
3432
::"l"(lock),"r"(val):"memory");
3533
}
3634

37-
// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val,
38-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
39-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_and<>());
35+
// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val) {
36+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_and<>());
4037
// CHECK-NEXT: }
4138
__global__ void atomicAndKernel(uint32_t* lock, uint32_t val) {
4239
asm volatile("red.relaxed.gpu.global.and.b32 [%0], %1;\n"
4340
::"l"(lock),"r"(val):"memory");
4441
}
4542

46-
// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val,
47-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
48-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::maximum<>());
43+
// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val) {
44+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::maximum<>());
4945
// CHECK-NEXT: }
5046
__global__ void atomicMaxKernel(uint32_t* lock, uint32_t val) {
5147
asm volatile("red.relaxed.gpu.global.max.u32 [%0], %1;\n"
5248
::"l"(lock),"r"(val):"memory");
5349
}
5450

55-
// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val,
56-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
57-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>());
51+
// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val) {
52+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::minimum<>());
5853
// CHECK-NEXT: }
5954
__global__ void atomicMinKernel(uint32_t* lock, uint32_t val) {
6055
asm volatile("red.relaxed.gpu.global.min.u32 [%0], %1;\n"
6156
::"l"(lock),"r"(val):"memory");
6257
}
6358

64-
// CHECK: void atomicAddKernelRelease(int* lock, int val,
65-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
66-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::plus<>());
59+
// CHECK: void atomicAddKernelRelease(int* lock, int val) {
60+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::plus<>());
6761
// CHECK-NEXT:}
6862
__global__ void atomicAddKernelRelease(int* lock, int val) {
6963
asm volatile("red.release.gpu.global.add.s32 [%0], %1;\n"
7064
::"l"(lock),"r"(val):"memory");
7165
}
7266

73-
// CHECK: void atomicOrKernelRelease(uint32_t* lock, uint32_t val,
74-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
75-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_or<>());
67+
// CHECK: void atomicOrKernelRelease(uint32_t* lock, uint32_t val) {
68+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_or<>());
7669
// CHECK-NEXT:}
7770
__global__ void atomicOrKernelRelease(uint32_t* lock, uint32_t val) {
7871
asm volatile("red.release.gpu.global.or.b32 [%0], %1;\n"
7972
::"l"(lock),"r"(val):"memory");
8073
}
8174

82-
// CHECK: void atomicXorKernelRelease(uint32_t* lock, uint32_t val,
83-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
84-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_xor<>());
75+
// CHECK: void atomicXorKernelRelease(uint32_t* lock, uint32_t val) {
76+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_xor<>());
8577
// CHECK-NEXT:}
8678
__global__ void atomicXorKernelRelease(uint32_t* lock, uint32_t val) {
8779
asm volatile("red.release.gpu.global.xor.b32 [%0], %1;\n"
8880
::"l"(lock),"r"(val):"memory");
8981
}
9082

91-
// CHECK: void atomicAndKernelRelease(uint32_t* lock, uint32_t val,
92-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
93-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_and<>());
83+
// CHECK: void atomicAndKernelRelease(uint32_t* lock, uint32_t val) {
84+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_and<>());
9485
// CHECK-NEXT: }
9586
__global__ void atomicAndKernelRelease(uint32_t* lock, uint32_t val) {
9687
asm volatile("red.release.gpu.global.and.b32 [%0], %1;\n"
9788
::"l"(lock),"r"(val):"memory");
9889
}
9990

100-
// CHECK: void atomicMaxKernelRelease(uint32_t* lock, uint32_t val,
101-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
102-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::maximum<>());
91+
// CHECK: void atomicMaxKernelRelease(uint32_t* lock, uint32_t val) {
92+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::maximum<>());
10393
// CHECK-NEXT: }
10494
__global__ void atomicMaxKernelRelease(uint32_t* lock, uint32_t val) {
10595
asm volatile("red.release.gpu.global.max.u32 [%0], %1;\n"
10696
::"l"(lock),"r"(val):"memory");
10797
}
10898

109-
// CHECK: void atomicMinKernelRelease(uint32_t* lock, uint32_t val,
110-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
111-
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>());
99+
// CHECK: void atomicMinKernelRelease(uint32_t* lock, uint32_t val) {
100+
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::minimum<>());
112101
// CHECK-NEXT: }
113102
__global__ void atomicMinKernelRelease(uint32_t* lock, uint32_t val) {
114103
asm volatile("red.release.gpu.global.min.u32 [%0], %1;\n"

clang/test/dpct/atomic_functions.cu

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
template <typename T>
1313
__global__ void test(T *data) {
14-
// CHECK: T tid = item_ct1.get_local_id(2);
14+
// CHECK: T tid = sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2);
1515
T tid = threadIdx.x;
1616

1717
// CHECK: dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&data[0], tid);
@@ -108,7 +108,7 @@ void InvokeKernel() {
108108
// CHECK-NEXT: cgh.parallel_for<dpct_kernel_name<class test_{{[a-f0-9]+}}, T>>(
109109
// CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, k_threads_per_block), sycl::range<3>(1, 1, k_threads_per_block)),
110110
// CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) {
111-
// CHECK-NEXT: test<T>(dev_ptr_acc_ct0.get_raw_pointer(), item_ct1);
111+
// CHECK-NEXT: test<T>(dev_ptr_acc_ct0.get_raw_pointer());
112112
// CHECK-NEXT: });
113113
// CHECK-NEXT: });
114114
test<T><<<1, k_threads_per_block>>>(dev_ptr);
@@ -139,8 +139,8 @@ int main() {
139139
InvokeKernel<double>();
140140
}
141141

142-
// CHECK: void foo(const sycl::nd_item<3> &item_ct1, uint8_t *dpct_local,
143-
// CHECK-NEXT: uint32_t &share_v) {
142+
// CHECK: void foo(uint8_t *dpct_local, uint32_t &share_v) {
143+
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
144144
// CHECK-NEXT: auto share_array = (uint32_t *)dpct_local;
145145
// CHECK-NEXT: for (int b = item_ct1.get_local_id(2); b < 64; b += item_ct1.get_local_range(2)) {
146146
// CHECK-NEXT: dpct::atomic_fetch_add<sycl::access::address_space::generic_space>(&share_array[b], 1);
@@ -159,8 +159,8 @@ __shared__ uint32_t share_v;
159159
atomicAdd(&share_v, 1);
160160
}
161161

162-
// CHECK: void foo_2(const sycl::nd_item<3> &item_ct1, uint8_t *dpct_local,
163-
// CHECK-NEXT: uint32_t &share_v) {
162+
// CHECK: void foo_2(uint8_t *dpct_local, uint32_t &share_v) {
163+
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
164164
// CHECK-NEXT: auto share_array = (uint32_t *)dpct_local;
165165
// CHECK-NEXT: for (int b = item_ct1.get_local_id(2); b < 64; b += item_ct1.get_local_range(2)) {
166166
// CHECK-NEXT: uint32_t *p_1 = &share_array[b];
@@ -454,8 +454,8 @@ __global__ void k() {
454454
atomicAdd(&f, f);
455455
}
456456

457-
// CHECK: void mykernel(unsigned int *dev, const sycl::nd_item<3> &item_ct1,
458-
// CHECK-NEXT: uint8_t *dpct_local) {
457+
// CHECK: void mykernel(unsigned int *dev, uint8_t *dpct_local) {
458+
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
459459
// CHECK-NEXT: auto sm = (unsigned int *)dpct_local;
460460
// CHECK-NEXT: unsigned int* as= (unsigned int*)sm;
461461
// CHECK-NEXT: const int kc=item_ct1.get_local_id(2);
@@ -474,9 +474,8 @@ __global__ void mykernel(unsigned int *dev) {
474474

475475
// TODO: Further refine the analysis of barrier to support this case.
476476
// CHECK: void mykernel_1(unsigned char *buffer, long size,
477-
// CHECK-NEXT: unsigned int *histo,
478-
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1,
479-
// CHECK-NEXT: unsigned int *temp) {
477+
// CHECK-NEXT: unsigned int *histo, unsigned int *temp) {
478+
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
480479
// CHECK-EMPTY:
481480
// CHECK-NEXT: temp[item_ct1.get_local_id(2)] = 0;
482481
// CHECK-NEXT: /*
@@ -552,8 +551,8 @@ __device__ void __gpu_sync(int blocks_to_synch) {
552551
while(g_mutex < blocks_to_synch);
553552
}
554553

555-
//CHECK:void atomicInc_foo(const sycl::nd_item<3> &item_ct1, uint8_t *dpct_local,
556-
//CHECK-NEXT: unsigned int &share_v) {
554+
//CHECK:void atomicInc_foo(uint8_t *dpct_local, unsigned int &share_v) {
555+
//CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
557556
//CHECK-NEXT: auto share_array = (unsigned int *)dpct_local;
558557
//CHECK-NEXT: for (int b = item_ct1.get_local_id(2); b < 64; b += item_ct1.get_local_range(2)) {
559558
//CHECK-NEXT: dpct::atomic_fetch_compare_inc<sycl::access::address_space::generic_space>(&share_array[b], 1);

clang/test/dpct/atomic_functions_system_wide.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
#include <iostream>
88
#include <memory>
99

10-
// CHECK:void atomic_kernel(int *atomic_array, const sycl::nd_item<3> &item_ct1) {
10+
// CHECK:void atomic_kernel(int *atomic_array) {
11+
// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
1112
// CHECK-NEXT: unsigned int tid = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2);
1213
// CHECK-NEXT: dpct::atomic_fetch_add<sycl::access::address_space::generic_space, sycl::memory_order::relaxed, sycl::memory_scope::system>(&atomic_array[0], 10);
1314
// CHECK-NEXT: dpct::atomic_exchange<sycl::access::address_space::generic_space, sycl::memory_order::relaxed, sycl::memory_scope::system>(&atomic_array[1], tid);

clang/test/dpct/b_vcxproj_test/b_kernel.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// UNSUPPORTED: system-linux
22
// RUN: cat %S/SVMbenchmark.vcxproj > %T/SVMbenchmark.vcxproj
3-
// RUN: dpct -output-file=b_kernel_outputfile_win.txt --format-range=none --vcxprojfile=%T/SVMbenchmark.vcxproj -in-root=%S -out-root=%T %s -extra-arg="-I %S" --cuda-include-path="%cuda-path/include"
3+
// RUN: dpct --no-dpcpp-extensions=free-function-queries -output-file=b_kernel_outputfile_win.txt --format-range=none --vcxprojfile=%T/SVMbenchmark.vcxproj -in-root=%S -out-root=%T %s -extra-arg="-I %S" --cuda-include-path="%cuda-path/include"
44

55
// RUN: cat %S/check_compilation_ref.txt >%T/check_compilation_db.txt
66
// RUN: cat %T/compile_commands.json >>%T/check_compilation_db.txt
@@ -10,7 +10,7 @@
1010
// RUN: cat %T/b_kernel_outputfile_win.txt >>%T/check_b_kernel_outputfile_windows.txt
1111
// RUN: FileCheck --match-full-lines --input-file %T/check_b_kernel_outputfile_windows.txt %T/check_b_kernel_outputfile_windows.txt
1212

13-
// RUN: dpct --format-range=none -output-file=output-file.txt -in-root=%S -out-root=%T/2 %s --process-all --cuda-include-path="%cuda-path/include"
13+
// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none -output-file=output-file.txt -in-root=%S -out-root=%T/2 %s --process-all --cuda-include-path="%cuda-path/include"
1414
// RUN: cat %S/readme_2_ref.txt > %T/2/readme_2.txt
1515
// RUN: cat %S/readme_2.txt > %T/2/check_output-file.txt
1616
// RUN: cat %T/2/output-file.txt >>%T/2/check_output-file.txt

0 commit comments

Comments
 (0)