@@ -26,7 +26,7 @@ Except for m8n8k4, all other shapes are supported for row/col layout of A/B matr
26
26
27
27
__global__ void mma_kernel_m8n8k4 (int *a, int *b, float *c) {
28
28
// CHECK: {
29
- // CHECK-NEXT: volatile float *d_mat_frag_ct1[8] = { &c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7] };
29
+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[8] = { &c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7] };
30
30
// CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(a[0], a[1]);
31
31
// CHECK-NEXT: sycl::vec<uint32_t, 2> b_mat_frag_ct1(b[0], b[1]);
32
32
// CHECK-NEXT: sycl::vec<float, 8> c_mat_frag_ct1(c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]);
@@ -44,7 +44,7 @@ __global__ void mma_kernel_m8n8k4(int *a, int *b, float *c) {
44
44
45
45
__global__ void mma_kernel_m8n8k16 (int *a, int *b, int *c, int *d) {
46
46
// CHECK: {
47
- // CHECK-NEXT: volatile int32_t *d_mat_frag_ct1[2] = { &d[0], &d[1] };
47
+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[2] = { &d[0], &d[1] };
48
48
// CHECK-NEXT: sycl::vec<uint32_t, 1> a_mat_frag_ct1(a[0]);
49
49
// CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(b[0]);
50
50
// CHECK-NEXT: sycl::vec<int32_t, 2> c_mat_frag_ct1(c[0], c[1]);
@@ -63,7 +63,7 @@ __global__ void mma_kernel_m8n8k16(int *a, int *b, int *c, int *d) {
63
63
64
64
__global__ void mma_kernel_m16n8k8 (int *a, int *b, float *fc, float *fd) {
65
65
// CHECK: {
66
- // CHECK-NEXT: volatile float *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
66
+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
67
67
// CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(*(reinterpret_cast<int *>(&a[0])), *(reinterpret_cast<int *>(&a[1])));
68
68
// CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(*(reinterpret_cast<int *>(&b[0])));
69
69
// CHECK-NEXT: sycl::vec<float, 4> c_mat_frag_ct1(fc[0], fc[1], fc[2], fc[3]);
@@ -81,7 +81,7 @@ __global__ void mma_kernel_m16n8k8(int *a, int *b, float *fc, float *fd) {
81
81
" f" (fc[0 ]), " f" (fc[1 ]), " f" (fc[2 ]), " f" (fc[3 ]));
82
82
83
83
// CHECK: {
84
- // CHECK-NEXT: volatile float *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
84
+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &fd[0], &fd[1], &fd[2], &fd[3] };
85
85
// CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(*(reinterpret_cast<int *>(&a[0])), *(reinterpret_cast<int *>(&a[1])));
86
86
// CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(*(reinterpret_cast<int *>(&b[0])));
87
87
// CHECK-NEXT: sycl::vec<float, 4> c_mat_frag_ct1(fc[0], fc[1], fc[2], fc[3]);
@@ -101,7 +101,7 @@ __global__ void mma_kernel_m16n8k8(int *a, int *b, float *fc, float *fd) {
101
101
102
102
__global__ void mma_kernel_m16n8k16 (int *a, int *b, int *c, float *fc, int *d) {
103
103
// CHECK: {
104
- // CHECK-NEXT: volatile float *d_mat_frag_ct1[4] = { &fc[0], &fc[1], &fc[2], &fc[3] };
104
+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &fc[0], &fc[1], &fc[2], &fc[3] };
105
105
// CHECK-NEXT: sycl::vec<uint32_t, 4> a_mat_frag_ct1(a[0], a[1], a[2], a[3]);
106
106
// CHECK-NEXT: sycl::vec<uint32_t, 2> b_mat_frag_ct1(b[0], b[1]);
107
107
// CHECK-NEXT: sycl::vec<float, 4> c_mat_frag_ct1(fc[0], fc[1], fc[2], fc[3]);
@@ -117,7 +117,7 @@ __global__ void mma_kernel_m16n8k16(int *a, int *b, int *c, float *fc, int *d) {
117
117
" r" (b[0 ]), " r" (b[1 ]));
118
118
119
119
// CHECK: {
120
- // CHECK-NEXT: volatile int32_t *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
120
+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
121
121
// CHECK-NEXT: sycl::vec<uint32_t, 2> a_mat_frag_ct1(a[0], a[1]);
122
122
// CHECK-NEXT: sycl::vec<uint32_t, 1> b_mat_frag_ct1(b[0]);
123
123
// CHECK-NEXT: sycl::vec<int32_t, 4> c_mat_frag_ct1(c[0], c[1], c[2], c[3]);
@@ -136,7 +136,7 @@ __global__ void mma_kernel_m16n8k16(int *a, int *b, int *c, float *fc, int *d) {
136
136
137
137
__global__ void mma_kernel_m16n8k32 (int *a, int *b, int *c, int *d) {
138
138
// CHECK: {
139
- // CHECK-NEXT: volatile int32_t *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
139
+ // CHECK-NEXT: volatile void *d_mat_frag_ct1[4] = { &d[0], &d[1], &d[2], &d[3] };
140
140
// CHECK-NEXT: sycl::vec<uint32_t, 4> a_mat_frag_ct1(a[0], a[1], a[2], a[3]);
141
141
// CHECK-NEXT: sycl::vec<uint32_t, 2> b_mat_frag_ct1(b[0], b[1]);
142
142
// CHECK-NEXT: sycl::vec<int32_t, 4> c_mat_frag_ct1(c[0], c[1], c[2], c[3]);
0 commit comments