|
1 |
| -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2 %s |
2 |
| -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2 %s |
3 |
| -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA %s |
4 |
| -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA %s |
5 |
| -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s |
6 |
| -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s |
| 1 | +; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s |
| 2 | +; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s |
| 3 | +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s |
| 4 | +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s |
| 5 | +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s |
| 6 | +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s |
7 | 7 | ; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
|
8 | 8 |
|
9 | 9 | declare i32 @llvm.amdgcn.workitem.id.x() #0
|
@@ -125,5 +125,75 @@ define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 {
|
125 | 125 | ret void
|
126 | 126 | }
|
127 | 127 |
|
| 128 | +; FIXME: Should be able to avoid enabling in kernel inputs |
| 129 | +; FIXME: Packed tid should avoid the and |
| 130 | +; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only: |
| 131 | +; CO-V2: enable_vgpr_workitem_id = 2 |
| 132 | +; Kernel tagged with reqd_work_group_size !0; it stores workitem id x, y and z to %out in that order. A zero register is expected, and with the UNPACKED prefix id.x is stored straight from v0. |
| 133 | +; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} |
| 134 | +; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 |
| 135 | +; NOTE(review): no RUN line visible in this chunk enables the PACKED prefix (the gfx90a run passes PACKED-TID), so the next two checks look unexercised; confirm before relying on them. |
| 136 | +; PACKED: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ff, v0 |
| 137 | +; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] |
| 138 | +; The two remaining stores (id.y and id.z) are expected to write the zero register. |
| 139 | +; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] |
| 140 | +; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] |
| 141 | +define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work_group_size !0 { |
| 142 | + %id.x = call i32 @llvm.amdgcn.workitem.id.x() |
| 143 | + %id.y = call i32 @llvm.amdgcn.workitem.id.y() |
| 144 | + %id.z = call i32 @llvm.amdgcn.workitem.id.z() |
| 145 | + store volatile i32 %id.x, i32* %out |
| 146 | + store volatile i32 %id.y, i32* %out |
| 147 | + store volatile i32 %id.z, i32* %out |
| 148 | + ret void |
| 149 | +} |
| 150 | + |
| 151 | +; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only: |
| 152 | +; CO-V2: enable_vgpr_workitem_id = 2 |
| 153 | +; Kernel tagged with reqd_work_group_size !1; it stores workitem id x, y and z to %out in that order. The first store (id.x) is expected to write a zero register. |
| 154 | +; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} |
| 155 | +; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] |
| 156 | +; With the UNPACKED prefix the id.y store is expected to come straight from v1. |
| 157 | +; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1 |
| 158 | +; NOTE(review): no RUN line visible in this chunk enables the PACKED prefix (the gfx90a run passes PACKED-TID), so the next two checks look unexercised; confirm before relying on them. |
| 159 | +; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 10 |
| 160 | +; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] |
| 161 | +; The last store (id.z) is expected to write the zero register again. |
| 162 | +; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] |
| 163 | +define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work_group_size !1 { |
| 164 | + %id.x = call i32 @llvm.amdgcn.workitem.id.x() |
| 165 | + %id.y = call i32 @llvm.amdgcn.workitem.id.y() |
| 166 | + %id.z = call i32 @llvm.amdgcn.workitem.id.z() |
| 167 | + store volatile i32 %id.x, i32* %out |
| 168 | + store volatile i32 %id.y, i32* %out |
| 169 | + store volatile i32 %id.z, i32* %out |
| 170 | + ret void |
| 171 | +} |
| 172 | + |
| 173 | +; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only: |
| 174 | +; CO-V2: enable_vgpr_workitem_id = 2 |
| 175 | +; Kernel tagged with reqd_work_group_size !2; it stores workitem id x, y and z to %out in that order. The first two stores (id.x and id.y) are expected to write a zero register. |
| 176 | +; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} |
| 177 | +; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] |
| 178 | +; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] |
| 179 | +; With the UNPACKED prefix the id.z store is expected to come straight from v2. |
| 180 | +; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2 |
| 181 | +; v_bfe_u32 operands are (src, offset, width), so the 10-bit field after the y field (offset 10) starts at offset 20. NOTE(review): no RUN line visible in this chunk enables the PACKED prefix (the gfx90a run passes PACKED-TID), so these two checks look unexercised; confirm. |
| 182 | +; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 20, 10 |
| 183 | +; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] |
| 184 | +define amdgpu_kernel void @test_reqd_workgroup_size_z_only(i32* %out) !reqd_work_group_size !2 { |
| 185 | + %id.x = call i32 @llvm.amdgcn.workitem.id.x() |
| 186 | + %id.y = call i32 @llvm.amdgcn.workitem.id.y() |
| 187 | + %id.z = call i32 @llvm.amdgcn.workitem.id.z() |
| 188 | + store volatile i32 %id.x, i32* %out |
| 189 | + store volatile i32 %id.y, i32* %out |
| 190 | + store volatile i32 %id.z, i32* %out |
| 191 | + ret void |
| 192 | +} |
| 193 | + |
128 | 194 | attributes #0 = { nounwind readnone }
|
129 | 195 | attributes #1 = { nounwind }
|
| 196 | +; Metadata triples attached to the three reqd_workgroup_size kernels through !reqd_work_group_size; each one sets a single element to 64 and the other two to 1. |
| 197 | +!0 = !{i32 64, i32 1, i32 1} |
| 198 | +!1 = !{i32 1, i32 64, i32 1} |
| 199 | +!2 = !{i32 1, i32 1, i32 64} |
0 commit comments