|
| 1 | +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - | FileCheck %s |
| 2 | + |
| 3 | +; CHECK-DAG: Capability SubgroupShuffleINTEL |
| 4 | +; CHECK-DAG: Capability SubgroupBufferBlockIOINTEL |
| 5 | +; CHECK-DAG: Capability SubgroupImageBlockIOINTEL |
| 6 | +; CHECK: Extension "SPV_INTEL_subgroups" |
| 7 | + |
| 8 | +; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32 |
| 9 | +; CHECK-DAG: %[[#FloatVec:]] = OpTypeVector %[[#Float]] 2 |
| 10 | +; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0 |
| 11 | +; CHECK-DAG: %[[#IntVec:]] = OpTypeVector %[[#Int]] 2 |
| 12 | + |
| 13 | +; CHECK: Function |
| 14 | +; CHECK: %[[#X:]] = OpFunctionParameter |
| 15 | +; CHECK: %[[#C:]] = OpFunctionParameter |
| 16 | +; CHECK: %[[#ImgIn:]] = OpFunctionParameter |
| 17 | +; CHECK: %[[#ImgOut:]] = OpFunctionParameter |
| 18 | +; CHECK: %[[#Coord:]] = OpFunctionParameter |
| 19 | +; CHECK: %[[#Ptr:]] = OpFunctionParameter |
| 20 | + |
| 21 | +; CHECK: %[[#]] = OpSubgroupShuffleINTEL %[[#FloatVec]] %[[#X]] %[[#C]] |
| 22 | +; CHECK: %[[#]] = OpSubgroupShuffleDownINTEL %[[#FloatVec]] %[[#X]] %[[#X]] %[[#C]] |
| 23 | +; CHECK: %[[#]] = OpSubgroupShuffleUpINTEL %[[#FloatVec]] %[[#X]] %[[#X]] %[[#C]] |
| 24 | +; CHECK: %[[#]] = OpSubgroupShuffleXorINTEL %[[#FloatVec]] %[[#X]] %[[#C]] |
| 25 | +; CHECK: %[[#ResImg1:]] = OpSubgroupImageBlockReadINTEL %[[#IntVec]] %[[#ImgIn]] %[[#Coord]] |
| 26 | +; CHECK: OpSubgroupImageBlockWriteINTEL %[[#ImgOut]] %[[#Coord]] %[[#ResImg1]] |
| 27 | +; CHECK: %[[#Res1:]] = OpSubgroupBlockReadINTEL %[[#IntVec]] %[[#Ptr]] |
| 28 | +; CHECK: OpSubgroupBlockWriteINTEL %[[#Ptr]] %[[#Res1]] |
| 29 | +; CHECK: %[[#]] = OpSubgroupShuffleINTEL %[[#FloatVec]] %[[#X]] %[[#C]] |
| 30 | +; CHECK: %[[#]] = OpSubgroupShuffleDownINTEL %[[#FloatVec]] %[[#X]] %[[#X]] %[[#C]] |
| 31 | +; CHECK: %[[#]] = OpSubgroupShuffleUpINTEL %[[#FloatVec]] %[[#X]] %[[#X]] %[[#C]] |
| 32 | +; CHECK: %[[#]] = OpSubgroupShuffleXorINTEL %[[#FloatVec]] %[[#X]] %[[#C]] |
| 33 | +; CHECK: %[[#ResImg2:]] = OpSubgroupImageBlockReadINTEL %[[#IntVec]] %[[#ImgIn]] %[[#Coord]] |
| 34 | +; CHECK: OpSubgroupImageBlockWriteINTEL %[[#ImgOut]] %[[#Coord]] %[[#ResImg2]] |
| 35 | +; CHECK: %[[#Res2:]] = OpSubgroupBlockReadINTEL %[[#IntVec]] %[[#Ptr]] |
| 36 | +; CHECK: OpSubgroupBlockWriteINTEL %[[#Ptr]] %[[#Res2]] |
| 37 | +; CHECK: Return |
| 38 | + |
; Kernel exercised by this test. Parameters: a <2 x float> value %x, an i32 %c,
; two ptr addrspace(1) image arguments (%image_in, %image_out), a <2 x i32>
; coordinate %coord, and a ptr addrspace(1) buffer pointer %p.
; The body issues the same eight operations twice: first through functions
; named __spirv_*INTEL, then through functions named intel_sub_group_*.
; The CHECK lines at the top of this file expect both groups to produce the
; same instruction sequence, in this order, so statement order matters here.
| 39 | +define spir_kernel void @test(<2 x float> %x, i32 %c, ptr addrspace(1) %image_in, ptr addrspace(1) %image_out, <2 x i32> %coord, ptr addrspace(1) %p) { |
| 40 | +entry: |
; Group 1, shuffles via the __spirv_ names. The Down/Up variants take two
; vector operands; %x is passed for both.
| 41 | + %wrap = tail call spir_func <2 x float> @__spirv_SubgroupShuffleINTEL(<2 x float> %x, i32 %c) |
| 42 | + %wrap1 = tail call spir_func <2 x float> @__spirv_SubgroupShuffleDownINTEL(<2 x float> %x, <2 x float> %x, i32 %c) |
| 43 | + %wrap2 = tail call spir_func <2 x float> @__spirv_SubgroupShuffleUpINTEL(<2 x float> %x, <2 x float> %x, i32 %c) |
| 44 | + %wrap3 = tail call spir_func <2 x float> @__spirv_SubgroupShuffleXorINTEL(<2 x float> %x, i32 %c) |
| 45 | + |
; Group 1, block I/O via the __spirv_ names. Each read result is passed
; directly to the matching write (%wrap4 -> image write, %wrap5 -> buffer write).
| 46 | + %wrap4 = tail call spir_func <2 x i32> @__spirv_SubgroupImageBlockReadINTEL(ptr addrspace(1) %image_in, <2 x i32> %coord) |
| 47 | + tail call spir_func void @__spirv_SubgroupImageBlockWriteINTEL(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i32> %wrap4) |
| 48 | + %wrap5 = tail call spir_func <2 x i32> @__spirv_SubgroupBlockReadINTEL(ptr addrspace(1) %p) |
| 49 | + tail call spir_func void @__spirv_SubgroupBlockWriteINTEL(ptr addrspace(1) %p, <2 x i32> %wrap5) |
| 50 | + |
; Group 2, shuffles via the intel_sub_group_ names, with the same operands as
; group 1.
| 51 | + %ocl = tail call spir_func <2 x float> @intel_sub_group_shuffle(<2 x float> %x, i32 %c) |
| 52 | + %ocl1 = tail call spir_func <2 x float> @intel_sub_group_shuffle_down(<2 x float> %x, <2 x float> %x, i32 %c) |
| 53 | + %ocl2 = tail call spir_func <2 x float> @intel_sub_group_shuffle_up(<2 x float> %x, <2 x float> %x, i32 %c) |
| 54 | + %ocl3 = tail call spir_func <2 x float> @intel_sub_group_shuffle_xor(<2 x float> %x, i32 %c) |
| 55 | + |
; Group 2, block I/O. The two image calls use Itanium-mangled names
; (_Z27intel_sub_group_block_read2... / _Z28intel_sub_group_block_write2...),
; while the buffer calls use unmangled names.
; NOTE(review): presumably the mangled ocl_image2d_ro / ocl_image2d_wo parameter
; encodings are what select the image form of the builtin - confirm against the
; backend's builtin lowering.
| 56 | + %ocl4 = tail call spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(ptr addrspace(1) %image_in, <2 x i32> %coord) |
| 57 | + tail call spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(ptr addrspace(1) %image_out, <2 x i32> %coord, <2 x i32> %ocl4) |
| 58 | + %ocl5 = tail call spir_func <2 x i32> @intel_sub_group_block_read(ptr addrspace(1) %p) |
| 59 | + tail call spir_func void @intel_sub_group_block_write(ptr addrspace(1) %p, <2 x i32> %ocl5) |
| 60 | + |
| 61 | + ret void |
| 62 | +} |
| 63 | + |
; Body-less declarations for every function called from @test. Each signature
; matches its call site in @test.

; Shuffle operations under their __spirv_ names.
| 64 | +declare spir_func <2 x float> @__spirv_SubgroupShuffleINTEL(<2 x float>, i32) |
| 65 | +declare spir_func <2 x float> @__spirv_SubgroupShuffleDownINTEL(<2 x float>, <2 x float>, i32) |
| 66 | +declare spir_func <2 x float> @__spirv_SubgroupShuffleUpINTEL(<2 x float>, <2 x float>, i32) |
| 67 | +declare spir_func <2 x float> @__spirv_SubgroupShuffleXorINTEL(<2 x float>, i32) |
| 68 | + |
; Buffer block read/write under their __spirv_ names.
| 69 | +declare spir_func <2 x i32> @__spirv_SubgroupBlockReadINTEL(ptr addrspace(1)) |
| 70 | +declare spir_func void @__spirv_SubgroupBlockWriteINTEL(ptr addrspace(1), <2 x i32>) |
| 71 | + |
; Image block read/write under their __spirv_ names; both take an extra
; <2 x i32> coordinate operand.
| 72 | +declare spir_func <2 x i32> @__spirv_SubgroupImageBlockReadINTEL(ptr addrspace(1), <2 x i32>) |
| 73 | +declare spir_func void @__spirv_SubgroupImageBlockWriteINTEL(ptr addrspace(1), <2 x i32>, <2 x i32>) |
| 74 | + |
; Shuffle operations under their unmangled intel_sub_group_ names.
| 75 | +declare spir_func <2 x float> @intel_sub_group_shuffle(<2 x float>, i32) |
| 76 | +declare spir_func <2 x float> @intel_sub_group_shuffle_down(<2 x float>, <2 x float>, i32) |
| 77 | +declare spir_func <2 x float> @intel_sub_group_shuffle_up(<2 x float>, <2 x float>, i32) |
| 78 | +declare spir_func <2 x float> @intel_sub_group_shuffle_xor(<2 x float>, i32) |
| 79 | + |
; Buffer block read/write under their unmangled intel_sub_group_ names.
| 80 | +declare spir_func <2 x i32> @intel_sub_group_block_read(ptr addrspace(1)) |
| 81 | +declare spir_func void @intel_sub_group_block_write(ptr addrspace(1), <2 x i32>) |
| 82 | + |
; Image block read/write under Itanium-mangled names. The mangling encodes
; ocl_image2d_ro / ocl_image2d_wo parameter types, while the IR-level
; parameters declared here are plain ptr addrspace(1).
| 83 | +declare spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(ptr addrspace(1), <2 x i32>) |
| 84 | +declare spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(ptr addrspace(1), <2 x i32>, <2 x i32>) |
0 commit comments