Skip to content

Commit 2d8889f

Browse files
MrSidims authored and vmaksimo committed
Implement SPV_INTEL_loop_fuse extension
It adds the FuseLoopsInFunctionINTEL decoration, enabled by the LoopFuseINTEL capability. The decoration requests, to the extent possible, that loops in the function be fused if they are contained in strictly fewer than Depth other loops in the function. Depth is a 32-bit unsigned integer type scalar with value at least 1. Independent is a 32-bit unsigned integer type scalar. If Independent is non-zero, it guarantees that negative distance dependencies do not exist between loops in the function that are contained in strictly fewer than Depth other loops within the function. Spec: KhronosGroup/SPIRV-Registry#88. Signed-off-by: Dmitry Sidorov <dmitry.sidorov@intel.com>
1 parent 8dca6b9 commit 2d8889f

File tree

9 files changed

+57
-16
lines changed

9 files changed

+57
-16
lines changed

llvm-spirv/include/LLVMSPIRVExtensions.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@ EXT(SPV_INTEL_arbitrary_precision_floating_point)
2929
EXT(SPV_INTEL_variable_length_array)
3030
EXT(SPV_INTEL_fp_fast_math_mode)
3131
EXT(SPV_INTEL_fpga_cluster_attributes)
32+
EXT(SPV_INTEL_loop_fuse)

llvm-spirv/lib/SPIRV/SPIRVInternal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ const static char MaxWGDim[] = "max_global_work_dim";
400400
const static char NumSIMD[] = "num_simd_work_items";
401401
const static char StallEnable[] = "stall_enable";
402402
const static char FmaxMhz[] = "scheduler_target_fmax_mhz";
403+
const static char LoopFuse[] = "loop_fuse";
403404
} // namespace kSPIR2MD
404405

405406
enum Spir2SamplerKind {

llvm-spirv/lib/SPIRV/SPIRVReader.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4208,6 +4208,14 @@ bool SPIRVToLLVM::transFPGAFunctionMetadata(SPIRVFunction *BF, Function *F) {
42084208
MetadataVec.push_back(ConstantAsMetadata::get(getInt32(M, 1)));
42094209
F->setMetadata(kSPIR2MD::StallEnable, MDNode::get(*Context, MetadataVec));
42104210
}
4211+
if (BF->hasDecorate(DecorationFuseLoopsInFunctionINTEL)) {
4212+
std::vector<Metadata *> MetadataVec;
4213+
auto Literals =
4214+
BF->getDecorationLiterals(DecorationFuseLoopsInFunctionINTEL);
4215+
MetadataVec.push_back(ConstantAsMetadata::get(getUInt32(M, Literals[0])));
4216+
MetadataVec.push_back(ConstantAsMetadata::get(getUInt32(M, Literals[1])));
4217+
F->setMetadata(kSPIR2MD::LoopFuse, MDNode::get(*Context, MetadataVec));
4218+
}
42114219
return true;
42124220
}
42134221

llvm-spirv/lib/SPIRV/SPIRVWriter.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -593,9 +593,7 @@ SPIRVFunction *LLVMToSPIRV::transFunctionDecl(Function *F) {
593593
if (BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_vector_compute))
594594
transVectorComputeMetadata(F);
595595

596-
if (BM->isAllowedToUseExtension(
597-
ExtensionID::SPV_INTEL_fpga_cluster_attributes))
598-
transFPGAFunctionMetadata(BF, F);
596+
transFPGAFunctionMetadata(BF, F);
599597

600598
SPIRVDBG(dbgs() << "[transFunction] " << *F << " => ";
601599
spvdbgs() << *BF << '\n';)
@@ -695,9 +693,21 @@ void LLVMToSPIRV::transVectorComputeMetadata(Function *F) {
695693

696694
void LLVMToSPIRV::transFPGAFunctionMetadata(SPIRVFunction *BF, Function *F) {
697695
if (MDNode *StallEnable = F->getMetadata(kSPIR2MD::StallEnable)) {
698-
if (getMDOperandAsInt(StallEnable, 0)) {
699-
BM->addCapability(CapabilityFPGAClusterAttributesINTEL);
700-
BF->addDecorate(new SPIRVDecorateStallEnableINTEL(BF));
696+
if (BM->isAllowedToUseExtension(
697+
ExtensionID::SPV_INTEL_fpga_cluster_attributes)) {
698+
if (getMDOperandAsInt(StallEnable, 0)) {
699+
BM->addCapability(CapabilityFPGAClusterAttributesINTEL);
700+
BF->addDecorate(new SPIRVDecorateStallEnableINTEL(BF));
701+
}
702+
}
703+
}
704+
if (MDNode *LoopFuse = F->getMetadata(kSPIR2MD::LoopFuse)) {
705+
if (BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_loop_fuse)) {
706+
size_t Depth = getMDOperandAsInt(LoopFuse, 0);
707+
size_t Independent = getMDOperandAsInt(LoopFuse, 1);
708+
BM->addCapability(CapabilityLoopFuseINTEL);
709+
BF->addDecorate(
710+
new SPIRVDecorateFuseLoopsInFunctionINTEL(BF, Depth, Independent));
701711
}
702712
}
703713
}

llvm-spirv/lib/SPIRV/libSPIRV/SPIRVDecorate.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ class SPIRVDecorate : public SPIRVDecorateGeneric {
179179
return ExtensionID::SPV_INTEL_float_controls2;
180180
case DecorationStallEnableINTEL:
181181
return ExtensionID::SPV_INTEL_fpga_cluster_attributes;
182+
case DecorationFuseLoopsInFunctionINTEL:
183+
return ExtensionID::SPV_INTEL_loop_fuse;
182184
default:
183185
return {};
184186
}
@@ -619,6 +621,15 @@ class SPIRVDecorateStallEnableINTEL : public SPIRVDecorate {
619621
: SPIRVDecorate(spv::DecorationStallEnableINTEL, TheTarget){};
620622
};
621623

624+
class SPIRVDecorateFuseLoopsInFunctionINTEL : public SPIRVDecorate {
625+
public:
626+
// Complete constructor for SPIRVDecorateFuseLoopsInFunctionINTEL
627+
SPIRVDecorateFuseLoopsInFunctionINTEL(SPIRVEntry *TheTarget, SPIRVWord Depth,
628+
SPIRVWord Independent)
629+
: SPIRVDecorate(spv::DecorationFuseLoopsInFunctionINTEL, TheTarget, Depth,
630+
Independent){};
631+
};
632+
622633
} // namespace SPIRV
623634

624635
#endif // SPIRV_LIBSPIRV_SPIRVDECORATE_H

llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ template <> inline void SPIRVMap<Decoration, SPIRVCapVec>::init() {
420420
{CapabilityVectorComputeINTEL});
421421
ADD_VEC_INIT(DecorationStallEnableINTEL,
422422
{CapabilityFPGAClusterAttributesINTEL});
423+
ADD_VEC_INIT(DecorationFuseLoopsInFunctionINTEL, {CapabilityLoopFuseINTEL});
423424
}
424425

425426
template <> inline void SPIRVMap<BuiltIn, SPIRVCapVec>::init() {

llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ template <> inline void SPIRVMap<Decoration, std::string>::init() {
168168
add(DecorationVectorComputeCallableFunctionINTEL,
169169
"VectorComputeCallableFunctionINTEL");
170170
add(DecorationStallEnableINTEL, "StallEnableINTEL");
171+
add(DecorationFuseLoopsInFunctionINTEL, "FuseLoopsInFunctionINTEL");
171172
add(DecorationMax, "Max");
172173
}
173174
SPIRV_DEF_NAMEMAP(Decoration, SPIRVDecorationNameMap)
@@ -509,6 +510,7 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
509510
add(CapabilityFPGAMemoryAccessesINTEL, "FPGAMemoryAccessesINTEL");
510511
add(CapabilityIOPipeINTEL, "IOPipeINTEL");
511512
add(CapabilityFPGAClusterAttributesINTEL, "FPGAClusterAttributesINTEL");
513+
add(CapabilityLoopFuseINTEL, "LoopFuseINTEL");
512514
add(CapabilityMax, "Max");
513515
}
514516
SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap)

llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ enum Decoration {
525525
DecorationDontStaticallyCoalesceINTEL = 5901,
526526
DecorationPrefetchINTEL = 5902,
527527
DecorationStallEnableINTEL = 5905,
528+
DecorationFuseLoopsInFunctionINTEL = 5907,
528529
DecorationBufferLocationINTEL = 5921,
529530
DecorationIOPipeStorageINTEL = 5944,
530531
DecorationFunctionFloatingPointModeINTEL = 6080,
@@ -983,16 +984,17 @@ enum Capability {
983984
CapabilityArbitraryPrecisionFloatingPointINTEL = 5845,
984985
CapabilityUnstructuredLoopControlsINTEL = 5886,
985986
CapabilityFPGALoopControlsINTEL = 5888,
986-
CapabilityBlockingPipesINTEL = 5945,
987-
CapabilityFPGARegINTEL = 5948,
988987
CapabilityKernelAttributesINTEL = 5892,
988+
CapabilityFPGAMemoryAccessesINTEL = 5898,
989989
CapabilityFPGAKernelAttributesINTEL = 5897,
990990
CapabilityFPGAClusterAttributesINTEL = 5904,
991+
CapabilityLoopFuseINTEL = 5906,
991992
CapabilityFPGABufferLocationINTEL = 5920,
992993
CapabilityArbitraryPrecisionFixedPointINTEL = 5922,
993994
CapabilityUSMStorageClassesINTEL = 5935,
994-
CapabilityFPGAMemoryAccessesINTEL = 5898,
995995
CapabilityIOPipeINTEL = 5943,
996+
CapabilityBlockingPipesINTEL = 5945,
997+
CapabilityFPGARegINTEL = 5948,
996998
CapabilityAtomicFloat32AddEXT = 6033,
997999
CapabilityAtomicFloat64AddEXT = 6034,
9981000
CapabilityMax = 0x7fffffff,

llvm-spirv/test/transcoding/IntelFPGAFunctionAttributes.ll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
;; [[intel::no_global_work_offset,
55
;; intel::max_global_work_dim(1),
66
;; intel::max_work_group_size(1,1,1),
7-
;; intel::num_simd_work_items(8)
8-
;; intel::stall_enable]] void operator()() {}
9-
;; intel::scheduler_target_fmax_mhz(1000)]] void operator()() {}
7+
;; intel::num_simd_work_items(8),
8+
;; intel::stall_enable,
9+
;; intel::scheduler_target_fmax_mhz(1000),
10+
;; intel::loop_fuse_independent(3)]] void operator()() {}
1011
;; };
1112
;;
1213
;; template <typename name, typename Func>
@@ -17,11 +18,11 @@
1718
;; void bar() {
1819
;; Foo boo;
1920
;; kernel<class kernel_name>(boo);
20-
;; kernel<class kernel_name2>([]() [[intelfpga::no_global_work_offset(0)]]{});
21+
;; kernel<class kernel_name2>([]() [[intel::no_global_work_offset(0)]]{});
2122
;; }
2223

2324
; RUN: llvm-as %s -o %t.bc
24-
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_kernel_attributes --spirv-ext=+SPV_INTEL_fpga_cluster_attributes -o %t.spv
25+
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_kernel_attributes --spirv-ext=+SPV_INTEL_fpga_cluster_attributes,+SPV_INTEL_loop_fuse -o %t.spv
2526
; RUN: llvm-spirv %t.spv -to-text -o %t.spt
2627
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
2728

@@ -34,18 +35,21 @@
3435
; CHECK-SPIRV: 2 Capability KernelAttributesINTEL
3536
; CHECK-SPIRV: 2 Capability FPGAKernelAttributesINTEL
3637
; CHECK-SPIRV: 2 Capability FPGAClusterAttributesINTEL
38+
; CHECK-SPIRV: 2 Capability LoopFuseINTEL
3739
; CHECK-SPIRV: 6 ExecutionMode [[FUNCENTRY:[0-9]+]] 5893 1 1 1
3840
; CHECK-SPIRV: 4 ExecutionMode [[FUNCENTRY]] 5894 1
3941
; CHECK-SPIRV: 3 ExecutionMode [[FUNCENTRY]] 5895
4042
; CHECK-SPIRV: 4 ExecutionMode [[FUNCENTRY]] 5896 8
4143
; CHECK-SPIRV: 4 ExecutionMode [[FUNCENTRY]] 5903 1000
4244
; CHECK-SPIRV: 3 Decorate [[FUNCENTRY]] StallEnableINTEL
45+
; CHECK-SPIRV: 5 Decorate [[FUNCENTRY]] FuseLoopsInFunctionINTEL 3 1
4346
; CHECK-SPIRV: 5 Function {{.*}} [[FUNCENTRY]] {{.*}}
4447

45-
; CHECK-LLVM: define spir_kernel void {{.*}}kernel_name() {{.*}} !stall_enable ![[ONEMD:[0-9]+]] !max_work_group_size ![[MAXWG:[0-9]+]] !no_global_work_offset ![[OFFSET:[0-9]+]] !max_global_work_dim ![[ONEMD:[0-9]+]] !num_simd_work_items ![[NUMSIMD:[0-9]+]] !scheduler_target_fmax_mhz ![[MAXMHZ:[0-9]+]]
48+
; CHECK-LLVM: define spir_kernel void {{.*}}kernel_name() {{.*}} !stall_enable ![[ONEMD:[0-9]+]] !loop_fuse ![[FUSE:[0-9]+]] !max_work_group_size ![[MAXWG:[0-9]+]] !no_global_work_offset ![[OFFSET:[0-9]+]] !max_global_work_dim ![[ONEMD:[0-9]+]] !num_simd_work_items ![[NUMSIMD:[0-9]+]] !scheduler_target_fmax_mhz ![[MAXMHZ:[0-9]+]]
4649
; CHECK-LLVM-NOT: define spir_kernel void {{.*}}kernel_name2 {{.*}} !no_global_work_offset {{.*}}
4750
; CHECK-LLVM: ![[OFFSET]] = !{}
4851
; CHECK-LLVM: ![[ONEMD]] = !{i32 1}
52+
; CHECK-LLVM: ![[FUSE]] = !{i32 3, i32 1}
4953
; CHECK-LLVM: ![[MAXWG]] = !{i32 1, i32 1, i32 1}
5054
; CHECK-LLVM: ![[NUMSIMD]] = !{i32 8}
5155
; CHECK-LLVM: ![[MAXMHZ]] = !{i32 1000}
@@ -61,7 +65,7 @@ target triple = "spir64-unknown-linux-sycldevice"
6165
$_ZN3FooclEv = comdat any
6266

6367
; Function Attrs: nounwind
64-
define spir_kernel void @_ZTSZ3barvE11kernel_name() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 !num_simd_work_items !5 !max_work_group_size !6 !max_global_work_dim !7 !no_global_work_offset !4 !stall_enable !7 !scheduler_target_fmax_mhz !12 {
68+
define spir_kernel void @_ZTSZ3barvE11kernel_name() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 !num_simd_work_items !5 !max_work_group_size !6 !max_global_work_dim !7 !no_global_work_offset !4 !stall_enable !7 !scheduler_target_fmax_mhz !12 !loop_fuse !13 {
6569
entry:
6670
%Foo = alloca %class._ZTS3Foo.Foo, align 1
6771
%0 = bitcast %class._ZTS3Foo.Foo* %Foo to i8*
@@ -132,3 +136,4 @@ attributes #4 = { nounwind }
132136
!10 = !{!"omnipotent char", !11, i64 0}
133137
!11 = !{!"Simple C++ TBAA"}
134138
!12 = !{i32 1000}
139+
!13 = !{i32 3, i32 1}

0 commit comments

Comments
 (0)