Skip to content

Commit 23926b0

Browse files
[SYCL] Add Clang support for FPGA loop fusion function attributes (#2877)
This patch adds support for FPGA function attributes loop_fuse and loop_fuse_independent. [[intel::loop_fuse(N)]] is a strong request, to the extent possible, to fuse loops within the function, that are contained in at most N-1 other loops within the function. If the optional parameter N is omitted, it is a strong request, to the extent possible, to fuse loops within the function that are not contained in any other loop within the function. [[intel::loop_fuse_independent(N)]] is used to guarantee that fusion safety analysis can ignore negative-distance dependences between these loops. FrontEnd Specifications: The attributes take one optional parameter, a constant integral expression between 0 and 1024*1024. The parameter may be a template parameter. The same function definition can have at most one of these two attributes. The attributes can be applied explicitly to a kernel. However, attributes should not be propagated to callers, i.e. they should not be propagated from device functions to the kernel. LLVM IR is function metadata as follows: define i32 @foo() !loop_fuse !0 !0 = !{i32 N, i32 D} where N is the value specified by the optional attribute argument. If the optional argument is omitted, N is set to 1. D is equal to 0 for [[intel::loop_fuse]] and 1 for [[intel::loop_fuse_independent]]. Signed-off-by: Elizabeth Andrews <elizabeth.andrews@intel.com>
1 parent 7f45963 commit 23926b0

15 files changed

+489
-2
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,25 @@ def SYCLIntelNoGlobalWorkOffset : InheritableAttr {
13161316
let PragmaAttributeSupport = 0;
13171317
}
13181318

1319+
def SYCLIntelLoopFuse : InheritableAttr {
1320+
let Spellings = [CXX11<"intel", "loop_fuse">,
1321+
CXX11<"intel", "loop_fuse_independent">];
1322+
let Args = [ExprArgument<"Value", /*optional=*/ 1>];
1323+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1324+
let Subjects = SubjectList<[Function], ErrorDiag>;
1325+
let Accessors = [Accessor<"isIndependent",
1326+
[CXX11<"intel", "loop_fuse_independent">]>];
1327+
let Documentation = [SYCLIntelLoopFuseDocs];
1328+
let AdditionalMembers = [{
1329+
static unsigned getMinValue() {
1330+
return 0;
1331+
}
1332+
static unsigned getMaxValue() {
1333+
return 1024*1024;
1334+
}
1335+
}];
1336+
}
1337+
13191338
def C11NoReturn : InheritableAttr {
13201339
let Spellings = [Keyword<"_Noreturn">];
13211340
let Subjects = SubjectList<[Function], ErrorDiag>;

clang/include/clang/Basic/AttrDocs.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2608,6 +2608,34 @@ loop should not be fused with any adjacent loop.
26082608
}];
26092609
}
26102610

2611+
def SYCLIntelLoopFuseDocs : Documentation {
2612+
let Category = DocCatFunction;
2613+
let Heading = "loop_fuse, loop_fuse_independent";
2614+
let Content = [{
2615+
``[[intel::loop_fuse(N)]]`` and ``[[intel::loop_fuse_independent(N)]]`` attributes apply
2616+
to a function/lambda function. It is a strong request, to the extent possible, to fuse
2617+
the loops within the function, that are contained in at most N-1 other loops within the
2618+
function. If the optional parameter N is omitted, it is a strong request, to the extent
2619+
possible, to fuse loops within the function that are not contained in any other loop
2620+
within the function. ``[[intel::loop_fuse_independent(N)]]`` also guarantees that fusion
2621+
safety analysis can ignore negative-distance dependences between these loops.
2622+
2623+
.. code-block:: c++
2624+
2625+
[[intel::loop_fuse(N)]]
2626+
int foo() {}
2627+
2628+
[[intel::loop_fuse_independent(N)]]
2629+
int foo() {}
2630+
2631+
2632+
``[[intel::loop_fuse(N)]]`` and ``[[intel::loop_fuse_independent(N)]]`` take one optional
2633+
parameter that is a constant unsigned integer expression. The parameter N may be a template
2634+
parameter.
2635+
2636+
}];
2637+
}
2638+
26112639
def SYCLDeviceIndirectlyCallableDocs : Documentation {
26122640
let Category = DocCatFunction;
26132641
let Heading = "intel::device_indirectly_callable";

clang/include/clang/Basic/AttributeCommonInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,8 @@ class AttributeCommonInfo {
166166
ParsedAttr == AT_SYCLIntelMaxWorkGroupSize ||
167167
ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim ||
168168
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset ||
169-
ParsedAttr == AT_SYCLIntelUseStallEnableClusters)
169+
ParsedAttr == AT_SYCLIntelUseStallEnableClusters ||
170+
ParsedAttr == AT_SYCLIntelLoopFuse)
170171
return true;
171172

172173
return false;

clang/include/clang/Sema/Sema.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3342,6 +3342,8 @@ class Sema final {
33423342
WebAssemblyImportModuleAttr *mergeImportModuleAttr(
33433343
Decl *D, const WebAssemblyImportModuleAttr &AL);
33443344

3345+
SYCLIntelLoopFuseAttr *
3346+
mergeSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E);
33453347
void mergeDeclAttributes(NamedDecl *New, Decl *Old,
33463348
AvailabilityMergeKind AMK = AMK_Redeclaration);
33473349
void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
@@ -10134,6 +10136,8 @@ class Sema final {
1013410136
/// addSYCLIntelPipeIOAttr - Adds a pipe I/O attribute to a particular
1013510137
/// declaration.
1013610138
void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID);
10139+
void addSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI,
10140+
Expr *E);
1013710141

1013810142
bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type);
1013910143
bool checkAllowedSYCLInitializer(VarDecl *VD,

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
955955
if (getLangOpts().SYCLIsHost && D && D->hasAttr<SYCLKernelAttr>())
956956
Fn->addFnAttr("sycl_kernel");
957957

958+
if (getLangOpts().SYCLIsDevice && D) {
959+
if (const auto *A = D->getAttr<SYCLIntelLoopFuseAttr>()) {
960+
Expr *E = A->getValue();
961+
llvm::Metadata *AttrMDArgs[] = {
962+
llvm::ConstantAsMetadata::get(Builder.getInt32(
963+
E->getIntegerConstantExpr(D->getASTContext())->getZExtValue())),
964+
llvm::ConstantAsMetadata::get(
965+
A->isIndependent() ? Builder.getInt32(1) : Builder.getInt32(0))};
966+
Fn->setMetadata("loop_fuse",
967+
llvm::MDNode::get(getLLVMContext(), AttrMDArgs));
968+
}
969+
}
970+
958971
if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) {
959972
// Add metadata for a kernel function.
960973
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {

clang/lib/Sema/SemaDecl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2614,6 +2614,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
26142614
NewAttr = S.mergeImportModuleAttr(D, *IMA);
26152615
else if (const auto *INA = dyn_cast<WebAssemblyImportNameAttr>(Attr))
26162616
NewAttr = S.mergeImportNameAttr(D, *INA);
2617+
else if (const auto *LFA = dyn_cast<SYCLIntelLoopFuseAttr>(Attr))
2618+
NewAttr = S.mergeSYCLIntelLoopFuseAttr(D, *LFA, LFA->getValue());
26172619
else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr))
26182620
NewAttr = cast<InheritableAttr>(Attr->clone(S.Context));
26192621

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3128,6 +3128,90 @@ static void handleMaxGlobalWorkDimAttr(Sema &S, Decl *D,
31283128
E);
31293129
}
31303130

3131+
SYCLIntelLoopFuseAttr *
3132+
Sema::mergeSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI,
3133+
Expr *E) {
3134+
3135+
if (const auto ExistingAttr = D->getAttr<SYCLIntelLoopFuseAttr>()) {
3136+
// [[intel::loop_fuse]] and [[intel::loop_fuse_independent]] are
3137+
// incompatible.
3138+
// FIXME: If additional spellings are provided for this attribute,
3139+
// this code will do the wrong thing.
3140+
if (ExistingAttr->getAttributeSpellingListIndex() !=
3141+
CI.getAttributeSpellingListIndex()) {
3142+
Diag(CI.getLoc(), diag::err_attributes_are_not_compatible)
3143+
<< CI << ExistingAttr;
3144+
Diag(ExistingAttr->getLocation(), diag::note_conflicting_attribute);
3145+
return nullptr;
3146+
}
3147+
3148+
if (!E->isValueDependent()) {
3149+
Optional<llvm::APSInt> ArgVal = E->getIntegerConstantExpr(Context);
3150+
Optional<llvm::APSInt> ExistingArgVal =
3151+
ExistingAttr->getValue()->getIntegerConstantExpr(Context);
3152+
3153+
assert(ArgVal && ExistingArgVal &&
3154+
"Argument should be an integer constant expression");
3155+
// Compare attribute argument value and warn if there is a mismatch.
3156+
if (ArgVal->getExtValue() != ExistingArgVal->getExtValue())
3157+
Diag(ExistingAttr->getLoc(), diag::warn_duplicate_attribute)
3158+
<< ExistingAttr;
3159+
}
3160+
3161+
// If there is no mismatch, silently ignore duplicate attribute.
3162+
return nullptr;
3163+
}
3164+
return ::new (Context) SYCLIntelLoopFuseAttr(Context, CI, E);
3165+
}
3166+
3167+
static bool checkSYCLIntelLoopFuseArgument(Sema &S,
3168+
const AttributeCommonInfo &CI,
3169+
Expr *E) {
3170+
// Dependent expressions are checked when instantiated.
3171+
if (E->isValueDependent())
3172+
return false;
3173+
3174+
Optional<llvm::APSInt> ArgVal = E->getIntegerConstantExpr(S.Context);
3175+
if (!ArgVal) {
3176+
S.Diag(E->getExprLoc(), diag::err_attribute_argument_type)
3177+
<< CI << AANT_ArgumentIntegerConstant << E->getSourceRange();
3178+
return true;
3179+
}
3180+
3181+
SYCLIntelLoopFuseAttr TmpAttr(S.Context, CI, E);
3182+
ExprResult ICE;
3183+
3184+
return S.checkRangedIntegralArgument<SYCLIntelLoopFuseAttr>(E, &TmpAttr, ICE);
3185+
}
3186+
3187+
void Sema::addSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI,
3188+
Expr *E) {
3189+
assert(E && "argument has unexpected null value");
3190+
3191+
if (checkSYCLIntelLoopFuseArgument(*this, CI, E))
3192+
return;
3193+
3194+
// Attribute should not be added during host compilation.
3195+
if (getLangOpts().SYCLIsHost)
3196+
return;
3197+
3198+
SYCLIntelLoopFuseAttr *NewAttr = mergeSYCLIntelLoopFuseAttr(D, CI, E);
3199+
3200+
if (NewAttr)
3201+
D->addAttr(NewAttr);
3202+
}
3203+
3204+
// Handles [[intel::loop_fuse]] and [[intel::loop_fuse_independent]].
3205+
static void handleLoopFuseAttr(Sema &S, Decl *D, const ParsedAttr &Attr) {
3206+
// Default argument value is set to 1.
3207+
Expr *E = Attr.isArgExpr(0)
3208+
? Attr.getArgAsExpr(0)
3209+
: IntegerLiteral::Create(S.Context, llvm::APInt(32, 1),
3210+
S.Context.IntTy, Attr.getLoc());
3211+
3212+
S.addSYCLIntelLoopFuseAttr(D, Attr, E);
3213+
}
3214+
31313215
static void handleVecTypeHint(Sema &S, Decl *D, const ParsedAttr &AL) {
31323216
if (!AL.hasParsedType()) {
31333217
S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments) << AL << 1;
@@ -8499,6 +8583,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
84998583
case ParsedAttr::AT_SYCLIntelUseStallEnableClusters:
85008584
handleUseStallEnableClustersAttr(S, D, AL);
85018585
break;
8586+
case ParsedAttr::AT_SYCLIntelLoopFuse:
8587+
handleLoopFuseAttr(S, D, AL);
8588+
break;
85028589
case ParsedAttr::AT_VecTypeHint:
85038590
handleVecTypeHint(S, D, AL);
85048591
break;

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,14 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
570570
(KernelBody != FD) && !FD->hasAttr<SYCLSimdAttr>())
571571
FD->addAttr(SYCLSimdAttr::CreateImplicit(SemaRef.getASTContext()));
572572

573+
// Attribute "loop_fuse" can be applied explicitly on kernel function.
574+
// Attribute should not be propagated from device functions to kernel.
575+
if (auto *A = FD->getAttr<SYCLIntelLoopFuseAttr>()) {
576+
if (ParentFD == SYCLKernel) {
577+
Attrs.insert(A);
578+
}
579+
}
580+
573581
// TODO: vec_len_hint should be handled here
574582

575583
CallGraphNode *N = SYCLCG.getNode(FD);
@@ -3335,6 +3343,7 @@ void Sema::MarkDevice(void) {
33353343
case attr::Kind::SYCLIntelMaxGlobalWorkDim:
33363344
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
33373345
case attr::Kind::SYCLIntelUseStallEnableClusters:
3346+
case attr::Kind::SYCLIntelLoopFuse:
33383347
case attr::Kind::SYCLSimd: {
33393348
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody &&
33403349
!KernelBody->getAttr<SYCLSimdAttr>()) {

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,16 @@ static void instantiateSYCLIntelPipeIOAttr(
592592
S.addSYCLIntelPipeIOAttr(New, *Attr, Result.getAs<Expr>());
593593
}
594594

595+
static void instantiateSYCLIntelLoopFuseAttr(
596+
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
597+
const SYCLIntelLoopFuseAttr *Attr, Decl *New) {
598+
EnterExpressionEvaluationContext Unevaluated(
599+
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
600+
ExprResult Result = S.SubstExpr(Attr->getValue(), TemplateArgs);
601+
if (!Result.isInvalid())
602+
S.addSYCLIntelLoopFuseAttr(New, *Attr, Result.getAs<Expr>());
603+
}
604+
595605
template <typename AttrName>
596606
static void instantiateIntelSYCLFunctionAttr(
597607
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
@@ -806,6 +816,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
806816
*this, TemplateArgs, SYCLIntelMaxGlobalWorkDim, New);
807817
continue;
808818
}
819+
if (const auto *SYCLIntelLoopFuse =
820+
dyn_cast<SYCLIntelLoopFuseAttr>(TmplAttr)) {
821+
instantiateSYCLIntelLoopFuseAttr(*this, TemplateArgs, SYCLIntelLoopFuse,
822+
New);
823+
continue;
824+
}
809825
if (const auto *SYCLIntelNoGlobalWorkOffset =
810826
dyn_cast<SYCLIntelNoGlobalWorkOffsetAttr>(TmplAttr)) {
811827
instantiateIntelSYCLFunctionAttr<SYCLIntelNoGlobalWorkOffsetAttr>(
@@ -6239,7 +6255,10 @@ static void processSYCLKernel(Sema &S, FunctionDecl *FD, MangleContext &MC) {
62396255
if (S.LangOpts.SYCLIsDevice) {
62406256
S.ConstructOpenCLKernel(FD, MC);
62416257
} else if (S.LangOpts.SYCLIsHost) {
6242-
CXXRecordDecl *CRD = (*FD->param_begin())->getType()->getAsCXXRecordDecl();
6258+
QualType KernelParamTy = (*FD->param_begin())->getType();
6259+
const CXXRecordDecl *CRD = (KernelParamTy->isReferenceType()
6260+
? KernelParamTy->getPointeeCXXRecordDecl()
6261+
: KernelParamTy->getAsCXXRecordDecl());
62436262
for (auto *Method : CRD->methods())
62446263
if (Method->getOverloadedOperator() == OO_Call &&
62456264
!Method->hasAttr<AlwaysInlineAttr>())
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s
2+
3+
#include "sycl.hpp"
4+
5+
using namespace cl::sycl;
6+
queue q;
7+
8+
[[intel::loop_fuse(5)]] void foo() {}
9+
10+
template <int SIZE>
11+
class KernelFunctor5 {
12+
public:
13+
[[intel::loop_fuse(SIZE)]] void operator()() const {}
14+
};
15+
16+
void bar() {
17+
18+
q.submit([&](handler &h) {
19+
// Test template argument.
20+
KernelFunctor5<5> f5;
21+
h.single_task<class kernel_name_1>(f5);
22+
23+
// Test different argument sizes.
24+
// Emit 1 if there is no argument.
25+
h.single_task<class kernel_name_2>(
26+
[]() [[intel::loop_fuse]]{});
27+
h.single_task<class kernel_name_3>(
28+
[]() [[intel::loop_fuse(0)]]{});
29+
h.single_task<class kernel_name_4>(
30+
[]() [[intel::loop_fuse(1)]]{});
31+
h.single_task<class kernel_name_5>(
32+
[]() [[intel::loop_fuse(10)]]{});
33+
34+
// Test attribute is not propagated.
35+
h.single_task<class kernel_name_6>(
36+
[]() { foo(); });
37+
});
38+
}
39+
40+
// CHECK: define spir_kernel void @"{{.*}}kernel_name_1"() {{.*}} !loop_fuse ![[LF5:[0-9]+]]
41+
// CHECK: define spir_kernel void @"{{.*}}kernel_name_2"() {{.*}} !loop_fuse ![[LF1:[0-9]+]]
42+
// CHECK: define spir_kernel void @"{{.*}}kernel_name_3"() {{.*}} !loop_fuse ![[LF0:[0-9]+]]
43+
// CHECK: define spir_kernel void @"{{.*}}kernel_name_4"() {{.*}} !loop_fuse ![[LF1]]
44+
// CHECK: define spir_kernel void @"{{.*}}kernel_name_5"() {{.*}} !loop_fuse ![[LF10:[0-9]+]]
45+
// CHECK: define spir_kernel void @"{{.*}}kernel_name_6"()
46+
// CHECK-NOT: !loop_fuse
47+
// CHECK-SAME: {
48+
// CHECK: define spir_func void @{{.*}}foo{{.*}} !loop_fuse ![[LF5]]
49+
// CHECK: ![[LF5]] = !{i32 5, i32 0}
50+
// CHECK: ![[LF1]] = !{i32 1, i32 0}
51+
// CHECK: ![[LF0]] = !{i32 0, i32 0}
52+
// CHECK: ![[LF10]] = !{i32 10, i32 0}

0 commit comments

Comments
 (0)