Skip to content

Commit 356a552

Browse files
spall, github-actions[bot]
authored and committed
Automerge: [DirectX] Add support for Raw Buffer Loads and Stores for scalars and vectors of doubles and i64s in SM6.2 and earlier (#146627)
For SM6.2 and earlier, raw buffer loads and stores can't handle 64-bit types. This PR expands raw buffer loads and stores for the 64-bit types double and int64_t. This adds to the work done in #139996 and #145047. Raw buffer loads and stores allow vectors of 64-bit types of size 3 and 4, and the code is modified to allow for that. Closes #144747
2 parents 7a0480c + 5e87a71 commit 356a552

File tree

7 files changed

+855
-118
lines changed

7 files changed

+855
-118
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 156 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,15 @@ class DXILIntrinsicExpansionLegacy : public ModulePass {
4242
static char ID; // Pass identification.
4343
};
4444

45+
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
46+
bool IsRaw) {
47+
if (IsRaw && M->getTargetTriple().getDXILVersion() > VersionTuple(1, 2))
48+
return false;
49+
50+
Type *ScalarTy = OverloadTy->getScalarType();
51+
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
52+
}
53+
4554
static bool isIntrinsicExpansion(Function &F) {
4655
switch (F.getIntrinsicID()) {
4756
case Intrinsic::abs:
@@ -71,17 +80,20 @@ static bool isIntrinsicExpansion(Function &F) {
7180
case Intrinsic::vector_reduce_add:
7281
case Intrinsic::vector_reduce_fadd:
7382
return true;
74-
case Intrinsic::dx_resource_load_typedbuffer: {
75-
// We need to handle i64, doubles, and vectors of them.
76-
Type *ScalarTy =
77-
F.getReturnType()->getStructElementType(0)->getScalarType();
78-
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
79-
}
80-
case Intrinsic::dx_resource_store_typedbuffer: {
81-
// We need to handle i64 and doubles and vectors of i64 and doubles.
82-
Type *ScalarTy = F.getFunctionType()->getParamType(2)->getScalarType();
83-
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
84-
}
83+
case Intrinsic::dx_resource_load_rawbuffer:
84+
return resourceAccessNeeds64BitExpansion(
85+
F.getParent(), F.getReturnType()->getStructElementType(0),
86+
/*IsRaw*/ true);
87+
case Intrinsic::dx_resource_load_typedbuffer:
88+
return resourceAccessNeeds64BitExpansion(
89+
F.getParent(), F.getReturnType()->getStructElementType(0),
90+
/*IsRaw*/ false);
91+
case Intrinsic::dx_resource_store_rawbuffer:
92+
return resourceAccessNeeds64BitExpansion(
93+
F.getParent(), F.getFunctionType()->getParamType(3), /*IsRaw*/ true);
94+
case Intrinsic::dx_resource_store_typedbuffer:
95+
return resourceAccessNeeds64BitExpansion(
96+
F.getParent(), F.getFunctionType()->getParamType(2), /*IsRaw*/ false);
8597
}
8698
return false;
8799
}
@@ -544,63 +556,82 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
544556
return Builder.CreateFMul(X, PiOver180);
545557
}
546558

547-
static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
559+
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
548560
IRBuilder<> Builder(Orig);
549561

550562
Type *BufferTy = Orig->getType()->getStructElementType(0);
551563
Type *ScalarTy = BufferTy->getScalarType();
552564
bool IsDouble = ScalarTy->isDoubleTy();
553565
assert(IsDouble || ScalarTy->isIntegerTy(64) &&
554566
"Only expand double or int64 scalars or vectors");
555-
567+
bool IsVector = false;
556568
unsigned ExtractNum = 2;
557569
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
558-
assert(VT->getNumElements() == 2 &&
559-
"TypedBufferLoad vector must be size 2");
560-
ExtractNum = 4;
570+
ExtractNum = 2 * VT->getNumElements();
571+
IsVector = true;
572+
assert(IsRaw || ExtractNum == 4 && "TypedBufferLoad vector must be size 2");
561573
}
562574

563-
Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
564-
565-
Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
566-
CallInst *Load =
567-
Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
568-
{Orig->getOperand(0), Orig->getOperand(1)});
569-
570-
// extract the buffer load's result
571-
Value *Extract = Builder.CreateExtractValue(Load, {0});
572-
573-
SmallVector<Value *> ExtractElements;
574-
for (unsigned I = 0; I < ExtractNum; ++I)
575-
ExtractElements.push_back(
576-
Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
577-
578-
// combine into double(s) or int64(s)
575+
SmallVector<Value *, 2> Loads;
579576
Value *Result = PoisonValue::get(BufferTy);
580-
for (unsigned I = 0; I < ExtractNum; I += 2) {
581-
Value *Combined = nullptr;
582-
if (IsDouble)
583-
// For doubles, use dx_asdouble intrinsic
584-
Combined =
585-
Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
586-
{ExtractElements[I], ExtractElements[I + 1]});
587-
else {
588-
// For int64, manually combine two int32s
589-
// First, zero-extend both values to i64
590-
Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
591-
Value *Hi =
592-
Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
593-
// Shift the high bits left by 32 bits
594-
Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
595-
// OR the high and low bits together
596-
Combined = Builder.CreateOr(Lo, ShiftedHi);
577+
unsigned Base = 0;
578+
// If we need to extract more than 4 i32; we need to break it up into
579+
// more than one load. LoadNum tells us how many i32s we are loading in
580+
// each load
581+
while (ExtractNum > 0) {
582+
unsigned LoadNum = std::min(ExtractNum, 4u);
583+
Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false);
584+
585+
Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
586+
Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
587+
SmallVector<Value *, 3> Args = {Orig->getOperand(0), Orig->getOperand(1)};
588+
if (IsRaw) {
589+
LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
590+
Value *Tmp = Builder.getInt32(4 * Base * 2);
591+
Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
597592
}
598593

599-
if (ExtractNum == 4)
600-
Result = Builder.CreateInsertElement(Result, Combined,
601-
Builder.getInt32(I / 2));
602-
else
603-
Result = Combined;
594+
CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
595+
Loads.push_back(Load);
596+
597+
// extract the buffer load's result
598+
Value *Extract = Builder.CreateExtractValue(Load, {0});
599+
600+
SmallVector<Value *> ExtractElements;
601+
for (unsigned I = 0; I < LoadNum; ++I)
602+
ExtractElements.push_back(
603+
Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
604+
605+
// combine into double(s) or int64(s)
606+
for (unsigned I = 0; I < LoadNum; I += 2) {
607+
Value *Combined = nullptr;
608+
if (IsDouble)
609+
// For doubles, use dx_asdouble intrinsic
610+
Combined = Builder.CreateIntrinsic(
611+
Builder.getDoubleTy(), Intrinsic::dx_asdouble,
612+
{ExtractElements[I], ExtractElements[I + 1]});
613+
else {
614+
// For int64, manually combine two int32s
615+
// First, zero-extend both values to i64
616+
Value *Lo =
617+
Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
618+
Value *Hi =
619+
Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
620+
// Shift the high bits left by 32 bits
621+
Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
622+
// OR the high and low bits together
623+
Combined = Builder.CreateOr(Lo, ShiftedHi);
624+
}
625+
626+
if (IsVector)
627+
Result = Builder.CreateInsertElement(Result, Combined,
628+
Builder.getInt32((I / 2) + Base));
629+
else
630+
Result = Combined;
631+
}
632+
633+
ExtractNum -= LoadNum;
634+
Base += LoadNum / 2;
604635
}
605636

606637
Value *CheckBit = nullptr;
@@ -620,8 +651,14 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
620651
} else {
621652
// Use of the check bit
622653
assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
623-
if (!CheckBit)
624-
CheckBit = Builder.CreateExtractValue(Load, {1});
654+
// Note: This does not always match the historical behaviour of DXC.
655+
// See https://github.com/microsoft/DirectXShaderCompiler/issues/7622
656+
if (!CheckBit) {
657+
SmallVector<Value *, 2> CheckBits;
658+
for (Value *L : Loads)
659+
CheckBits.push_back(Builder.CreateExtractValue(L, {1}));
660+
CheckBit = Builder.CreateAnd(CheckBits);
661+
}
625662
EVI->replaceAllUsesWith(CheckBit);
626663
}
627664
EVI->eraseFromParent();
@@ -630,46 +667,52 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
630667
return true;
631668
}
632669

633-
static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
670+
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
634671
IRBuilder<> Builder(Orig);
635672

636-
Type *BufferTy = Orig->getFunctionType()->getParamType(2);
673+
unsigned ValIndex = IsRaw ? 3 : 2;
674+
Type *BufferTy = Orig->getFunctionType()->getParamType(ValIndex);
637675
Type *ScalarTy = BufferTy->getScalarType();
638676
bool IsDouble = ScalarTy->isDoubleTy();
639677
assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
640678
"Only expand double or int64 scalars or vectors");
641679

642680
// Determine if we're dealing with a vector or scalar
643-
bool IsVector = isa<FixedVectorType>(BufferTy);
644-
if (IsVector) {
645-
assert(cast<FixedVectorType>(BufferTy)->getNumElements() == 2 &&
646-
"TypedBufferStore vector must be size 2");
681+
bool IsVector = false;
682+
unsigned ExtractNum = 2;
683+
unsigned VecLen = 0;
684+
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
685+
VecLen = VT->getNumElements();
686+
assert(IsRaw || VecLen == 2 && "TypedBufferStore vector must be size 2");
687+
ExtractNum = VecLen * 2;
688+
IsVector = true;
647689
}
648690

649691
// Create the appropriate vector type for the result
650692
Type *Int32Ty = Builder.getInt32Ty();
651-
Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false);
693+
Type *ResultTy = VectorType::get(Int32Ty, ExtractNum, false);
652694
Value *Val = PoisonValue::get(ResultTy);
653695

654696
Type *SplitElementTy = Int32Ty;
655697
if (IsVector)
656-
SplitElementTy = VectorType::get(SplitElementTy, 2, false);
698+
SplitElementTy = VectorType::get(SplitElementTy, VecLen, false);
657699

658700
Value *LowBits = nullptr;
659701
Value *HighBits = nullptr;
660702
// Split the 64-bit values into 32-bit components
661703
if (IsDouble) {
662704
auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
663705
Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
664-
{Orig->getOperand(2)});
706+
{Orig->getOperand(ValIndex)});
665707
LowBits = Builder.CreateExtractValue(Split, 0);
666708
HighBits = Builder.CreateExtractValue(Split, 1);
667709
} else {
668710
// Handle int64 type(s)
669-
Value *InputVal = Orig->getOperand(2);
711+
Value *InputVal = Orig->getOperand(ValIndex);
670712
Constant *ShiftAmt = Builder.getInt64(32);
671713
if (IsVector)
672-
ShiftAmt = ConstantVector::getSplat(ElementCount::getFixed(2), ShiftAmt);
714+
ShiftAmt =
715+
ConstantVector::getSplat(ElementCount::getFixed(VecLen), ShiftAmt);
673716

674717
// Split into low and high 32-bit parts
675718
LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
@@ -678,17 +721,48 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
678721
}
679722

680723
if (IsVector) {
681-
Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
724+
SmallVector<int, 8> Mask;
725+
for (unsigned I = 0; I < VecLen; ++I) {
726+
Mask.push_back(I);
727+
Mask.push_back(I + VecLen);
728+
}
729+
Val = Builder.CreateShuffleVector(LowBits, HighBits, Mask);
682730
} else {
683731
Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
684732
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
685733
}
686734

687-
// Create the final intrinsic call
688-
Builder.CreateIntrinsic(Builder.getVoidTy(),
689-
Intrinsic::dx_resource_store_typedbuffer,
690-
{Orig->getOperand(0), Orig->getOperand(1), Val});
735+
// If we need to extract more than 4 i32; we need to break it up into
736+
// more than one store. StoreNum tells us how many i32s we are storing in
737+
// each store
738+
unsigned Base = 0;
739+
while (ExtractNum > 0) {
740+
unsigned StoreNum = std::min(ExtractNum, 4u);
741+
742+
Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
743+
SmallVector<Value *, 4> Args = {Orig->getOperand(0), Orig->getOperand(1)};
744+
if (IsRaw) {
745+
StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
746+
Value *Tmp = Builder.getInt32(4 * Base);
747+
Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
748+
}
749+
750+
SmallVector<int, 4> Mask;
751+
for (unsigned I = 0; I < StoreNum; ++I) {
752+
Mask.push_back(Base + I);
753+
}
754+
755+
Value *SubVal = Val;
756+
if (VecLen > 2)
757+
SubVal = Builder.CreateShuffleVector(Val, Mask);
758+
759+
Args.push_back(SubVal);
760+
// Create the final intrinsic call
761+
Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args);
691762

763+
ExtractNum -= StoreNum;
764+
Base += StoreNum;
765+
}
692766
Orig->eraseFromParent();
693767
return true;
694768
}
@@ -821,12 +895,20 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
821895
case Intrinsic::dx_radians:
822896
Result = expandRadiansIntrinsic(Orig);
823897
break;
898+
case Intrinsic::dx_resource_load_rawbuffer:
899+
if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true))
900+
return true;
901+
break;
902+
case Intrinsic::dx_resource_store_rawbuffer:
903+
if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ true))
904+
return true;
905+
break;
824906
case Intrinsic::dx_resource_load_typedbuffer:
825-
if (expandTypedBufferLoadIntrinsic(Orig))
907+
if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ false))
826908
return true;
827909
break;
828910
case Intrinsic::dx_resource_store_typedbuffer:
829-
if (expandTypedBufferStoreIntrinsic(Orig))
911+
if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ false))
830912
return true;
831913
break;
832914
case Intrinsic::usub_sat:

llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll

Lines changed: 0 additions & 24 deletions
This file was deleted.

0 commit comments

Comments
 (0)