@@ -42,6 +42,15 @@ class DXILIntrinsicExpansionLegacy : public ModulePass {
42
42
static char ID; // Pass identification.
43
43
};
44
44
45
+ static bool resourceAccessNeeds64BitExpansion (Module *M, Type *OverloadTy,
46
+ bool IsRaw) {
47
+ if (IsRaw && M->getTargetTriple ().getDXILVersion () > VersionTuple (1 , 2 ))
48
+ return false ;
49
+
50
+ Type *ScalarTy = OverloadTy->getScalarType ();
51
+ return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
52
+ }
53
+
45
54
static bool isIntrinsicExpansion (Function &F) {
46
55
switch (F.getIntrinsicID ()) {
47
56
case Intrinsic::abs:
@@ -71,17 +80,20 @@ static bool isIntrinsicExpansion(Function &F) {
71
80
case Intrinsic::vector_reduce_add:
72
81
case Intrinsic::vector_reduce_fadd:
73
82
return true ;
74
- case Intrinsic::dx_resource_load_typedbuffer: {
75
- // We need to handle i64, doubles, and vectors of them.
76
- Type *ScalarTy =
77
- F.getReturnType ()->getStructElementType (0 )->getScalarType ();
78
- return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
79
- }
80
- case Intrinsic::dx_resource_store_typedbuffer: {
81
- // We need to handle i64 and doubles and vectors of i64 and doubles.
82
- Type *ScalarTy = F.getFunctionType ()->getParamType (2 )->getScalarType ();
83
- return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
84
- }
83
+ case Intrinsic::dx_resource_load_rawbuffer:
84
+ return resourceAccessNeeds64BitExpansion (
85
+ F.getParent (), F.getReturnType ()->getStructElementType (0 ),
86
+ /* IsRaw*/ true );
87
+ case Intrinsic::dx_resource_load_typedbuffer:
88
+ return resourceAccessNeeds64BitExpansion (
89
+ F.getParent (), F.getReturnType ()->getStructElementType (0 ),
90
+ /* IsRaw*/ false );
91
+ case Intrinsic::dx_resource_store_rawbuffer:
92
+ return resourceAccessNeeds64BitExpansion (
93
+ F.getParent (), F.getFunctionType ()->getParamType (3 ), /* IsRaw*/ true );
94
+ case Intrinsic::dx_resource_store_typedbuffer:
95
+ return resourceAccessNeeds64BitExpansion (
96
+ F.getParent (), F.getFunctionType ()->getParamType (2 ), /* IsRaw*/ false );
85
97
}
86
98
return false ;
87
99
}
@@ -544,63 +556,82 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
544
556
return Builder.CreateFMul (X, PiOver180);
545
557
}
546
558
547
- static bool expandTypedBufferLoadIntrinsic (CallInst *Orig) {
559
+ static bool expandBufferLoadIntrinsic (CallInst *Orig, bool IsRaw ) {
548
560
IRBuilder<> Builder (Orig);
549
561
550
562
Type *BufferTy = Orig->getType ()->getStructElementType (0 );
551
563
Type *ScalarTy = BufferTy->getScalarType ();
552
564
bool IsDouble = ScalarTy->isDoubleTy ();
553
565
assert (IsDouble || ScalarTy->isIntegerTy (64 ) &&
554
566
" Only expand double or int64 scalars or vectors" );
555
-
567
+ bool IsVector = false ;
556
568
unsigned ExtractNum = 2 ;
557
569
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
558
- assert ( VT->getNumElements () == 2 &&
559
- " TypedBufferLoad vector must be size 2 " ) ;
560
- ExtractNum = 4 ;
570
+ ExtractNum = 2 * VT->getNumElements ();
571
+ IsVector = true ;
572
+ assert (IsRaw || ExtractNum == 4 && " TypedBufferLoad vector must be size 2 " ) ;
561
573
}
562
574
563
- Type *Ty = VectorType::get (Builder.getInt32Ty (), ExtractNum, false );
564
-
565
- Type *LoadType = StructType::get (Ty, Builder.getInt1Ty ());
566
- CallInst *Load =
567
- Builder.CreateIntrinsic (LoadType, Intrinsic::dx_resource_load_typedbuffer,
568
- {Orig->getOperand (0 ), Orig->getOperand (1 )});
569
-
570
- // extract the buffer load's result
571
- Value *Extract = Builder.CreateExtractValue (Load, {0 });
572
-
573
- SmallVector<Value *> ExtractElements;
574
- for (unsigned I = 0 ; I < ExtractNum; ++I)
575
- ExtractElements.push_back (
576
- Builder.CreateExtractElement (Extract, Builder.getInt32 (I)));
577
-
578
- // combine into double(s) or int64(s)
575
+ SmallVector<Value *, 2 > Loads;
579
576
Value *Result = PoisonValue::get (BufferTy);
580
- for (unsigned I = 0 ; I < ExtractNum; I += 2 ) {
581
- Value *Combined = nullptr ;
582
- if (IsDouble)
583
- // For doubles, use dx_asdouble intrinsic
584
- Combined =
585
- Builder.CreateIntrinsic (Builder.getDoubleTy (), Intrinsic::dx_asdouble,
586
- {ExtractElements[I], ExtractElements[I + 1 ]});
587
- else {
588
- // For int64, manually combine two int32s
589
- // First, zero-extend both values to i64
590
- Value *Lo = Builder.CreateZExt (ExtractElements[I], Builder.getInt64Ty ());
591
- Value *Hi =
592
- Builder.CreateZExt (ExtractElements[I + 1 ], Builder.getInt64Ty ());
593
- // Shift the high bits left by 32 bits
594
- Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
595
- // OR the high and low bits together
596
- Combined = Builder.CreateOr (Lo, ShiftedHi);
577
+ unsigned Base = 0 ;
578
+ // If we need to extract more than 4 i32s, we need to break it up into
579
+ // more than one load. LoadNum tells us how many i32s we are loading in
580
+ // each load
581
+ while (ExtractNum > 0 ) {
582
+ unsigned LoadNum = std::min (ExtractNum, 4u );
583
+ Type *Ty = VectorType::get (Builder.getInt32Ty (), LoadNum, false );
584
+
585
+ Type *LoadType = StructType::get (Ty, Builder.getInt1Ty ());
586
+ Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
587
+ SmallVector<Value *, 3 > Args = {Orig->getOperand (0 ), Orig->getOperand (1 )};
588
+ if (IsRaw) {
589
+ LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
590
+ Value *Tmp = Builder.getInt32 (4 * Base * 2 );
591
+ Args.push_back (Builder.CreateAdd (Orig->getOperand (2 ), Tmp));
597
592
}
598
593
599
- if (ExtractNum == 4 )
600
- Result = Builder.CreateInsertElement (Result, Combined,
601
- Builder.getInt32 (I / 2 ));
602
- else
603
- Result = Combined;
594
+ CallInst *Load = Builder.CreateIntrinsic (LoadType, LoadIntrinsic, Args);
595
+ Loads.push_back (Load);
596
+
597
+ // extract the buffer load's result
598
+ Value *Extract = Builder.CreateExtractValue (Load, {0 });
599
+
600
+ SmallVector<Value *> ExtractElements;
601
+ for (unsigned I = 0 ; I < LoadNum; ++I)
602
+ ExtractElements.push_back (
603
+ Builder.CreateExtractElement (Extract, Builder.getInt32 (I)));
604
+
605
+ // combine into double(s) or int64(s)
606
+ for (unsigned I = 0 ; I < LoadNum; I += 2 ) {
607
+ Value *Combined = nullptr ;
608
+ if (IsDouble)
609
+ // For doubles, use dx_asdouble intrinsic
610
+ Combined = Builder.CreateIntrinsic (
611
+ Builder.getDoubleTy (), Intrinsic::dx_asdouble,
612
+ {ExtractElements[I], ExtractElements[I + 1 ]});
613
+ else {
614
+ // For int64, manually combine two int32s
615
+ // First, zero-extend both values to i64
616
+ Value *Lo =
617
+ Builder.CreateZExt (ExtractElements[I], Builder.getInt64Ty ());
618
+ Value *Hi =
619
+ Builder.CreateZExt (ExtractElements[I + 1 ], Builder.getInt64Ty ());
620
+ // Shift the high bits left by 32 bits
621
+ Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
622
+ // OR the high and low bits together
623
+ Combined = Builder.CreateOr (Lo, ShiftedHi);
624
+ }
625
+
626
+ if (IsVector)
627
+ Result = Builder.CreateInsertElement (Result, Combined,
628
+ Builder.getInt32 ((I / 2 ) + Base));
629
+ else
630
+ Result = Combined;
631
+ }
632
+
633
+ ExtractNum -= LoadNum;
634
+ Base += LoadNum / 2 ;
604
635
}
605
636
606
637
Value *CheckBit = nullptr ;
@@ -620,8 +651,14 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
620
651
} else {
621
652
// Use of the check bit
622
653
assert (Indices[0 ] == 1 && " Unexpected type for typedbufferload" );
623
- if (!CheckBit)
624
- CheckBit = Builder.CreateExtractValue (Load, {1 });
654
+ // Note: This does not always match the historical behaviour of DXC.
655
+ // See https://github.com/microsoft/DirectXShaderCompiler/issues/7622
656
+ if (!CheckBit) {
657
+ SmallVector<Value *, 2 > CheckBits;
658
+ for (Value *L : Loads)
659
+ CheckBits.push_back (Builder.CreateExtractValue (L, {1 }));
660
+ CheckBit = Builder.CreateAnd (CheckBits);
661
+ }
625
662
EVI->replaceAllUsesWith (CheckBit);
626
663
}
627
664
EVI->eraseFromParent ();
@@ -630,46 +667,52 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
630
667
return true ;
631
668
}
632
669
633
- static bool expandTypedBufferStoreIntrinsic (CallInst *Orig) {
670
+ static bool expandBufferStoreIntrinsic (CallInst *Orig, bool IsRaw ) {
634
671
IRBuilder<> Builder (Orig);
635
672
636
- Type *BufferTy = Orig->getFunctionType ()->getParamType (2 );
673
+ unsigned ValIndex = IsRaw ? 3 : 2 ;
674
+ Type *BufferTy = Orig->getFunctionType ()->getParamType (ValIndex);
637
675
Type *ScalarTy = BufferTy->getScalarType ();
638
676
bool IsDouble = ScalarTy->isDoubleTy ();
639
677
assert ((IsDouble || ScalarTy->isIntegerTy (64 )) &&
640
678
" Only expand double or int64 scalars or vectors" );
641
679
642
680
// Determine if we're dealing with a vector or scalar
643
- bool IsVector = isa<FixedVectorType>(BufferTy);
644
- if (IsVector) {
645
- assert (cast<FixedVectorType>(BufferTy)->getNumElements () == 2 &&
646
- " TypedBufferStore vector must be size 2" );
681
+ bool IsVector = false ;
682
+ unsigned ExtractNum = 2 ;
683
+ unsigned VecLen = 0 ;
684
+ if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
685
+ VecLen = VT->getNumElements ();
686
+ assert (IsRaw || VecLen == 2 && " TypedBufferStore vector must be size 2" );
687
+ ExtractNum = VecLen * 2 ;
688
+ IsVector = true ;
647
689
}
648
690
649
691
// Create the appropriate vector type for the result
650
692
Type *Int32Ty = Builder.getInt32Ty ();
651
- Type *ResultTy = VectorType::get (Int32Ty, IsVector ? 4 : 2 , false );
693
+ Type *ResultTy = VectorType::get (Int32Ty, ExtractNum , false );
652
694
Value *Val = PoisonValue::get (ResultTy);
653
695
654
696
Type *SplitElementTy = Int32Ty;
655
697
if (IsVector)
656
- SplitElementTy = VectorType::get (SplitElementTy, 2 , false );
698
+ SplitElementTy = VectorType::get (SplitElementTy, VecLen , false );
657
699
658
700
Value *LowBits = nullptr ;
659
701
Value *HighBits = nullptr ;
660
702
// Split the 64-bit values into 32-bit components
661
703
if (IsDouble) {
662
704
auto *SplitTy = llvm::StructType::get (SplitElementTy, SplitElementTy);
663
705
Value *Split = Builder.CreateIntrinsic (SplitTy, Intrinsic::dx_splitdouble,
664
- {Orig->getOperand (2 )});
706
+ {Orig->getOperand (ValIndex )});
665
707
LowBits = Builder.CreateExtractValue (Split, 0 );
666
708
HighBits = Builder.CreateExtractValue (Split, 1 );
667
709
} else {
668
710
// Handle int64 type(s)
669
- Value *InputVal = Orig->getOperand (2 );
711
+ Value *InputVal = Orig->getOperand (ValIndex );
670
712
Constant *ShiftAmt = Builder.getInt64 (32 );
671
713
if (IsVector)
672
- ShiftAmt = ConstantVector::getSplat (ElementCount::getFixed (2 ), ShiftAmt);
714
+ ShiftAmt =
715
+ ConstantVector::getSplat (ElementCount::getFixed (VecLen), ShiftAmt);
673
716
674
717
// Split into low and high 32-bit parts
675
718
LowBits = Builder.CreateTrunc (InputVal, SplitElementTy);
@@ -678,17 +721,48 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
678
721
}
679
722
680
723
if (IsVector) {
681
- Val = Builder.CreateShuffleVector (LowBits, HighBits, {0 , 2 , 1 , 3 });
724
+ SmallVector<int , 8 > Mask;
725
+ for (unsigned I = 0 ; I < VecLen; ++I) {
726
+ Mask.push_back (I);
727
+ Mask.push_back (I + VecLen);
728
+ }
729
+ Val = Builder.CreateShuffleVector (LowBits, HighBits, Mask);
682
730
} else {
683
731
Val = Builder.CreateInsertElement (Val, LowBits, Builder.getInt32 (0 ));
684
732
Val = Builder.CreateInsertElement (Val, HighBits, Builder.getInt32 (1 ));
685
733
}
686
734
687
- // Create the final intrinsic call
688
- Builder.CreateIntrinsic (Builder.getVoidTy (),
689
- Intrinsic::dx_resource_store_typedbuffer,
690
- {Orig->getOperand (0 ), Orig->getOperand (1 ), Val});
735
+ // If we need to extract more than 4 i32s, we need to break it up into
736
+ // more than one store. StoreNum tells us how many i32s we are storing in
737
+ // each store
738
+ unsigned Base = 0 ;
739
+ while (ExtractNum > 0 ) {
740
+ unsigned StoreNum = std::min (ExtractNum, 4u );
741
+
742
+ Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
743
+ SmallVector<Value *, 4 > Args = {Orig->getOperand (0 ), Orig->getOperand (1 )};
744
+ if (IsRaw) {
745
+ StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
746
+ Value *Tmp = Builder.getInt32 (4 * Base);
747
+ Args.push_back (Builder.CreateAdd (Orig->getOperand (2 ), Tmp));
748
+ }
749
+
750
+ SmallVector<int , 4 > Mask;
751
+ for (unsigned I = 0 ; I < StoreNum; ++I) {
752
+ Mask.push_back (Base + I);
753
+ }
754
+
755
+ Value *SubVal = Val;
756
+ if (VecLen > 2 )
757
+ SubVal = Builder.CreateShuffleVector (Val, Mask);
758
+
759
+ Args.push_back (SubVal);
760
+ // Create the final intrinsic call
761
+ Builder.CreateIntrinsic (Builder.getVoidTy (), StoreIntrinsic, Args);
691
762
763
+ ExtractNum -= StoreNum;
764
+ Base += StoreNum;
765
+ }
692
766
Orig->eraseFromParent ();
693
767
return true ;
694
768
}
@@ -821,12 +895,20 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
821
895
case Intrinsic::dx_radians:
822
896
Result = expandRadiansIntrinsic (Orig);
823
897
break ;
898
+ case Intrinsic::dx_resource_load_rawbuffer:
899
+ if (expandBufferLoadIntrinsic (Orig, /* IsRaw*/ true ))
900
+ return true ;
901
+ break ;
902
+ case Intrinsic::dx_resource_store_rawbuffer:
903
+ if (expandBufferStoreIntrinsic (Orig, /* IsRaw*/ true ))
904
+ return true ;
905
+ break ;
824
906
case Intrinsic::dx_resource_load_typedbuffer:
825
- if (expandTypedBufferLoadIntrinsic (Orig))
907
+ if (expandBufferLoadIntrinsic (Orig, /* IsRaw */ false ))
826
908
return true ;
827
909
break ;
828
910
case Intrinsic::dx_resource_store_typedbuffer:
829
- if (expandTypedBufferStoreIntrinsic (Orig))
911
+ if (expandBufferStoreIntrinsic (Orig, /* IsRaw */ false ))
830
912
return true ;
831
913
break ;
832
914
case Intrinsic::usub_sat:
0 commit comments