Handle cases where Multiplier is not divisible by BytesPerElem in variable index calculation

Icohedron · Icohedron · commit 3734f57aa501 · 2025-07-01T02:45:01.000Z
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/ReplaceConstant.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstddef>
@@ -305,6 +306,8 @@ bool DXILFlattenArraysVisitor::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     unsigned BytesPerElem = Info.RootFlattenedArrayType->getArrayElementType()
                                 ->getPrimitiveSizeInBits() /
                             8;
+    assert(isPowerOf2_32(BytesPerElem) &&
+           "Bytes per element should be a power of 2");
 
     // Compute the 32-bit index for this flattened GEP from the constant and
     // variable byte offsets in the GEPInfo
@@ -316,14 +319,23 @@ bool DXILFlattenArraysVisitor::visitGetElementPtrInst(GetElementPtrInst &GEP) {
            "Constant byte offset for flat GEP index must fit within 32 bits");
     Value *FlattenedIndex = Builder.getInt32(ConstantOffset);
     for (auto [VarIndex, Multiplier] : Info.VariableOffsets) {
-      uint64_t Mul = Multiplier.udiv(BytesPerElem).getZExtValue();
-      assert(Mul < UINT32_MAX &&
-             "Multiplier for flat GEP index must fit within 32 bits");
+      assert(Multiplier.getActiveBits() <= 32 &&
+             "The multiplier for a flat GEP index must fit within 32 bits");
       assert(VarIndex->getType()->isIntegerTy(32) &&
              "Expected i32-typed GEP indices");
-      Value *ConstIntMul = Builder.getInt32(Mul);
-      Value *MulVarIndex = Builder.CreateMul(VarIndex, ConstIntMul);
-      FlattenedIndex = Builder.CreateAdd(FlattenedIndex, MulVarIndex);
+      Value *VI;
+      if (Multiplier.getZExtValue() % BytesPerElem != 0) {
+        // This can happen, e.g., with i8 GEPs. Multiply VarIndex by Multiplier
+        // first, then divide by BytesPerElem at runtime with a logical shift
+        // right (BytesPerElem is asserted to be a power of 2 above).
+        VI = Builder.CreateMul(VarIndex,
+                               Builder.getInt32(Multiplier.getZExtValue()));
+        VI = Builder.CreateLShr(VI, Builder.getInt32(Log2_32(BytesPerElem)));
+      } else
+        VI = Builder.CreateMul(
+            VarIndex,
+            Builder.getInt32(Multiplier.getZExtValue() / BytesPerElem));
+      FlattenedIndex = Builder.CreateAdd(FlattenedIndex, VI);
     }
 
     // Construct a new GEP for the flattened array to replace the current GEP
diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll
@@ -273,5 +273,28 @@ define void @gep_scalar_flatten() {
   ret void
 }
 
+define void @gep_scalar_flatten_dynamic(i32 %index) {
+  ; CHECK-LABEL: gep_scalar_flatten_dynamic
+  ; CHECK-SAME: i32 [[INDEX:%.*]]) {
+  ; CHECK-NEXT:  [[ALLOCA:%.*]] = alloca [6 x i32], align 4
+  ; CHECK-NEXT:  [[I8INDEX:%.*]] = mul i32 [[INDEX]], 12
+  ; CHECK-NEXT:  [[MUL:%.*]] = mul i32 [[I8INDEX]], 1
+  ; CHECK-NEXT:  [[DIV:%.*]] = lshr i32 [[MUL]], 2
+  ; CHECK-NEXT:  [[ADD:%.*]] = add i32 0, [[DIV]]
+  ; CHECK-NEXT:  getelementptr inbounds nuw [6 x i32], ptr [[ALLOCA]], i32 0, i32 [[ADD]]
+  ; CHECK-NEXT:  [[I32INDEX:%.*]] = mul i32 [[INDEX]], 3
+  ; CHECK-NEXT:  [[MUL:%.*]] = mul i32 [[I32INDEX]], 1
+  ; CHECK-NEXT:  [[ADD:%.*]] = add i32 0, [[MUL]]
+  ; CHECK-NEXT:  getelementptr inbounds nuw [6 x i32], ptr [[ALLOCA]], i32 0, i32 [[ADD]]
+  ; CHECK-NEXT:  ret void
+  ;
+  %a = alloca [2 x [3 x i32]], align 4
+  %i8index = mul i32 %index, 12
+  %i8root = getelementptr inbounds nuw i8, [2 x [3 x i32]]* %a, i32 %i8index;
+  %i32index = mul i32 %index, 3
+  %i32root = getelementptr inbounds nuw i32, [2 x [3 x i32]]* %a, i32 %i32index;
+  ret void
+}
+
 ; Make sure we don't try to walk the body of a function declaration.
 declare void @opaque_function()