Skip to content

Commit fa9e1a1

Browse files
authored
[AArch64] Expand llvm.histogram intrinsic to support umax, umin, and uadd.sat operations (#138447)
This patch extends the experimental vector histogram intrinsics (`llvm.experimental.vector.histogram.*`) to support update operations beyond the existing `add`. The newly supported operations are:

* `umax`: unsigned maximum
* `umin`: unsigned minimum
* `uadd.sat`: unsigned saturating addition

Based on the discussion at: https://discourse.llvm.org/t/rfc-expanding-the-experimental-histogram-intrinsic/84673
1 parent 76e14de commit fa9e1a1

File tree

4 files changed

+407
-4
lines changed

4 files changed

+407
-4
lines changed

llvm/docs/LangRef.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20545,6 +20545,9 @@ More update operation types may be added in the future.
2054520545

2054620546
declare void @llvm.experimental.vector.histogram.add.v8p0.i32(<8 x ptr> %ptrs, i32 %inc, <8 x i1> %mask)
2054720547
declare void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %ptrs, i64 %inc, <vscale x 2 x i1> %mask)
20548+
declare void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i32(<8 x ptr> %ptrs, i32 %inc, <8 x i1> %mask)
20549+
declare void @llvm.experimental.vector.histogram.umax.v8p0.i32(<8 x ptr> %ptrs, i32 %val, <8 x i1> %mask)
20550+
declare void @llvm.experimental.vector.histogram.umin.v8p0.i32(<8 x ptr> %ptrs, i32 %val, <8 x i1> %mask)
2054820551

2054920552
Arguments:
2055020553
""""""""""

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,6 +1968,24 @@ def int_experimental_vector_histogram_add : DefaultAttrsIntrinsic<[],
19681968
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
19691969
[ IntrArgMemOnly ]>;
19701970

1971+
def int_experimental_vector_histogram_uadd_sat : DefaultAttrsIntrinsic<[],
1972+
[ llvm_anyvector_ty, // Vector of pointers
1973+
llvm_anyint_ty, // Increment
1974+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
1975+
[ IntrArgMemOnly ]>;
1976+
1977+
def int_experimental_vector_histogram_umin : DefaultAttrsIntrinsic<[],
1978+
[ llvm_anyvector_ty, // Vector of pointers
1979+
llvm_anyint_ty, // Update value
1980+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
1981+
[ IntrArgMemOnly ]>;
1982+
1983+
def int_experimental_vector_histogram_umax : DefaultAttrsIntrinsic<[],
1984+
[ llvm_anyvector_ty, // Vector of pointers
1985+
llvm_anyint_ty, // Update value
1986+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
1987+
[ IntrArgMemOnly ]>;
1988+
19711989
// Experimental match
19721990
def int_experimental_vector_match : DefaultAttrsIntrinsic<
19731991
[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],

llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,29 @@ static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
968968

969969
// FIXME: Do we need to add an alignment parameter to the intrinsic?
970970
unsigned VectorWidth = AddrType->getNumElements();
971+
auto CreateHistogramUpdateValue = [&](IntrinsicInst *CI, Value *Load,
972+
Value *Inc) -> Value * {
973+
Value *UpdateOp;
974+
switch (CI->getIntrinsicID()) {
975+
case Intrinsic::experimental_vector_histogram_add:
976+
UpdateOp = Builder.CreateAdd(Load, Inc);
977+
break;
978+
case Intrinsic::experimental_vector_histogram_uadd_sat:
979+
UpdateOp =
980+
Builder.CreateIntrinsic(Intrinsic::uadd_sat, {EltTy}, {Load, Inc});
981+
break;
982+
case Intrinsic::experimental_vector_histogram_umin:
983+
UpdateOp = Builder.CreateIntrinsic(Intrinsic::umin, {EltTy}, {Load, Inc});
984+
break;
985+
case Intrinsic::experimental_vector_histogram_umax:
986+
UpdateOp = Builder.CreateIntrinsic(Intrinsic::umax, {EltTy}, {Load, Inc});
987+
break;
988+
989+
default:
990+
llvm_unreachable("Unexpected histogram intrinsic");
991+
}
992+
return UpdateOp;
993+
};
971994

972995
// Shorten the way if the mask is a vector of constants.
973996
if (isConstantIntVector(Mask)) {
@@ -976,8 +999,9 @@ static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
976999
continue;
9771000
Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
9781001
LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
979-
Value *Add = Builder.CreateAdd(Load, Inc);
980-
Builder.CreateStore(Add, Ptr);
1002+
Value *Update =
1003+
CreateHistogramUpdateValue(cast<IntrinsicInst>(CI), Load, Inc);
1004+
Builder.CreateStore(Update, Ptr);
9811005
}
9821006
CI->eraseFromParent();
9831007
return;
@@ -997,8 +1021,9 @@ static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
9971021
Builder.SetInsertPoint(CondBlock->getTerminator());
9981022
Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
9991023
LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
1000-
Value *Add = Builder.CreateAdd(Load, Inc);
1001-
Builder.CreateStore(Add, Ptr);
1024+
Value *UpdateOp =
1025+
CreateHistogramUpdateValue(cast<IntrinsicInst>(CI), Load, Inc);
1026+
Builder.CreateStore(UpdateOp, Ptr);
10021027

10031028
// Create "else" block, fill it in the next iteration
10041029
BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
@@ -1089,6 +1114,9 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
10891114
default:
10901115
break;
10911116
case Intrinsic::experimental_vector_histogram_add:
1117+
case Intrinsic::experimental_vector_histogram_uadd_sat:
1118+
case Intrinsic::experimental_vector_histogram_umin:
1119+
case Intrinsic::experimental_vector_histogram_umax:
10921120
if (TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(),
10931121
CI->getArgOperand(1)->getType()))
10941122
return false;

0 commit comments

Comments
 (0)