Skip to content

Commit 33efbc8

Browse files
author
Simon Moll
committed
[VP] llvm.vp.merge intrinsic and LangRef
llvm.vp.merge interprets the %evl operand differently than the other vp intrinsics: all lanes at positions greater than or equal to the %evl operand are passed through from the second vector input. Otherwise it behaves like llvm.vp.select. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D116725
1 parent c2426fd commit 33efbc8

File tree

5 files changed

+74
-0
lines changed

5 files changed

+74
-0
lines changed

llvm/docs/LangRef.rst

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17856,6 +17856,67 @@ Example:
1785617856
%also.r = select <4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false
1785717857

1785817858

17859+
.. _int_vp_merge:
17860+
17861+
'``llvm.vp.merge.*``' Intrinsics
17862+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17863+
17864+
Syntax:
17865+
"""""""
17866+
This is an overloaded intrinsic.
17867+
17868+
::
17869+
17870+
declare <16 x i32> @llvm.vp.merge.v16i32 (<16 x i1> <condition>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <pivot>)
17871+
declare <vscale x 4 x i64> @llvm.vp.merge.nxv4i64 (<vscale x 4 x i1> <condition>, <vscale x 4 x i64> <on_true>, <vscale x 4 x i64> <on_false>, i32 <pivot>)
17872+
17873+
Overview:
17874+
"""""""""
17875+
17876+
The '``llvm.vp.merge``' intrinsic is used to choose one value based on a
17877+
condition vector and an index operand, without IR-level branching.
17878+
17879+
Arguments:
17880+
""""""""""
17881+
17882+
The first operand is a vector of ``i1`` and indicates the condition. The
17883+
second operand is the value that is merged where the condition vector is true.
17884+
The third operand is the value that is selected where the condition vector is
17885+
false or the lane position is greater than or equal to the pivot. The fourth operand
17886+
is the pivot.
17887+
17888+
#. The optional ``fast-math flags`` marker indicates that the merge has one or
17889+
more :ref:`fast-math flags <fastmath>`. These are optimization hints to
17890+
enable otherwise unsafe floating-point optimizations. Fast-math flags are
17891+
only valid for merges that return a floating-point scalar or vector type,
17892+
or an array (nested to any depth) of floating-point scalar or vector types.
17893+
17894+
Semantics:
17895+
""""""""""
17896+
17897+
The intrinsic selects lanes from the second and third operand depending on a
17898+
condition vector and pivot value.
17899+
17900+
For all lanes where the condition vector is true and the lane position is less
17901+
than ``%pivot`` the lane is taken from the second operand. Otherwise, the lane
17902+
is taken from the third operand.
17903+
17904+
Example:
17905+
""""""""
17906+
17907+
.. code-block:: llvm
17908+
17909+
%r = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false, i32 %pivot)
17910+
17911+
;;; Expansion.
17912+
;; Lanes at and above %pivot are taken from %on_false
17913+
%atfirst = insertelement <4 x i32> undef, i32 %pivot, i32 0
17914+
%splat = shufflevector <4 x i32> %atfirst, <4 x i32> poison, <4 x i32> zeroinitializer
17915+
%pivotmask = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> %splat
17916+
%mergemask = and <4 x i1> %cond, <4 x i1> %pivotmask
17917+
%also.r = select <4 x i1> %mergemask, <4 x i32> %on_true, <4 x i32> %on_false
17918+
17919+
1785917920

1786017921
.. _int_vp_add:
1786117922

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,6 +1507,12 @@ def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
15071507
LLVMMatchType<0>,
15081508
llvm_i32_ty]>;
15091509

1510+
def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
1511+
[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1512+
LLVMMatchType<0>,
1513+
LLVMMatchType<0>,
1514+
llvm_i32_ty]>;
1515+
15101516
// Reductions
15111517
let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
15121518
def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],

llvm/include/llvm/IR/VPIntrinsics.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,10 @@ BEGIN_REGISTER_VP(vp_select, 0, 3, VP_SELECT, -1)
349349
VP_PROPERTY_FUNCTIONAL_OPC(Select)
350350
END_REGISTER_VP(vp_select, VP_SELECT)
351351

352+
// llvm.vp.merge(mask,on_true,on_false,pivot)
353+
BEGIN_REGISTER_VP(vp_merge, 0, 3, VP_MERGE, -1)
354+
END_REGISTER_VP(vp_merge, VP_MERGE)
355+
352356
BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1)
353357
END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
354358

llvm/lib/IR/IntrinsicInst.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
482482
VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
483483
break;
484484
}
485+
case Intrinsic::vp_merge:
485486
case Intrinsic::vp_select:
486487
VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()});
487488
break;

llvm/unittests/IR/VPIntrinsicTest.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ class VPIntrinsicTest : public testing::Test {
6868
Str << " declare float @llvm.vp.reduce." << ReductionOpcode
6969
<< ".v8f32(float, <8 x float>, <8 x i1>, i32) ";
7070

71+
Str << " declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x "
72+
"i32>, i32)";
7173
Str << " declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x "
7274
"i32>, i32)";
7375
Str << " declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x "

0 commit comments

Comments
 (0)