Skip to content

Commit 4aca3dc

Browse files
authored
Reland: [GlobalISel] prevent G_UNMERGE_VALUES for vectors with different elements (#144661)
This commit prevents `LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike()` from building a G_UNMERGE_VALUES instruction whose source and destination vector types have different scalar element types, e.g. `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)`. This LLVM defect was identified via the AMD Fuzzing project.
1 parent b596726 commit 4aca3dc

File tree

3 files changed

+53
-6
lines changed

3 files changed

+53
-6
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,7 @@ class LegalizationArtifactCombiner {
997997

998998
// Recognize UnmergeSrc that can be unmerged to DstTy directly.
999999
// Types have to be either both vector or both non-vector types.
1000+
// In case of vector types, the scalar elements need to match.
10001001
// Merge-like opcodes are combined one at a time. First one creates new
10011002
// unmerge, following should use the same unmerge (builder performs CSE).
10021003
//
@@ -1005,7 +1006,9 @@ class LegalizationArtifactCombiner {
10051006
// %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
10061007
//
10071008
// %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
1008-
if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
1009+
if (((!DstTy.isVector() && !UnmergeSrcTy.isVector()) ||
1010+
(DstTy.isVector() && UnmergeSrcTy.isVector() &&
1011+
DstTy.getScalarType() == UnmergeSrcTy.getScalarType())) &&
10091012
(Elt0UnmergeIdx % NumMIElts == 0) &&
10101013
getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
10111014
if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6506,3 +6506,47 @@ entry:
65066506
%insert = insertelement <5 x double> %vec, double %val, i32 %idx
65076507
ret <5 x double> %insert
65086508
}
6509+
6510+
; Found by fuzzer, reduced with llvm-reduce.
6511+
define void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
6512+
; GPRIDX-LABEL: insert_very_small_from_very_large:
6513+
; GPRIDX: ; %bb.0: ; %bb
6514+
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515+
; GPRIDX-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6516+
; GPRIDX-NEXT: v_and_b32_e32 v0, 1, v0
6517+
; GPRIDX-NEXT: v_lshlrev_b16_e32 v0, 1, v0
6518+
; GPRIDX-NEXT: v_and_b32_e32 v0, 3, v0
6519+
; GPRIDX-NEXT: flat_store_byte v[16:17], v0
6520+
; GPRIDX-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
6521+
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
6522+
;
6523+
; GFX10-LABEL: insert_very_small_from_very_large:
6524+
; GFX10: ; %bb.0: ; %bb
6525+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6526+
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6527+
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
6528+
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
6529+
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
6530+
; GFX10-NEXT: flat_store_byte v[16:17], v0
6531+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
6532+
; GFX10-NEXT: s_setpc_b64 s[30:31]
6533+
;
6534+
; GFX11-LABEL: insert_very_small_from_very_large:
6535+
; GFX11: ; %bb.0: ; %bb
6536+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6537+
; GFX11-NEXT: v_lshrrev_b16 v0.l, 1, v0.l
6538+
; GFX11-NEXT: v_and_b16 v0.l, v0.l, 1
6539+
; GFX11-NEXT: v_lshlrev_b16 v0.l, 1, v0.l
6540+
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
6541+
; GFX11-NEXT: flat_store_b8 v[16:17], v0
6542+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6543+
; GFX11-NEXT: s_setpc_b64 s[30:31]
6544+
bb:
6545+
%a = bitcast <32 x i16> %L3 to i512
6546+
%b = trunc i512 %a to i8
6547+
%c = trunc i8 %b to i2
6548+
%d = bitcast i2 %c to <2 x i1>
6549+
%insert = insertelement <2 x i1> %d, i1 false, i32 0
6550+
store <2 x i1> %insert, ptr %ptr, align 1
6551+
ret void
6552+
}

llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2166,14 +2166,14 @@ define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) {
21662166
; GISEL-LABEL: load_v6i8:
21672167
; GISEL: ; %bb.0:
21682168
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169-
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
21702169
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
2170+
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
21712171
; GISEL-NEXT: s_waitcnt vmcnt(1)
2172-
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
2173-
; GISEL-NEXT: s_waitcnt vmcnt(0)
21742172
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
21752173
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
21762174
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
2175+
; GISEL-NEXT: s_waitcnt vmcnt(0)
2176+
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
21772177
; GISEL-NEXT: s_setpc_b64 s[30:31]
21782178
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
21792179
%ret = load <6 x i8>, ptr addrspace(7) %p
@@ -3630,10 +3630,10 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) {
36303630
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc
36313631
; GISEL-NEXT: s_waitcnt vmcnt(1)
36323632
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3633-
; GISEL-NEXT: s_waitcnt vmcnt(0)
3634-
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
36353633
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
36363634
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3635+
; GISEL-NEXT: s_waitcnt vmcnt(0)
3636+
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
36373637
; GISEL-NEXT: s_setpc_b64 s[30:31]
36383638
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
36393639
%ret = load volatile <6 x i8>, ptr addrspace(7) %p

0 commit comments

Comments
 (0)