Skip to content

Commit 2dfcc30

Browse files
committed
[AArch64] Add tests for inefficient LD1lanePost. NFC
1 parent 77a3ae5 commit 2dfcc30

File tree

1 file changed

+117
-0
lines changed

1 file changed

+117
-0
lines changed

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13337,6 +13337,57 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
1333713337
ret <16 x i8> %tmp2
1333813338
}
1333913339

13340+
; Lane-0 load into an all-zero <16 x i8> combined with a register-sized
; pointer increment: loads one i8 from %bar, inserts it into lane 0 of
; zeroinitializer, stores %bar + %inc to %ptr and returns the vector.
; The SD checks below record a movi that zeroes v0 followed by a post-indexed
; ld1.b lane load; the GI checks record ldr b0 followed by fifteen per-lane
; zero inserts and a separate add for the incremented pointer.
; NOTE(review): SD/GI presumably denote the SelectionDAG and GlobalISel runs;
; the RUN lines are not visible in this hunk - confirm.
define <16 x i8> @test_v16i8_post_reg_ld1lane_zero(ptr %bar, ptr %ptr, i64 %inc) {
13341+
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1lane_zero:
13342+
; CHECK-SD: ; %bb.0:
13343+
; CHECK-SD-NEXT: movi.2d v0, #0000000000000000
13344+
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0], x2
13345+
; CHECK-SD-NEXT: str x0, [x1]
13346+
; CHECK-SD-NEXT: ret
13347+
;
13348+
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane_zero:
13349+
; CHECK-GI: ; %bb.0:
13350+
; CHECK-GI-NEXT: ldr b0, [x0]
13351+
; CHECK-GI-NEXT: mov w8, #0 ; =0x0
13352+
; CHECK-GI-NEXT: mov.b v0[1], w8
13353+
; CHECK-GI-NEXT: mov.b v0[2], w8
13354+
; CHECK-GI-NEXT: mov.b v0[3], w8
13355+
; CHECK-GI-NEXT: mov.b v0[4], w8
13356+
; CHECK-GI-NEXT: mov.b v0[5], w8
13357+
; CHECK-GI-NEXT: mov.b v0[6], w8
13358+
; CHECK-GI-NEXT: mov.b v0[7], w8
13359+
; CHECK-GI-NEXT: mov.b v0[8], w8
13360+
; CHECK-GI-NEXT: mov.b v0[9], w8
13361+
; CHECK-GI-NEXT: mov.b v0[10], w8
13362+
; CHECK-GI-NEXT: mov.b v0[11], w8
13363+
; CHECK-GI-NEXT: mov.b v0[12], w8
13364+
; CHECK-GI-NEXT: mov.b v0[13], w8
13365+
; CHECK-GI-NEXT: mov.b v0[14], w8
13366+
; CHECK-GI-NEXT: mov.b v0[15], w8
13367+
; CHECK-GI-NEXT: add x8, x0, x2
13368+
; CHECK-GI-NEXT: str x8, [x1]
13369+
; CHECK-GI-NEXT: ret
13370+
; Scalar load whose result becomes lane 0; lanes 1-15 stay zero.
%tmp1 = load i8, ptr %bar
13371+
%tmp2 = insertelement <16 x i8> zeroinitializer, i8 %tmp1, i32 0
13372+
; Incremented address (%bar advanced by %inc bytes), written back through %ptr.
%tmp3 = getelementptr i8, ptr %bar, i64 %inc
13373+
store ptr %tmp3, ptr %ptr
13374+
ret <16 x i8> %tmp2
13375+
}
13376+
13377+
; Lane-0 load into a <16 x i8> whose other lanes are poison, combined with a
; register-sized pointer increment: loads one i8 from %bar, inserts it into
; lane 0 of a poison vector, stores %bar + %inc to %ptr and returns the vector.
; Both run configurations share one set of checks here: a plain ldr b0, a
; separate add for the incremented pointer, and the store.
; NOTE(review): the function name says "undef" while the IR uses poison.
define <16 x i8> @test_v16i8_post_reg_ld1lane_undef(ptr %bar, ptr %ptr, i64 %inc) {
13378+
; CHECK-LABEL: test_v16i8_post_reg_ld1lane_undef:
13379+
; CHECK: ; %bb.0:
13380+
; CHECK-NEXT: ldr b0, [x0]
13381+
; CHECK-NEXT: add x8, x0, x2
13382+
; CHECK-NEXT: str x8, [x1]
13383+
; CHECK-NEXT: ret
13384+
; Scalar load whose result becomes lane 0; the remaining lanes are poison.
%tmp1 = load i8, ptr %bar
13385+
%tmp2 = insertelement <16 x i8> poison, i8 %tmp1, i32 0
13386+
; Incremented address (%bar advanced by %inc bytes), written back through %ptr.
%tmp3 = getelementptr i8, ptr %bar, i64 %inc
13387+
store ptr %tmp3, ptr %ptr
13388+
ret <16 x i8> %tmp2
13389+
}
13390+
1334013391
define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
1334113392
; CHECK-SD-LABEL: test_v8i8_post_imm_ld1lane:
1334213393
; CHECK-SD: ; %bb.0:
@@ -14078,3 +14129,69 @@ define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
1407814129
%e = extractelement <4 x i32> %lv, i32 %idx.x
1407914130
ret i32 %e
1408014131
}
14132+
14133+
; Two unrolled rows of the same pattern: load an i32 from each of %fenc and
; %pred, insert it into lane 0 of a <2 x i32> whose lane 1 is the constant 0,
; reinterpret as <8 x i8>, zero-extend to <8 x i16>, subtract, and store the
; low four i16 lanes to %residual. Between rows %fenc and %pred advance by
; %stride bytes and %residual advances by %stride i16 elements.
; The SD checks record that the first row uses movi plus post-indexed ld1.s
; lane loads (which also produce the advanced pointers), while the second row
; uses plain ldr s loads. The GI checks record ldr s plus an explicit zero
; insert into lane 1 for every load.
; NOTE(review): SD/GI presumably denote the SelectionDAG and GlobalISel runs;
; the RUN lines are not visible in this hunk - confirm.
define void @chained_insert_zero(ptr noundef %fenc, ptr noundef %pred, ptr noundef %residual, i32 noundef %stride) {
14134+
; CHECK-SD-LABEL: chained_insert_zero:
14135+
; CHECK-SD: ; %bb.0: ; %entry
14136+
; CHECK-SD-NEXT: movi.2d v0, #0000000000000000
14137+
; CHECK-SD-NEXT: movi.2d v1, #0000000000000000
14138+
; CHECK-SD-NEXT: ; kill: def $w3 killed $w3 def $x3
14139+
; CHECK-SD-NEXT: sxtw x8, w3
14140+
; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0], x8
14141+
; CHECK-SD-NEXT: ld1.s { v1 }[0], [x1], x8
14142+
; CHECK-SD-NEXT: sbfiz x8, x3, #1, #32
14143+
; CHECK-SD-NEXT: usubl.8h v0, v0, v1
14144+
; CHECK-SD-NEXT: str d0, [x2]
14145+
; CHECK-SD-NEXT: ldr s0, [x0]
14146+
; CHECK-SD-NEXT: ldr s1, [x1]
14147+
; CHECK-SD-NEXT: usubl.8h v0, v0, v1
14148+
; CHECK-SD-NEXT: str d0, [x2, x8]
14149+
; CHECK-SD-NEXT: ret
14150+
;
14151+
; CHECK-GI-LABEL: chained_insert_zero:
14152+
; CHECK-GI: ; %bb.0: ; %entry
14153+
; CHECK-GI-NEXT: ldr s0, [x0]
14154+
; CHECK-GI-NEXT: ldr s1, [x1]
14155+
; CHECK-GI-NEXT: ; kill: def $w3 killed $w3 def $x3
14156+
; CHECK-GI-NEXT: sxtw x8, w3
14157+
; CHECK-GI-NEXT: mov.s v0[1], wzr
14158+
; CHECK-GI-NEXT: mov.s v1[1], wzr
14159+
; CHECK-GI-NEXT: usubl.8h v0, v0, v1
14160+
; CHECK-GI-NEXT: str d0, [x2]
14161+
; CHECK-GI-NEXT: ldr s0, [x0, x8]
14162+
; CHECK-GI-NEXT: ldr s1, [x1, x8]
14163+
; CHECK-GI-NEXT: lsl x8, x8, #1
14164+
; CHECK-GI-NEXT: mov.s v0[1], wzr
14165+
; CHECK-GI-NEXT: mov.s v1[1], wzr
14166+
; CHECK-GI-NEXT: usubl.8h v0, v0, v1
14167+
; CHECK-GI-NEXT: str d0, [x2, x8]
14168+
; CHECK-GI-NEXT: ret
14169+
entry:
14170+
; Sign-extended stride, used as the offset for all three pointer advances.
%idx.ext = sext i32 %stride to i64
14171+
; Row 0: lane-0 inserts into <poison, 0> for both inputs.
%0 = load i32, ptr %fenc, align 4
14172+
%vld1_lane.i = insertelement <2 x i32> <i32 poison, i32 0>, i32 %0, i64 0
14173+
%1 = bitcast <2 x i32> %vld1_lane.i to <8 x i8>
14174+
%2 = load i32, ptr %pred, align 4
14175+
%vld1_lane.i16 = insertelement <2 x i32> <i32 poison, i32 0>, i32 %2, i64 0
14176+
%3 = bitcast <2 x i32> %vld1_lane.i16 to <8 x i8>
14177+
; Widen both byte vectors to i16 and subtract (%fenc-derived minus %pred-derived).
%vmovl.i15 = zext <8 x i8> %1 to <8 x i16>
14178+
%vmovl.i = zext <8 x i8> %3 to <8 x i16>
14179+
%sub.i = sub nsw <8 x i16> %vmovl.i15, %vmovl.i
14180+
; Keep only lanes 0-3, i.e. the lanes derived from the loaded i32.
%shuffle.i = shufflevector <8 x i16> %sub.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14181+
store <4 x i16> %shuffle.i, ptr %residual, align 2
14182+
; Advance to the next row: byte offsets for the inputs, i16 elements for the output.
%add.ptr = getelementptr inbounds i8, ptr %fenc, i64 %idx.ext
14183+
%add.ptr6 = getelementptr inbounds i8, ptr %pred, i64 %idx.ext
14184+
%add.ptr8 = getelementptr inbounds i16, ptr %residual, i64 %idx.ext
14185+
; Row 1: same computation on the advanced pointers.
%4 = load i32, ptr %add.ptr, align 4
14186+
%vld1_lane.i.1 = insertelement <2 x i32> <i32 poison, i32 0>, i32 %4, i64 0
14187+
%5 = bitcast <2 x i32> %vld1_lane.i.1 to <8 x i8>
14188+
%6 = load i32, ptr %add.ptr6, align 4
14189+
%vld1_lane.i16.1 = insertelement <2 x i32> <i32 poison, i32 0>, i32 %6, i64 0
14190+
%7 = bitcast <2 x i32> %vld1_lane.i16.1 to <8 x i8>
14191+
%vmovl.i15.1 = zext <8 x i8> %5 to <8 x i16>
14192+
%vmovl.i.1 = zext <8 x i8> %7 to <8 x i16>
14193+
%sub.i.1 = sub nsw <8 x i16> %vmovl.i15.1, %vmovl.i.1
14194+
%shuffle.i.1 = shufflevector <8 x i16> %sub.i.1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14195+
store <4 x i16> %shuffle.i.1, ptr %add.ptr8, align 2
14196+
ret void
14197+
}

0 commit comments

Comments
 (0)