Skip to content

Commit bd4e7f5

Browse files
[LLVM][DAGCombiner] Fix size calculations in calculateByteProvider. (#148425)
calculateByteProvider only cares about scalars or a single element within a vector. For the latter, there is the VectorIndex parameter to identify the element. All other properties, and specifically Index, are related to the underlying scalar type, and thus when taking the size of a type it's the scalar size that matters. Fixes #148387
1 parent 9250139 commit bd4e7f5

File tree

2 files changed

+181
-2
lines changed

2 files changed

+181
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9147,7 +9147,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
91479147
if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
91489148
return std::nullopt;
91499149

9150-
unsigned BitWidth = Op.getValueSizeInBits();
9150+
unsigned BitWidth = Op.getScalarValueSizeInBits();
91519151
if (BitWidth % 8 != 0)
91529152
return std::nullopt;
91539153
unsigned ByteWidth = BitWidth / 8;
@@ -9246,7 +9246,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
92469246
if (!L->isSimple() || L->isIndexed())
92479247
return std::nullopt;
92489248

9249-
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
9249+
unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
92509250
if (NarrowBitWidth % 8 != 0)
92519251
return std::nullopt;
92529252
uint64_t NarrowByteWidth = NarrowBitWidth / 8;

llvm/test/CodeGen/AArch64/load-combine.ll

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,3 +713,182 @@ define void @short_vector_to_i64(ptr %in, ptr %out, ptr %p) {
713713
store i64 %i3, ptr %out
714714
ret void
715715
}
716+
717+
; x1 = x0
718+
define void @scalable_vector_to_i32(ptr %in, ptr %out, ptr %p) #0 {
719+
; CHECK-LABEL: scalable_vector_to_i32:
720+
; CHECK: // %bb.0:
721+
; CHECK-NEXT: ldr w8, [x0]
722+
; CHECK-NEXT: str w8, [x1]
723+
; CHECK-NEXT: ret
724+
%ld = load <vscale x 4 x i8>, ptr %in, align 4
725+
726+
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
727+
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
728+
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
729+
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
730+
731+
%z0 = zext i8 %e1 to i32
732+
%z1 = zext i8 %e2 to i32
733+
%z2 = zext i8 %e3 to i32
734+
%z3 = zext i8 %e4 to i32
735+
736+
%s1 = shl nuw nsw i32 %z1, 8
737+
%s2 = shl nuw nsw i32 %z2, 16
738+
%s3 = shl nuw i32 %z3, 24
739+
740+
%i1 = or i32 %s1, %z0
741+
%i2 = or i32 %i1, %s2
742+
%i3 = or i32 %i2, %s3
743+
744+
store i32 %i3, ptr %out
745+
ret void
746+
}
747+
748+
define void @scalable_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) #0 {
749+
; CHECK-LABEL: scalable_vector_to_i32_unused_low_i8:
750+
; CHECK: // %bb.0:
751+
; CHECK-NEXT: ptrue p0.s
752+
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
753+
; CHECK-NEXT: mov w8, v0.s[1]
754+
; CHECK-NEXT: mov w9, v0.s[2]
755+
; CHECK-NEXT: mov w10, v0.s[3]
756+
; CHECK-NEXT: lsl w8, w8, #8
757+
; CHECK-NEXT: orr w8, w8, w9, lsl #16
758+
; CHECK-NEXT: orr w8, w8, w10, lsl #24
759+
; CHECK-NEXT: str w8, [x1]
760+
; CHECK-NEXT: ret
761+
%ld = load <vscale x 4 x i8>, ptr %in, align 4
762+
763+
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
764+
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
765+
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
766+
767+
%z1 = zext i8 %e2 to i32
768+
%z2 = zext i8 %e3 to i32
769+
%z3 = zext i8 %e4 to i32
770+
771+
%s1 = shl nuw nsw i32 %z1, 8
772+
%s2 = shl nuw nsw i32 %z2, 16
773+
%s3 = shl nuw i32 %z3, 24
774+
775+
%i2 = or i32 %s1, %s2
776+
%i3 = or i32 %i2, %s3
777+
778+
store i32 %i3, ptr %out
779+
ret void
780+
}
781+
782+
define void @scalable_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) #0 {
783+
; CHECK-LABEL: scalable_vector_to_i32_unused_high_i8:
784+
; CHECK: // %bb.0:
785+
; CHECK-NEXT: ptrue p0.s
786+
; CHECK-NEXT: ldrh w9, [x0]
787+
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
788+
; CHECK-NEXT: mov w8, v0.s[2]
789+
; CHECK-NEXT: orr w8, w9, w8, lsl #16
790+
; CHECK-NEXT: str w8, [x1]
791+
; CHECK-NEXT: ret
792+
%ld = load <vscale x 4 x i8>, ptr %in, align 4
793+
794+
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
795+
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
796+
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
797+
798+
%z0 = zext i8 %e1 to i32
799+
%z1 = zext i8 %e2 to i32
800+
%z2 = zext i8 %e3 to i32
801+
802+
%s1 = shl nuw nsw i32 %z1, 8
803+
%s2 = shl nuw nsw i32 %z2, 16
804+
805+
%i1 = or i32 %s1, %z0
806+
%i2 = or i32 %i1, %s2
807+
808+
store i32 %i2, ptr %out
809+
ret void
810+
}
811+
812+
define void @scalable_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) #0 {
813+
; CHECK-LABEL: scalable_vector_to_i32_unused_low_i16:
814+
; CHECK: // %bb.0:
815+
; CHECK-NEXT: ptrue p0.s
816+
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
817+
; CHECK-NEXT: mov w8, v0.s[2]
818+
; CHECK-NEXT: mov w9, v0.s[3]
819+
; CHECK-NEXT: lsl w8, w8, #16
820+
; CHECK-NEXT: orr w8, w8, w9, lsl #24
821+
; CHECK-NEXT: str w8, [x1]
822+
; CHECK-NEXT: ret
823+
%ld = load <vscale x 4 x i8>, ptr %in, align 4
824+
825+
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
826+
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
827+
828+
%z2 = zext i8 %e3 to i32
829+
%z3 = zext i8 %e4 to i32
830+
831+
%s2 = shl nuw nsw i32 %z2, 16
832+
%s3 = shl nuw i32 %z3, 24
833+
834+
%i3 = or i32 %s2, %s3
835+
836+
store i32 %i3, ptr %out
837+
ret void
838+
}
839+
840+
; x1 = x0[0:1]
841+
define void @scalable_vector_to_i32_unused_high_i16(ptr %in, ptr %out, ptr %p) #0 {
842+
; CHECK-LABEL: scalable_vector_to_i32_unused_high_i16:
843+
; CHECK: // %bb.0:
844+
; CHECK-NEXT: ldrh w8, [x0]
845+
; CHECK-NEXT: str w8, [x1]
846+
; CHECK-NEXT: ret
847+
%ld = load <vscale x 4 x i8>, ptr %in, align 4
848+
849+
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
850+
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
851+
852+
%z0 = zext i8 %e1 to i32
853+
%z1 = zext i8 %e2 to i32
854+
855+
%s1 = shl nuw nsw i32 %z1, 8
856+
857+
%i1 = or i32 %s1, %z0
858+
859+
store i32 %i1, ptr %out
860+
ret void
861+
}
862+
863+
; x1 = x0
864+
define void @scalable_vector_to_i64(ptr %in, ptr %out, ptr %p) #0 {
865+
; CHECK-LABEL: scalable_vector_to_i64:
866+
; CHECK: // %bb.0:
867+
; CHECK-NEXT: ldr w8, [x0]
868+
; CHECK-NEXT: str x8, [x1]
869+
; CHECK-NEXT: ret
870+
%ld = load <vscale x 4 x i8>, ptr %in, align 4
871+
872+
%e1 = extractelement <vscale x 4 x i8> %ld, i32 0
873+
%e2 = extractelement <vscale x 4 x i8> %ld, i32 1
874+
%e3 = extractelement <vscale x 4 x i8> %ld, i32 2
875+
%e4 = extractelement <vscale x 4 x i8> %ld, i32 3
876+
877+
%z0 = zext i8 %e1 to i64
878+
%z1 = zext i8 %e2 to i64
879+
%z2 = zext i8 %e3 to i64
880+
%z3 = zext i8 %e4 to i64
881+
882+
%s1 = shl nuw nsw i64 %z1, 8
883+
%s2 = shl nuw nsw i64 %z2, 16
884+
%s3 = shl nuw i64 %z3, 24
885+
886+
%i1 = or i64 %s1, %z0
887+
%i2 = or i64 %i1, %s2
888+
%i3 = or i64 %i2, %s3
889+
890+
store i64 %i3, ptr %out
891+
ret void
892+
}
893+
894+
attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)