Skip to content

Commit a3afbd3

Browse files
authored
[MSAN] only require needed bits to be initialized for permilvar (#147407)
1 parent e3e7393 commit a3afbd3

File tree

5 files changed

+653
-111
lines changed

5 files changed

+653
-111
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4297,7 +4297,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
42974297
void handleAVXVpermilvar(IntrinsicInst &I) {
42984298
IRBuilder<> IRB(&I);
42994299
Value *Shadow = getShadow(&I, 0);
4300-
insertShadowCheck(I.getArgOperand(1), &I);
4300+
maskedCheckAVXIndexShadow(IRB, I.getArgOperand(1), &I);
43014301

43024302
// Shadows are integer-ish types but some intrinsics require a
43034303
// different (e.g., floating-point) type.

llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll

Lines changed: 86 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -950,16 +950,21 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1)
950950
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
951951
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
952952
; CHECK-NEXT: call void @llvm.donothing()
953+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
954+
; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 1
955+
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP9]]
956+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
957+
; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 1
958+
; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
953959
; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
954960
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]])
955961
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64>
956-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
957-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
958-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
959-
; CHECK: 7:
962+
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0
963+
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
964+
; CHECK: 12:
960965
; CHECK-NEXT: call void @__msan_warning_noreturn()
961966
; CHECK-NEXT: unreachable
962-
; CHECK: 8:
967+
; CHECK: 13:
963968
; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]])
964969
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
965970
; CHECK-NEXT: ret <2 x double> [[RES1]]
@@ -975,16 +980,27 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64>
975980
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
976981
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
977982
; CHECK-NEXT: call void @llvm.donothing()
983+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
984+
; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP3]], 3
985+
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP15]]
986+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
987+
; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 3
988+
; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
989+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
990+
; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 3
991+
; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]]
992+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
993+
; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3
994+
; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
978995
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
979996
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]])
980997
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
981-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
982-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
983-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
984-
; CHECK: 7:
998+
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP14]], 0
999+
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
1000+
; CHECK: 18:
9851001
; CHECK-NEXT: call void @__msan_warning_noreturn()
9861002
; CHECK-NEXT: unreachable
987-
; CHECK: 8:
1003+
; CHECK: 19:
9881004
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]])
9891005
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
9901006
; CHECK-NEXT: ret <4 x double> [[RES1]]
@@ -1014,16 +1030,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #
10141030
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
10151031
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
10161032
; CHECK-NEXT: call void @llvm.donothing()
1033+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
1034+
; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP3]], 3
1035+
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP15]]
1036+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i64 1
1037+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 3
1038+
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
1039+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i64 2
1040+
; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 3
1041+
; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
1042+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3
1043+
; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
1044+
; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
10171045
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
10181046
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]])
10191047
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
1020-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1021-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
1022-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
1023-
; CHECK: 7:
1048+
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP14]], 0
1049+
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
1050+
; CHECK: 18:
10241051
; CHECK-NEXT: call void @__msan_warning_noreturn()
10251052
; CHECK-NEXT: unreachable
1026-
; CHECK: 8:
1053+
; CHECK: 19:
10271054
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]])
10281055
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
10291056
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1047,16 +1074,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
10471074
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
10481075
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
10491076
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
1077+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
1078+
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3
1079+
; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP8]], [[TMP9]]
1080+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 1
1081+
; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 3
1082+
; CHECK-NEXT: [[TMP13:%.*]] = or i32 [[TMP11]], [[TMP12]]
1083+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 2
1084+
; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3
1085+
; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP14]], [[TMP15]]
1086+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 3
1087+
; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3
1088+
; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP17]], [[TMP18]]
10501089
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
10511090
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]])
10521091
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
1053-
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
1054-
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
1055-
; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
1056-
; CHECK: 12:
1092+
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP19]], 0
1093+
; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
1094+
; CHECK: 23:
10571095
; CHECK-NEXT: call void @__msan_warning_noreturn()
10581096
; CHECK-NEXT: unreachable
1059-
; CHECK: 13:
1097+
; CHECK: 24:
10601098
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]])
10611099
; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
10621100
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1073,16 +1111,39 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a
10731111
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
10741112
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
10751113
; CHECK-NEXT: call void @llvm.donothing()
1114+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
1115+
; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP3]], 7
1116+
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP27]]
1117+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
1118+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 7
1119+
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
1120+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
1121+
; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 7
1122+
; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
1123+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
1124+
; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7
1125+
; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
1126+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
1127+
; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7
1128+
; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]]
1129+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
1130+
; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7
1131+
; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
1132+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
1133+
; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7
1134+
; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
1135+
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
1136+
; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7
1137+
; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]]
10761138
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
10771139
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]])
10781140
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
1079-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
1080-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
1081-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
1082-
; CHECK: 7:
1141+
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP26]], 0
1142+
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
1143+
; CHECK: 30:
10831144
; CHECK-NEXT: call void @__msan_warning_noreturn()
10841145
; CHECK-NEXT: unreachable
1085-
; CHECK: 8:
1146+
; CHECK: 31:
10861147
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]])
10871148
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
10881149
; CHECK-NEXT: ret <8 x float> [[RES1]]

0 commit comments

Comments
 (0)