@@ -7,9 +7,8 @@ define i32 @test_and(<16 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @test_and(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A:%.*]] to <16 x i8>
-; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[WIDE_LOAD]], [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[TMP0]], [[A:%.*]]
 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
 ; CHECK-NEXT: ret i32 [[TMP3]]
 ;
@@ -26,9 +25,8 @@ define i32 @test_mask_or(<16 x i32> %a, ptr %b) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT: [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], splat (i32 16)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A_MASKED]] to <16 x i8>
-; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i8> [[WIDE_LOAD]], [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[A_MASKED]]
 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
 ; CHECK-NEXT: ret i32 [[TMP3]]
 ;
@@ -47,15 +45,13 @@ define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) {
 ; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255)
 ; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255)
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
-; CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4)
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[V_MASKED]] to <16 x i8>
-; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i8> [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15)
-; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[U_MASKED]] to <16 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4)
 ; CHECK-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP7]], [[V_MASKED]]
+; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[TMP0]], splat (i32 15)
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], [[U_MASKED]]
+; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP5]]
 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
 ; CHECK-NEXT: ret i32 [[TMP9]]
 ;
@@ -81,9 +77,8 @@ define i32 @phi_bug(<16 x i32> %a, ptr %b) {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[A_PHI:%.*]] = phi <16 x i32> [ [[A:%.*]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT: [[WIDE_LOAD_PHI:%.*]] = phi <16 x i8> [ [[WIDE_LOAD]], [[ENTRY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A_PHI]] to <16 x i8>
-; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[WIDE_LOAD_PHI]], [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD_PHI]] to <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[TMP0]], [[A_PHI]]
 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
 ; CHECK-NEXT: ret i32 [[TMP3]]
 ;
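All four hunks update the expected output in the same direction: the old checks expected a narrow-type pattern (trunc the i32 operand to i8, perform the binop in <16 x i8>, zext the result), while the new checks expect the loaded bytes to be widened first so the binop stays in <16 x i32>. Below is a minimal before/after sketch of the @test_and case, reduced from the checks above; the function names @narrow_form and @wide_form are placeholders for illustration, not names from the test.

; Hypothetical before-form: binop in the narrow type, result widened last.
define i32 @narrow_form(<16 x i32> %a, ptr %b) {
entry:
  %wide.load = load <16 x i8>, ptr %b, align 1
  %a.trunc = trunc <16 x i32> %a to <16 x i8>
  %and8 = and <16 x i8> %wide.load, %a.trunc
  %widened = zext <16 x i8> %and8 to <16 x i32>
  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %widened)
  ret i32 %sum
}

; Hypothetical after-form: load widened once, binop performed in i32.
define i32 @wide_form(<16 x i32> %a, ptr %b) {
entry:
  %wide.load = load <16 x i8>, ptr %b, align 1
  %widened = zext <16 x i8> %wide.load to <16 x i32>
  %and32 = and <16 x i32> %widened, %a
  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %and32)
  ret i32 %sum
}

declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)

For the and cases the two forms are equivalent because the zext leaves bits 8-31 zero, so any high bits of %a are masked either way; the or cases in the diff rely on the other operand already being masked to fit in eight bits ([[A_MASKED]] with splat (i32 16), [[U_MASKED]]/[[V_MASKED]] with splat (i32 255)).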