@@ -1072,35 +1072,37 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa
1072
1072
; FEATURE-NEXT: v_lshlrev_b32_e32 v5, 3, v0
1073
1073
; FEATURE-NEXT: v_cmp_lt_u32_e64 s[0:1], 14, v0
1074
1074
; FEATURE-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
1075
- ; FEATURE-NEXT: ; implicit-def: $vgpr3_vgpr4
1075
+ ; FEATURE-NEXT: ; implicit-def: $vgpr1_vgpr2
1076
1076
; FEATURE-NEXT: s_waitcnt lgkmcnt(0)
1077
- ; FEATURE-NEXT: global_load_dwordx2 v[1:2 ], v5, s[8:9]
1077
+ ; FEATURE-NEXT: global_load_dwordx2 v[3:4 ], v5, s[8:9]
1078
1078
; FEATURE-NEXT: s_and_saveexec_b64 s[2:3], vcc
1079
1079
; FEATURE-NEXT: s_cbranch_execz .LBB9_2
1080
1080
; FEATURE-NEXT: ; %bb.1: ; %bb.1
1081
- ; FEATURE-NEXT: global_load_dwordx2 v[3:4], v5, s[10:11]
1081
+ ; FEATURE-NEXT: global_load_dwordx2 v[1:2], v5, s[10:11]
1082
+ ; FEATURE-NEXT: s_mov_b32 s4, 0
1082
1083
; FEATURE-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
1084
+ ; FEATURE-NEXT: s_mov_b32 s5, s4
1083
1085
; FEATURE-NEXT: s_waitcnt vmcnt(1)
1084
- ; FEATURE-NEXT: v_mov_b32_e32 v1, 0
1086
+ ; FEATURE-NEXT: v_mov_b32_e32 v3, s4
1087
+ ; FEATURE-NEXT: v_mov_b32_e32 v4, s5
1085
1088
; FEATURE-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
1086
1089
; FEATURE-NEXT: s_and_b64 s[4:5], vcc, exec
1087
- ; FEATURE-NEXT: v_mov_b32_e32 v2, v1
1088
1090
; FEATURE-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1089
1091
; FEATURE-NEXT: .LBB9_2: ; %Flow
1090
1092
; FEATURE-NEXT: s_or_b64 exec, exec, s[2:3]
1091
1093
; FEATURE-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
1092
1094
; FEATURE-NEXT: s_cbranch_execz .LBB9_4
1093
1095
; FEATURE-NEXT: ; %bb.3: ; %bb.2
1094
1096
; FEATURE-NEXT: s_waitcnt vmcnt(0)
1095
- ; FEATURE-NEXT: v_mov_b32_e32 v4, v2
1097
+ ; FEATURE-NEXT: v_mov_b32_e32 v1, v3
1096
1098
; FEATURE-NEXT: v_mov_b32_e32 v0, 0
1097
- ; FEATURE-NEXT: v_mov_b32_e32 v3, v1
1098
- ; FEATURE-NEXT: global_store_dwordx2 v0, v[1:2 ], s[12:13]
1099
+ ; FEATURE-NEXT: v_mov_b32_e32 v2, v4
1100
+ ; FEATURE-NEXT: global_store_dwordx2 v0, v[3:4 ], s[12:13]
1099
1101
; FEATURE-NEXT: .LBB9_4: ; %bb.3
1100
1102
; FEATURE-NEXT: s_or_b64 exec, exec, s[2:3]
1101
1103
; FEATURE-NEXT: v_mov_b32_e32 v0, 0
1102
1104
; FEATURE-NEXT: s_waitcnt vmcnt(0)
1103
- ; FEATURE-NEXT: global_store_dwordx2 v0, v[3:4 ], s[14:15]
1105
+ ; FEATURE-NEXT: global_store_dwordx2 v0, v[1:2 ], s[14:15]
1104
1106
; FEATURE-NEXT: s_endpgm
1105
1107
;
1106
1108
; DEFAULT-LABEL: v8i8_phi_zeroinit:
@@ -1109,35 +1111,37 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa
1109
1111
; DEFAULT-NEXT: v_lshlrev_b32_e32 v5, 3, v0
1110
1112
; DEFAULT-NEXT: v_cmp_lt_u32_e64 s[0:1], 14, v0
1111
1113
; DEFAULT-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
1112
- ; DEFAULT-NEXT: ; implicit-def: $vgpr3_vgpr4
1114
+ ; DEFAULT-NEXT: ; implicit-def: $vgpr1_vgpr2
1113
1115
; DEFAULT-NEXT: s_waitcnt lgkmcnt(0)
1114
- ; DEFAULT-NEXT: global_load_dwordx2 v[1:2 ], v5, s[8:9]
1116
+ ; DEFAULT-NEXT: global_load_dwordx2 v[3:4 ], v5, s[8:9]
1115
1117
; DEFAULT-NEXT: s_and_saveexec_b64 s[2:3], vcc
1116
1118
; DEFAULT-NEXT: s_cbranch_execz .LBB9_2
1117
1119
; DEFAULT-NEXT: ; %bb.1: ; %bb.1
1118
- ; DEFAULT-NEXT: global_load_dwordx2 v[3:4], v5, s[10:11]
1120
+ ; DEFAULT-NEXT: global_load_dwordx2 v[1:2], v5, s[10:11]
1121
+ ; DEFAULT-NEXT: s_mov_b32 s4, 0
1119
1122
; DEFAULT-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
1123
+ ; DEFAULT-NEXT: s_mov_b32 s5, s4
1120
1124
; DEFAULT-NEXT: s_waitcnt vmcnt(1)
1121
- ; DEFAULT-NEXT: v_mov_b32_e32 v1, 0
1125
+ ; DEFAULT-NEXT: v_mov_b32_e32 v3, s4
1126
+ ; DEFAULT-NEXT: v_mov_b32_e32 v4, s5
1122
1127
; DEFAULT-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
1123
1128
; DEFAULT-NEXT: s_and_b64 s[4:5], vcc, exec
1124
- ; DEFAULT-NEXT: v_mov_b32_e32 v2, v1
1125
1129
; DEFAULT-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1126
1130
; DEFAULT-NEXT: .LBB9_2: ; %Flow
1127
1131
; DEFAULT-NEXT: s_or_b64 exec, exec, s[2:3]
1128
1132
; DEFAULT-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
1129
1133
; DEFAULT-NEXT: s_cbranch_execz .LBB9_4
1130
1134
; DEFAULT-NEXT: ; %bb.3: ; %bb.2
1131
1135
; DEFAULT-NEXT: s_waitcnt vmcnt(0)
1132
- ; DEFAULT-NEXT: v_mov_b32_e32 v4, v2
1136
+ ; DEFAULT-NEXT: v_mov_b32_e32 v1, v3
1133
1137
; DEFAULT-NEXT: v_mov_b32_e32 v0, 0
1134
- ; DEFAULT-NEXT: v_mov_b32_e32 v3, v1
1135
- ; DEFAULT-NEXT: global_store_dwordx2 v0, v[1:2 ], s[12:13]
1138
+ ; DEFAULT-NEXT: v_mov_b32_e32 v2, v4
1139
+ ; DEFAULT-NEXT: global_store_dwordx2 v0, v[3:4 ], s[12:13]
1136
1140
; DEFAULT-NEXT: .LBB9_4: ; %bb.3
1137
1141
; DEFAULT-NEXT: s_or_b64 exec, exec, s[2:3]
1138
1142
; DEFAULT-NEXT: v_mov_b32_e32 v0, 0
1139
1143
; DEFAULT-NEXT: s_waitcnt vmcnt(0)
1140
- ; DEFAULT-NEXT: global_store_dwordx2 v0, v[3:4 ], s[14:15]
1144
+ ; DEFAULT-NEXT: global_store_dwordx2 v0, v[1:2 ], s[14:15]
1141
1145
; DEFAULT-NEXT: s_endpgm
1142
1146
entry:
1143
1147
%idx = call i32 @llvm.amdgcn.workitem.id.x ()
@@ -1277,14 +1281,14 @@ define amdgpu_kernel void @v8i8_phi_const(ptr addrspace(1) %src1, ptr addrspace(
1277
1281
; FEATURE-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
1278
1282
; FEATURE-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
1279
1283
; FEATURE-NEXT: s_and_b64 s[4:5], vcc, exec
1280
- ; FEATURE-NEXT: v_mov_b32_e32 v5, 8
1281
- ; FEATURE-NEXT: v_mov_b32_e32 v6, 7
1282
- ; FEATURE-NEXT: v_mov_b32_e32 v7, 6
1283
- ; FEATURE-NEXT: v_mov_b32_e32 v2, 5
1284
- ; FEATURE-NEXT: v_mov_b32_e32 v8, 4
1285
- ; FEATURE-NEXT: v_mov_b32_e32 v9, 3
1286
- ; FEATURE-NEXT: v_mov_b32_e32 v10, 2
1287
1284
; FEATURE-NEXT: v_mov_b32_e32 v1, 1
1285
+ ; FEATURE-NEXT: v_mov_b32_e32 v10, 2
1286
+ ; FEATURE-NEXT: v_mov_b32_e32 v9, 3
1287
+ ; FEATURE-NEXT: v_mov_b32_e32 v8, 4
1288
+ ; FEATURE-NEXT: v_mov_b32_e32 v2, 5
1289
+ ; FEATURE-NEXT: v_mov_b32_e32 v7, 6
1290
+ ; FEATURE-NEXT: v_mov_b32_e32 v6, 7
1291
+ ; FEATURE-NEXT: v_mov_b32_e32 v5, 8
1288
1292
; FEATURE-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1289
1293
; FEATURE-NEXT: s_waitcnt vmcnt(0)
1290
1294
; FEATURE-NEXT: v_lshrrev_b32_e32 v16, 24, v4
@@ -1363,14 +1367,14 @@ define amdgpu_kernel void @v8i8_phi_const(ptr addrspace(1) %src1, ptr addrspace(
1363
1367
; DEFAULT-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
1364
1368
; DEFAULT-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
1365
1369
; DEFAULT-NEXT: s_and_b64 s[4:5], vcc, exec
1366
- ; DEFAULT-NEXT: v_mov_b32_e32 v5, 8
1367
- ; DEFAULT-NEXT: v_mov_b32_e32 v6, 7
1368
- ; DEFAULT-NEXT: v_mov_b32_e32 v7, 6
1369
- ; DEFAULT-NEXT: v_mov_b32_e32 v2, 5
1370
- ; DEFAULT-NEXT: v_mov_b32_e32 v8, 4
1371
- ; DEFAULT-NEXT: v_mov_b32_e32 v9, 3
1372
- ; DEFAULT-NEXT: v_mov_b32_e32 v10, 2
1373
1370
; DEFAULT-NEXT: v_mov_b32_e32 v1, 1
1371
+ ; DEFAULT-NEXT: v_mov_b32_e32 v10, 2
1372
+ ; DEFAULT-NEXT: v_mov_b32_e32 v9, 3
1373
+ ; DEFAULT-NEXT: v_mov_b32_e32 v8, 4
1374
+ ; DEFAULT-NEXT: v_mov_b32_e32 v2, 5
1375
+ ; DEFAULT-NEXT: v_mov_b32_e32 v7, 6
1376
+ ; DEFAULT-NEXT: v_mov_b32_e32 v6, 7
1377
+ ; DEFAULT-NEXT: v_mov_b32_e32 v5, 8
1374
1378
; DEFAULT-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1375
1379
; DEFAULT-NEXT: s_waitcnt vmcnt(0)
1376
1380
; DEFAULT-NEXT: v_lshrrev_b32_e32 v16, 24, v4
0 commit comments