Skip to content

Commit e73bff8

Browse files
authored
[AMDGPU] New RegBankSelect: Handle all 32/64 bit pointer types for B32/B64 rule (#142560)
The previous system explicitly enumerated the pointer types. P0 was missing, so we couldn't handle, for example, a select of P0s. Generalize the logic to simply check the width of the pointer for 32/64-bit pointers; this should handle all common address spaces.
1 parent 50a7511 commit e73bff8

File tree

2 files changed

+199
-5
lines changed

2 files changed

+199
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -199,13 +199,11 @@ UniformityLLTOpPredicateID LLTToId(LLT Ty) {
199199

200200
UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
201201
if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
202-
Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) ||
203-
Ty == LLT::pointer(6, 32))
202+
(Ty.isPointer() && Ty.getSizeInBits() == 32))
204203
return B32;
205204
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
206-
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) ||
207-
Ty == LLT::pointer(4, 64) ||
208-
(Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS))
205+
Ty == LLT::fixed_vector(4, 16) ||
206+
(Ty.isPointer() && Ty.getSizeInBits() == 64))
209207
return B64;
210208
if (Ty == LLT::fixed_vector(3, 32))
211209
return B96;

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,31 @@ body: |
896896
%5:_(<4 x s16>) = G_SELECT %4, %2, %3
897897
...
898898

899+
---
900+
name: select_p0_scc_ss
901+
legalized: true
902+
body: |
903+
bb.0:
904+
liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
905+
; CHECK-LABEL: name: select_p0_scc_ss
906+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
907+
; CHECK-NEXT: {{ $}}
908+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
909+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
910+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3
911+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(p0) = COPY $sgpr4_sgpr5
912+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
913+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
914+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C]]
915+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(p0) = G_SELECT [[AND]](s32), [[COPY2]], [[COPY3]]
916+
%0:_(s32) = COPY $sgpr0
917+
%1:_(s32) = COPY $sgpr1
918+
%2:_(p0) = COPY $sgpr2_sgpr3
919+
%3:_(p0) = COPY $sgpr4_sgpr5
920+
%4:_(s1) = G_ICMP intpred(ne), %0, %1
921+
%5:_(p0) = G_SELECT %4, %2, %3
922+
...
923+
899924
---
900925
name: select_p1_scc_ss
901926
legalized: true
@@ -946,6 +971,36 @@ body: |
946971
%5:_(p999) = G_SELECT %4, %2, %3
947972
...
948973

974+
---
975+
name: select_p0_scc_sv
976+
legalized: true
977+
body: |
978+
bb.0:
979+
liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1
980+
; CHECK-LABEL: name: select_p0_scc_sv
981+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1
982+
; CHECK-NEXT: {{ $}}
983+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
984+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
985+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3
986+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
987+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
988+
; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32)
989+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY2]](p0)
990+
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0)
991+
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0)
992+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV]], [[UV2]]
993+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV1]], [[UV3]]
994+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
995+
%0:_(s32) = COPY $sgpr0
996+
%1:_(s32) = COPY $sgpr1
997+
%2:_(p0) = COPY $sgpr2_sgpr3
998+
%3:_(p0) = COPY $vgpr0_vgpr1
999+
%4:_(s1) = G_ICMP intpred(ne), %0, %1
1000+
%5:_(p0) = G_SELECT %4, %2, %3
1001+
1002+
...
1003+
9491004
---
9501005
name: select_p1_scc_sv
9511006
legalized: true
@@ -976,6 +1031,35 @@ body: |
9761031
9771032
...
9781033

1034+
---
1035+
name: select_p0_scc_vs
1036+
legalized: true
1037+
body: |
1038+
bb.0:
1039+
liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1
1040+
; CHECK-LABEL: name: select_p0_scc_vs
1041+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1
1042+
; CHECK-NEXT: {{ $}}
1043+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1044+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
1045+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3
1046+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
1047+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
1048+
; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32)
1049+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY2]](p0)
1050+
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0)
1051+
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0)
1052+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV]], [[UV2]]
1053+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV1]], [[UV3]]
1054+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
1055+
%0:_(s32) = COPY $sgpr0
1056+
%1:_(s32) = COPY $sgpr1
1057+
%2:_(p0) = COPY $sgpr2_sgpr3
1058+
%3:_(p0) = COPY $vgpr0_vgpr1
1059+
%4:_(s1) = G_ICMP intpred(ne), %0, %1
1060+
%5:_(p0) = G_SELECT %4, %3, %2
1061+
...
1062+
9791063
---
9801064
name: select_p1_scc_vs
9811065
legalized: true
@@ -1033,6 +1117,35 @@ body: |
10331117
%5:_(p1) = G_SELECT %4, %2, %3
10341118
...
10351119

1120+
---
1121+
name: select_p0_vcc_ss
1122+
legalized: true
1123+
body: |
1124+
bb.0:
1125+
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1
1126+
; CHECK-LABEL: name: select_p0_vcc_ss
1127+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1
1128+
; CHECK-NEXT: {{ $}}
1129+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
1130+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3
1131+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
1132+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
1133+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]]
1134+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
1135+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p0) = COPY [[COPY1]](p0)
1136+
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0)
1137+
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY5]](p0)
1138+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
1139+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
1140+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
1141+
%0:_(p0) = COPY $sgpr0_sgpr1
1142+
%1:_(p0) = COPY $sgpr2_sgpr3
1143+
%2:_(s32) = COPY $vgpr0
1144+
%3:_(s32) = COPY $vgpr1
1145+
%4:_(s1) = G_ICMP intpred(ne), %2, %3
1146+
%5:_(p0) = G_SELECT %4, %0, %1
1147+
...
1148+
10361149
---
10371150
name: select_p1_vcc_ss
10381151
legalized: true
@@ -1062,6 +1175,34 @@ body: |
10621175
%5:_(p1) = G_SELECT %4, %0, %1
10631176
...
10641177

1178+
---
1179+
name: select_p0_vcc_sv
1180+
legalized: true
1181+
body: |
1182+
bb.0:
1183+
liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3
1184+
; CHECK-LABEL: name: select_p0_vcc_sv
1185+
; CHECK: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3
1186+
; CHECK-NEXT: {{ $}}
1187+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
1188+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
1189+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
1190+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
1191+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
1192+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
1193+
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0)
1194+
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0)
1195+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
1196+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
1197+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
1198+
%0:_(p0) = COPY $sgpr0_sgpr1
1199+
%1:_(s32) = COPY $vgpr0
1200+
%2:_(s32) = COPY $vgpr1
1201+
%3:_(p0) = COPY $vgpr2_vgpr3
1202+
%4:_(s1) = G_ICMP intpred(ne), %1, %2
1203+
%5:_(p0) = G_SELECT %4, %0, %3
1204+
...
1205+
10651206
---
10661207
name: select_p1_vcc_sv
10671208
legalized: true
@@ -1090,6 +1231,34 @@ body: |
10901231
%5:_(p1) = G_SELECT %4, %0, %3
10911232
...
10921233

1234+
---
1235+
name: select_p0_vcc_vs
1236+
legalized: true
1237+
body: |
1238+
bb.0:
1239+
liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3
1240+
; CHECK-LABEL: name: select_p0_vcc_vs
1241+
; CHECK: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3
1242+
; CHECK-NEXT: {{ $}}
1243+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
1244+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
1245+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
1246+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
1247+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
1248+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
1249+
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0)
1250+
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0)
1251+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
1252+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
1253+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
1254+
%0:_(p0) = COPY $sgpr0_sgpr1
1255+
%1:_(s32) = COPY $vgpr0
1256+
%2:_(s32) = COPY $vgpr1
1257+
%3:_(p0) = COPY $vgpr2_vgpr3
1258+
%4:_(s1) = G_ICMP intpred(ne), %1, %2
1259+
%5:_(p0) = G_SELECT %4, %3, %0
1260+
...
1261+
10931262
---
10941263
name: select_p1_vcc_vs
10951264
legalized: true
@@ -1118,6 +1287,33 @@ body: |
11181287
%5:_(p1) = G_SELECT %4, %3, %0
11191288
...
11201289

1290+
---
1291+
name: select_p0_vcc_vv
1292+
legalized: true
1293+
body: |
1294+
bb.0:
1295+
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
1296+
; CHECK-LABEL: name: select_p0_vcc_vv
1297+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
1298+
; CHECK-NEXT: {{ $}}
1299+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
1300+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
1301+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3
1302+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr4_vgpr5
1303+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
1304+
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p0)
1305+
; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0)
1306+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
1307+
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
1308+
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
1309+
%0:_(s32) = COPY $vgpr0
1310+
%1:_(s32) = COPY $vgpr1
1311+
%2:_(p0) = COPY $vgpr2_vgpr3
1312+
%3:_(p0) = COPY $vgpr4_vgpr5
1313+
%4:_(s1) = G_ICMP intpred(ne), %0, %1
1314+
%5:_(p0) = G_SELECT %4, %2, %3
1315+
...
1316+
11211317
---
11221318
name: select_p1_vcc_vv
11231319
legalized: true

0 commit comments

Comments
 (0)