@@ -1256,18 +1256,39 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1256
1256
EltVT = MVT::i64 ;
1257
1257
NumElts = 2 ;
1258
1258
}
1259
+
1260
+ std::optional<unsigned > Opcode;
1261
+
1259
1262
if (EltVT.isVector ()) {
1260
1263
NumElts = EltVT.getVectorNumElements ();
1261
1264
EltVT = EltVT.getVectorElementType ();
1262
- // vectors of 8/16bits type are loaded/stored as multiples of v4i8/v2x16
1263
- // elements.
1265
+ // Vectors of 8/16/32-bit types are loaded/stored as multiples of
1266
+ // v4i8/v2x16/v2x32 elements.
1264
1267
if ((EltVT == MVT::f32 && OrigType == MVT::v2f32) ||
1265
1268
(EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1266
1269
(EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1267
1270
(EltVT == MVT::i16 && OrigType == MVT::v2i16) ||
1268
1271
(EltVT == MVT::i8 && OrigType == MVT::v4i8)) {
1269
1272
assert (NumElts % OrigType.getVectorNumElements () == 0 &&
1270
1273
" NumElts must be divisible by the number of elts in subvectors" );
1274
+ if (N->getOpcode () == ISD::LOAD ||
1275
+ N->getOpcode () == ISD::INTRINSIC_W_CHAIN) {
1276
+ switch (OrigType.getSimpleVT ().SimpleTy ) {
1277
+ case MVT::v2f32:
1278
+ Opcode = N->getOpcode () == ISD::LOAD ? NVPTX::INT_PTX_LDG_GLOBAL_i64
1279
+ : NVPTX::INT_PTX_LDU_GLOBAL_i64;
1280
+ break ;
1281
+ case MVT::v2f16:
1282
+ case MVT::v2bf16:
1283
+ case MVT::v2i16:
1284
+ case MVT::v4i8:
1285
+ Opcode = N->getOpcode () == ISD::LOAD ? NVPTX::INT_PTX_LDG_GLOBAL_i32
1286
+ : NVPTX::INT_PTX_LDU_GLOBAL_i32;
1287
+ break ;
1288
+ default :
1289
+ llvm_unreachable (" Unhandled packed vector type" );
1290
+ }
1291
+ }
1271
1292
EltVT = OrigType;
1272
1293
NumElts /= OrigType.getVectorNumElements ();
1273
1294
}
@@ -1287,57 +1308,58 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1287
1308
SelectADDR (Op1, Base, Offset);
1288
1309
SDValue Ops[] = {Base, Offset, Chain};
1289
1310
1290
- std::optional<unsigned > Opcode;
1291
- switch (N->getOpcode ()) {
1292
- default :
1293
- return false ;
1294
- case ISD::LOAD:
1295
- Opcode = pickOpcodeForVT (
1296
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_GLOBAL_i8,
1297
- NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
1298
- NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
1299
- NVPTX::INT_PTX_LDG_GLOBAL_f64);
1300
- break ;
1301
- case ISD::INTRINSIC_W_CHAIN:
1302
- Opcode = pickOpcodeForVT (
1303
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_GLOBAL_i8,
1304
- NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
1305
- NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
1306
- NVPTX::INT_PTX_LDU_GLOBAL_f64);
1307
- break ;
1308
- case NVPTXISD::LoadV2:
1309
- Opcode = pickOpcodeForVT (
1310
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v2i8_ELE,
1311
- NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
1312
- NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
1313
- NVPTX::INT_PTX_LDG_G_v2f64_ELE);
1314
- break ;
1315
- case NVPTXISD::LDUV2:
1316
- Opcode = pickOpcodeForVT (
1317
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v2i8_ELE,
1318
- NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
1319
- NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
1320
- NVPTX::INT_PTX_LDU_G_v2f64_ELE);
1321
- break ;
1322
- case NVPTXISD::LoadV4:
1323
- Opcode = pickOpcodeForVT (
1324
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v4i8_ELE,
1325
- NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
1326
- NVPTX::INT_PTX_LDG_G_v4i64_ELE, NVPTX::INT_PTX_LDG_G_v4f32_ELE,
1327
- NVPTX::INT_PTX_LDG_G_v4f64_ELE);
1328
- break ;
1329
- case NVPTXISD::LDUV4:
1330
- Opcode = pickOpcodeForVT (
1331
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v4i8_ELE,
1332
- NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
1333
- {/* no v4i64 */ }, NVPTX::INT_PTX_LDU_G_v4f32_ELE, {/* no v4f64 */ });
1334
- break ;
1335
- case NVPTXISD::LoadV8:
1336
- Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
1337
- {/* no v8i16 */ }, NVPTX::INT_PTX_LDG_G_v8i32_ELE,
1338
- {/* no v8i64 */ }, NVPTX::INT_PTX_LDG_G_v8f32_ELE,
1339
- {/* no v8f64 */ });
1340
- break ;
1311
+ if (!Opcode) {
1312
+ switch (N->getOpcode ()) {
1313
+ default :
1314
+ return false ;
1315
+ case ISD::LOAD:
1316
+ Opcode = pickOpcodeForVT (
1317
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_GLOBAL_i8,
1318
+ NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
1319
+ NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
1320
+ NVPTX::INT_PTX_LDG_GLOBAL_f64);
1321
+ break ;
1322
+ case ISD::INTRINSIC_W_CHAIN:
1323
+ Opcode = pickOpcodeForVT (
1324
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_GLOBAL_i8,
1325
+ NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
1326
+ NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
1327
+ NVPTX::INT_PTX_LDU_GLOBAL_f64);
1328
+ break ;
1329
+ case NVPTXISD::LoadV2:
1330
+ Opcode = pickOpcodeForVT (
1331
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v2i8_ELE,
1332
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
1333
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
1334
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE);
1335
+ break ;
1336
+ case NVPTXISD::LDUV2:
1337
+ Opcode = pickOpcodeForVT (
1338
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v2i8_ELE,
1339
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
1340
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
1341
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE);
1342
+ break ;
1343
+ case NVPTXISD::LoadV4:
1344
+ Opcode = pickOpcodeForVT (
1345
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v4i8_ELE,
1346
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
1347
+ NVPTX::INT_PTX_LDG_G_v4i64_ELE, NVPTX::INT_PTX_LDG_G_v4f32_ELE,
1348
+ NVPTX::INT_PTX_LDG_G_v4f64_ELE);
1349
+ break ;
1350
+ case NVPTXISD::LDUV4:
1351
+ Opcode = pickOpcodeForVT (
1352
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v4i8_ELE,
1353
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
1354
+ {/* no v4i64 */ }, NVPTX::INT_PTX_LDU_G_v4f32_ELE, {/* no v4f64 */ });
1355
+ break ;
1356
+ case NVPTXISD::LoadV8:
1357
+ Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy , {/* no v8i8 */ },
1358
+ {/* no v8i16 */ }, NVPTX::INT_PTX_LDG_G_v8i32_ELE,
1359
+ {/* no v8i64 */ }, NVPTX::INT_PTX_LDG_G_v8f32_ELE,
1360
+ {/* no v8f64 */ });
1361
+ break ;
1362
+ }
1341
1363
}
1342
1364
if (!Opcode)
1343
1365
return false ;
0 commit comments