From 7e3815bafdcba79ad0f0f66b29b6d15ddf796d13 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Sat, 28 Jun 2025 13:55:04 +0530 Subject: [PATCH 01/19] add prefetch tensormap variant --- llvm/docs/NVPTXUsage.rst | 8 ++++++- llvm/include/llvm/IR/IntrinsicsNVVM.td | 4 ++++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 9 +++++++ llvm/test/CodeGen/NVPTX/prefetch.ll | 30 ++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 11017fe4e01b4..ca951811b73dd 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -971,6 +971,9 @@ Syntax: declare void @llvm.nvvm.prefetch.L1(ptr %ptr) declare void @llvm.nvvm.prefetch.L2(ptr %ptr) + declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) + declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr) @@ -983,7 +986,10 @@ The '``@llvm.nvvm.prefetch.*``' and '``@llvm.nvvm.prefetchu.*``' intrinsic correspond to the '``prefetch.*``;' and '``prefetchu.*``' family of PTX instructions. The '``prefetch.*``' instructions bring the cache line containing the specified address in global or local memory address space into the -specified cache level (L1 or L2). The '`prefetchu.*``' instruction brings the cache line +specified cache level (L1 or L2). If the '``.tensormap``' qualifier is specified then the +prefetch instruction brings the cache line containing the specified address in the +'``.const``' or '``.param memory``' state space for subsequent use by the '``cp.async.bulk.tensor``' +instruction.The '`prefetchu.*``' instruction brings the cache line containing the specified generic address into the specified uniform cache level. If no address space is specified, it is assumed to be generic address. The intrinsic uses and eviction priority which can be accessed by the '``.level::eviction_priority``' modifier. diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 0375f29ad8906..0678bba51e4a3 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -137,6 +137,7 @@ def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr +def llvm_constant_ptr_ty: LLVMQualPointerType<4>; // (const)ptr def llvm_local_ptr_ty : LLVMQualPointerType<5>; // (local)ptr def llvm_tmem_ptr_ty : LLVMQualPointerType<6>; // (tensor memory)ptr def llvm_shared_cluster_ptr_ty : LLVMQualPointerType<7>; // (shared_cluster)ptr @@ -2092,6 +2093,9 @@ let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture; } + def int_nvvm_prefetch_tensormap_p0: DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; + def int_nvvm_prefetch_tensormap_p4 : DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>; + foreach eviction_priority = ["evict_normal", "evict_last"] in def int_nvvm_prefetch_global_L2_ # eviction_priority : Intrinsic<[], [llvm_global_ptr_ty]>; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index cc1fd027d8515..8afc7063c363a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -760,6 +760,15 @@ def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">; def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">; def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">; def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">; +def PREFETCH_CONST_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), + "prefetch.const.tensormap", + [(int_nvvm_prefetch_tensormap_p4 addr:$addr)]>, + Requires<[hasPTX<80>, hasSM<90>]>; + +def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), + "prefetch.tensormap", + [(int_nvvm_prefetch_tensormap_p0 addr:$addr)]>, + Requires<[hasPTX<80>, hasSM<90>]>; def PREFETCH_GLOBAL_L2_EVICT_NORMAL : BasicNVPTXInst<(outs), (ins ADDR:$addr), "prefetch.global.L2::evict_normal", diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index a64e4fe7a508e..b63155ff49185 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -12,6 +12,9 @@ declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr) declare void @llvm.nvvm.prefetch.L1(ptr %ptr) declare void @llvm.nvvm.prefetch.L2(ptr %ptr) +declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) +declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr) @@ -67,6 +70,33 @@ define void @prefetch_(ptr %ptr) { ret void } + +define void @prefetch_generic_tensormap(ptr %ptr) { +; CHECK-PTX64-LABEL: prefetch_generic_tensormap( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_param_0]; +; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1]; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) + ret void +} + +define void @prefetch_const_tensormap(ptr addrspace(4) %const_ptr) { +; CHECK-PTX64-LABEL: prefetch_const_tensormap( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; +; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1]; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + ret void +} + define void @prefetchu_l1(ptr %ptr) { ; CHECK-PTX64-LABEL: prefetchu_l1( ; CHECK-PTX64: { From 775daa341731a301e9cfbe544685962e5c7122fc Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 12:04:37 +0530 Subject: [PATCH 02/19] use generic and const names --- llvm/docs/NVPTXUsage.rst | 4 ++-- llvm/include/llvm/IR/IntrinsicsNVVM.td | 4 ++-- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 8 ++------ llvm/test/CodeGen/NVPTX/prefetch.ll | 8 ++++---- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index ca951811b73dd..33d36b9411c1a 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -971,8 +971,8 @@ Syntax: declare void @llvm.nvvm.prefetch.L1(ptr %ptr) declare void @llvm.nvvm.prefetch.L2(ptr %ptr) - declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) - declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + declare void @llvm.nvvm.prefetch.generic.tensormap(ptr %ptr) + declare void @llvm.nvvm.prefetch.const.tensormap(ptr addrspace(4) %const_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr) diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 0678bba51e4a3..c8df95994011b 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -2093,8 +2093,8 @@ let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture; } - def int_nvvm_prefetch_tensormap_p0: DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; - def int_nvvm_prefetch_tensormap_p4 : DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>; + def int_nvvm_prefetch_generic_tensormap : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; + def int_nvvm_prefetch_const_tensormap: DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>; foreach eviction_priority = ["evict_normal", "evict_last"] in def int_nvvm_prefetch_global_L2_ # eviction_priority : Intrinsic<[], [llvm_global_ptr_ty]>; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 8afc7063c363a..d8446b4b4dbe6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -760,14 +760,10 @@ def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">; def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">; def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">; def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">; -def PREFETCH_CONST_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.const.tensormap", - [(int_nvvm_prefetch_tensormap_p4 addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; - +def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap">; def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), "prefetch.tensormap", - [(int_nvvm_prefetch_tensormap_p0 addr:$addr)]>, + [(int_nvvm_prefetch_generic_tensormap addr:$addr)]>, Requires<[hasPTX<80>, hasSM<90>]>; def PREFETCH_GLOBAL_L2_EVICT_NORMAL : BasicNVPTXInst<(outs), (ins ADDR:$addr), diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index b63155ff49185..d9b4e48167310 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -12,8 +12,8 @@ declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr) declare void @llvm.nvvm.prefetch.L1(ptr %ptr) declare void @llvm.nvvm.prefetch.L2(ptr %ptr) -declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) -declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) +declare void @llvm.nvvm.prefetch.generic.tensormap(ptr %ptr) +declare void @llvm.nvvm.prefetch.const.tensormap(ptr addrspace(4) %const_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr) @@ -80,7 +80,7 @@ define void @prefetch_generic_tensormap(ptr %ptr) { ; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_param_0]; ; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) + tail call void @llvm.nvvm.prefetch.generic.tensormap(ptr %ptr) ret void } @@ -93,7 +93,7 @@ define void @prefetch_const_tensormap(ptr addrspace(4) %const_ptr) { ; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; ; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + tail call void @llvm.nvvm.prefetch.const.tensormap(ptr addrspace(4) %const_ptr) ret void } From aa8e4d019341f2bde555e2cc5de636c865ecea22 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 12:06:26 +0530 Subject: [PATCH 03/19] format --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index c8df95994011b..a26a35bb0d947 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -2093,8 +2093,8 @@ let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture; } - def int_nvvm_prefetch_generic_tensormap : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; - def int_nvvm_prefetch_const_tensormap: DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>; + def int_nvvm_prefetch_generic_tensormap : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; + def int_nvvm_prefetch_const_tensormap : DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>; foreach eviction_priority = ["evict_normal", "evict_last"] in def int_nvvm_prefetch_global_L2_ # eviction_priority : Intrinsic<[], [llvm_global_ptr_ty]>; From 6931c80342ccc5acfc2101d93b61c10cf79051d0 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 13:23:17 +0530 Subject: [PATCH 04/19] refresh --- llvm/docs/NVPTXUsage.rst | 2 +- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 33d36b9411c1a..2c7a531f34a8f 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -989,7 +989,7 @@ specified address in global or local memory address space into the specified cache level (L1 or L2). If the '``.tensormap``' qualifier is specified then the prefetch instruction brings the cache line containing the specified address in the '``.const``' or '``.param memory``' state space for subsequent use by the '``cp.async.bulk.tensor``' -instruction.The '`prefetchu.*``' instruction brings the cache line +instruction. The '`prefetchu.*``' instruction brings the cache line containing the specified generic address into the specified uniform cache level. If no address space is specified, it is assumed to be generic address. The intrinsic uses and eviction priority which can be accessed by the '``.level::eviction_priority``' modifier. diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index d8446b4b4dbe6..a4ee24aaf4ce3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -760,11 +760,11 @@ def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">; def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">; def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">; def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">; -def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap">; -def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.tensormap", - [(int_nvvm_prefetch_generic_tensormap addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; +def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap">; +def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), + "prefetch.tensormap", + [(int_nvvm_prefetch_generic_tensormap addr:$addr)]>, + Requires<[hasPTX<80>, hasSM<90>]>; def PREFETCH_GLOBAL_L2_EVICT_NORMAL : BasicNVPTXInst<(outs), (ins ADDR:$addr), "prefetch.global.L2::evict_normal", From 887e139b515d60936c834e6149c6ebded8ba860c Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 13:39:41 +0530 Subject: [PATCH 05/19] refactor and refresh --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 10 +++--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 41 ++++++++---------------- 2 files changed, 19 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index a26a35bb0d947..1be79bb5525ae 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -2088,18 +2088,18 @@ foreach dim = 1...5 in { // Intrinsics for Prefetch and Prefetchu let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture>] in { foreach level = ["L1", "L2"] in { - def int_nvvm_prefetch_ # level : Intrinsic<[], [llvm_ptr_ty]>; - def int_nvvm_prefetch_global_ # level : Intrinsic<[], [llvm_global_ptr_ty]>; - def int_nvvm_prefetch_local_ # level : Intrinsic<[], [llvm_local_ptr_ty]>; + def int_nvvm_prefetch_ # level : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; + def int_nvvm_prefetch_global_ # level : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty]>; + def int_nvvm_prefetch_local_ # level : DefaultAttrsIntrinsic<[], [llvm_local_ptr_ty]>; } def int_nvvm_prefetch_generic_tensormap : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; def int_nvvm_prefetch_const_tensormap : DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>; foreach eviction_priority = ["evict_normal", "evict_last"] in - def int_nvvm_prefetch_global_L2_ # eviction_priority : Intrinsic<[], [llvm_global_ptr_ty]>; + def int_nvvm_prefetch_global_L2_ # eviction_priority : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty]>; - def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty]>; + def int_nvvm_prefetchu_L1 : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; } // applypriority diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index a4ee24aaf4ce3..58990bfc1f1a1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -746,38 +746,25 @@ foreach dim = [1, 2, 3, 4, 5] in { //Prefetch and Prefetchu -class PREFETCH_INTRS : +class PREFETCH_INTRS : BasicNVPTXInst<(outs), (ins ADDR:$addr), InstName, - [(!cast(!strconcat("int_nvvm_", - !subst(".", "_", InstName))) addr:$addr)]>, + [(!cast(IntrName) addr:$addr)]>, Requires<[hasPTX<80>, hasSM<90>]>; -def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1">; -def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2">; -def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">; -def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">; -def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">; -def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">; -def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap">; -def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.tensormap", - [(int_nvvm_prefetch_generic_tensormap addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; - -def PREFETCH_GLOBAL_L2_EVICT_NORMAL : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.global.L2::evict_normal", - [(int_nvvm_prefetch_global_L2_evict_normal addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; - -def PREFETCH_GLOBAL_L2_EVICT_LAST : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.global.L2::evict_last", - [(int_nvvm_prefetch_global_L2_evict_last addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; - - -def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">; +def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1", "int_nvvm_prefetch_L1">; +def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2", "int_nvvm_prefetch_L2">; +def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1", "int_nvvm_prefetch_global_L1">; +def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1", "int_nvvm_prefetch_local_L1">; +def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2", "int_nvvm_prefetch_global_L2">; +def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2", "int_nvvm_prefetch_local_L2">; +def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap", "int_nvvm_prefetch_const_tensormap">; +def PREFETCH_GENERIC_TENSORMAP : PREFETCH_INTRS<"prefetch.tensormap", "int_nvvm_prefetch_generic_tensormap">; +def PREFETCH_GLOBAL_L2_EVICT_NORMAL : PREFETCH_INTRS<"prefetch.global.L2::evict_normal", "int_nvvm_prefetch_global_L2_evict_normal">; +def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", "int_nvvm_prefetch_global_L2_evict_last">; + +def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", "int_nvvm_prefetchu_L1">; //Applypriority intrinsics class APPLYPRIORITY_L2_INTRS : From 9ddbcfe86d6282227b6462e09d16d31b82b831fb Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 13:42:22 +0530 Subject: [PATCH 06/19] format --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 58990bfc1f1a1..e38a31b572415 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -759,10 +759,14 @@ def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1", "int_nvvm_prefetch def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1", "int_nvvm_prefetch_local_L1">; def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2", "int_nvvm_prefetch_global_L2">; def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2", "int_nvvm_prefetch_local_L2">; -def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap", "int_nvvm_prefetch_const_tensormap">; -def PREFETCH_GENERIC_TENSORMAP : PREFETCH_INTRS<"prefetch.tensormap", "int_nvvm_prefetch_generic_tensormap">; -def PREFETCH_GLOBAL_L2_EVICT_NORMAL : PREFETCH_INTRS<"prefetch.global.L2::evict_normal", "int_nvvm_prefetch_global_L2_evict_normal">; -def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", "int_nvvm_prefetch_global_L2_evict_last">; +def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap", + "int_nvvm_prefetch_const_tensormap">; +def PREFETCH_GENERIC_TENSORMAP : PREFETCH_INTRS<"prefetch.tensormap", + "int_nvvm_prefetch_generic_tensormap">; +def PREFETCH_GLOBAL_L2_EVICT_NORMAL : PREFETCH_INTRS<"prefetch.global.L2::evict_normal", + "int_nvvm_prefetch_global_L2_evict_normal">; +def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", + "int_nvvm_prefetch_global_L2_evict_last">; def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", "int_nvvm_prefetchu_L1">; From 383d07ee427c04f4299a3e8325b2cea274f4d747 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 13:43:20 +0530 Subject: [PATCH 07/19] format --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index e38a31b572415..1bb869160fd1e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -765,7 +765,7 @@ def PREFETCH_GENERIC_TENSORMAP : PREFETCH_INTRS<"prefetch.tensormap", "int_nvvm_prefetch_generic_tensormap">; def PREFETCH_GLOBAL_L2_EVICT_NORMAL : PREFETCH_INTRS<"prefetch.global.L2::evict_normal", "int_nvvm_prefetch_global_L2_evict_normal">; -def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", +def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", "int_nvvm_prefetch_global_L2_evict_last">; def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", "int_nvvm_prefetchu_L1">; From 0851ae04824843439cfc79215280a798ba238dc9 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 13:44:22 +0530 Subject: [PATCH 08/19] format spaces --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1bb869160fd1e..1e07c1bf62234 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -752,7 +752,6 @@ class PREFETCH_INTRS : [(!cast(IntrName) addr:$addr)]>, Requires<[hasPTX<80>, hasSM<90>]>; - def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1", "int_nvvm_prefetch_L1">; def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2", "int_nvvm_prefetch_L2">; def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1", "int_nvvm_prefetch_global_L1">; @@ -766,8 +765,7 @@ def PREFETCH_GENERIC_TENSORMAP : PREFETCH_INTRS<"prefetch.tensormap", def PREFETCH_GLOBAL_L2_EVICT_NORMAL : PREFETCH_INTRS<"prefetch.global.L2::evict_normal", "int_nvvm_prefetch_global_L2_evict_normal">; def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", - "int_nvvm_prefetch_global_L2_evict_last">; - + "int_nvvm_prefetch_global_L2_evict_last">; def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", "int_nvvm_prefetchu_L1">; //Applypriority intrinsics From 9ea1ca35c90748088cb5cbc0451e63cbd85c3e06 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 13:50:11 +0530 Subject: [PATCH 09/19] refresh --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1e07c1bf62234..1dbafbb2885a7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -744,29 +744,29 @@ foreach dim = [1, 2, 3, 4, 5] in { } } -//Prefetch and Prefetchu +//Prefetchu and Prefetch -class PREFETCH_INTRS : +class PREFETCH_INTRS : BasicNVPTXInst<(outs), (ins ADDR:$addr), InstName, - [(!cast(IntrName) addr:$addr)]>, + [(Intr addr:$addr)]>, Requires<[hasPTX<80>, hasSM<90>]>; - -def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1", "int_nvvm_prefetch_L1">; -def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2", "int_nvvm_prefetch_L2">; -def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1", "int_nvvm_prefetch_global_L1">; -def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1", "int_nvvm_prefetch_local_L1">; -def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2", "int_nvvm_prefetch_global_L2">; -def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2", "int_nvvm_prefetch_local_L2">; + +def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", int_nvvm_prefetchu_L1>; +def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1", int_nvvm_prefetch_L1>; +def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2", int_nvvm_prefetch_L2>; +def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1", int_nvvm_prefetch_global_L1>; +def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1", int_nvvm_prefetch_local_L1>; +def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2", int_nvvm_prefetch_global_L2>; +def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2", int_nvvm_prefetch_local_L2>; def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap", - "int_nvvm_prefetch_const_tensormap">; + int_nvvm_prefetch_const_tensormap>; def PREFETCH_GENERIC_TENSORMAP : PREFETCH_INTRS<"prefetch.tensormap", - "int_nvvm_prefetch_generic_tensormap">; + int_nvvm_prefetch_generic_tensormap>; def PREFETCH_GLOBAL_L2_EVICT_NORMAL : PREFETCH_INTRS<"prefetch.global.L2::evict_normal", - "int_nvvm_prefetch_global_L2_evict_normal">; + int_nvvm_prefetch_global_L2_evict_normal>; def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", - "int_nvvm_prefetch_global_L2_evict_last">; -def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", "int_nvvm_prefetchu_L1">; + int_nvvm_prefetch_global_L2_evict_last>; //Applypriority intrinsics class APPLYPRIORITY_L2_INTRS : From 0d26914c6e78f8a8adf9e12fe788651a7d7b2857 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 1 Jul 2025 13:50:54 +0530 Subject: [PATCH 10/19] refresh --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1dbafbb2885a7..1aecbcee5d093 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -756,7 +756,7 @@ def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", int_nvvm_prefetchu_L1>; def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1", int_nvvm_prefetch_L1>; def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2", int_nvvm_prefetch_L2>; def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1", int_nvvm_prefetch_global_L1>; -def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1", int_nvvm_prefetch_local_L1>; +def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1", int_nvvm_prefetch_local_L1>; def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2", int_nvvm_prefetch_global_L2>; def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2", int_nvvm_prefetch_local_L2>; def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap", From bab560bc685f3b1236eaf66de44f2fbbb0cf4544 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Fri, 4 Jul 2025 15:37:30 +0530 Subject: [PATCH 11/19] refresh with addrspace --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 3 +- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 60 +++++++++++++++++++++--- llvm/test/CodeGen/NVPTX/prefetch.ll | 36 +++++++++----- 3 files changed, 79 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 1be79bb5525ae..303a807f408cf 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -2093,8 +2093,7 @@ let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture; } - def int_nvvm_prefetch_generic_tensormap : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>; - def int_nvvm_prefetch_const_tensormap : DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>; + def int_nvvm_prefetch_tensormap : DefaultAttrsIntrinsic<[], [llvm_anyptr_ty]>; foreach eviction_priority = ["evict_normal", "evict_last"] in def int_nvvm_prefetch_global_L2_ # eviction_priority : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty]>; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1aecbcee5d093..1fd7a94dd0f19 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -39,6 +39,12 @@ def AS_match { code global = [{ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL); }]; + code const = [{ + return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_CONST); + }]; + code param = [{ + return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_PARAM); + }]; } // A node that will be replaced with the current PTX version. @@ -744,13 +750,57 @@ foreach dim = [1, 2, 3, 4, 5] in { } } -//Prefetchu and Prefetch +//Prefetchu and Prefetch + +class PREFETCH_CONST_CHK + : PatFraggetOperand(2).getNode(); + auto *MemNode = dyn_cast(Addr); + bool result = MemNode->getMemOperand()->getAddrSpace() == llvm::ADDRESS_SPACE_CONST; + return result; + }]>; + + +class PREFETCH_GENERIC_CHK + : PatFraggetOperand(2).getNode(); + auto *MemNode = dyn_cast(Addr); + bool result= MemNode->getMemOperand()->getAddrSpace() == llvm::ADDRESS_SPACE_GENERIC; + return result; + }]>; + + +class PREFETCH_PARAM_CHK + : PatFraggetOperand(2).getNode(); + auto *MemNode = dyn_cast(Addr); + bool result = MemNode->getMemOperand()->getAddrSpace() == llvm::ADDRESS_SPACE_PARAM; + return result; + }]>; +defvar frag_pat = (int_nvvm_prefetch_tensormap node:$addr); + +def prefetch_tensormap_const : PREFETCH_CONST_CHK; +def prefetch_tensormap_gen : PREFETCH_GENERIC_CHK; +def prefetch_tensormap_param : PREFETCH_PARAM_CHK; + +def PREFETCH_CONST_TENSORMAP : NVPTXInst<(outs), (ins ADDR:$addr), + "prefetch.const.tensormap [$addr];", + [(prefetch_tensormap_const addr:$addr)]>; + +def PREFETCH_GENERIC_TENSORMAP : NVPTXInst<(outs), (ins ADDR:$addr), + "prefetch.tensormap [$addr];", + [(prefetch_tensormap_gen addr:$addr)]>; + +def PREFETCH_PARAM_TENSORMAP : NVPTXInst<(outs), (ins ADDR:$addr), + "prefetch.param.tensormap [$addr];", + [(prefetch_tensormap_param addr:$addr)]>; + + class PREFETCH_INTRS : BasicNVPTXInst<(outs), (ins ADDR:$addr), InstName, - [(Intr addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; + [(Intr addr:$addr)]>; def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1", int_nvvm_prefetchu_L1>; def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1", int_nvvm_prefetch_L1>; @@ -759,10 +809,6 @@ def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1", int_nvvm_prefetch_ def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1", int_nvvm_prefetch_local_L1>; def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2", int_nvvm_prefetch_global_L2>; def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2", int_nvvm_prefetch_local_L2>; -def PREFETCH_CONST_TENSORMAP : PREFETCH_INTRS<"prefetch.const.tensormap", - int_nvvm_prefetch_const_tensormap>; -def PREFETCH_GENERIC_TENSORMAP : PREFETCH_INTRS<"prefetch.tensormap", - int_nvvm_prefetch_generic_tensormap>; def PREFETCH_GLOBAL_L2_EVICT_NORMAL : PREFETCH_INTRS<"prefetch.global.L2::evict_normal", int_nvvm_prefetch_global_L2_evict_normal>; def PREFETCH_GLOBAL_L2_EVICT_LAST : PREFETCH_INTRS<"prefetch.global.L2::evict_last", diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index d9b4e48167310..4c1a6d94297e5 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -12,8 +12,9 @@ declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr) declare void @llvm.nvvm.prefetch.L1(ptr %ptr) declare void @llvm.nvvm.prefetch.L2(ptr %ptr) -declare void @llvm.nvvm.prefetch.generic.tensormap(ptr %ptr) -declare void @llvm.nvvm.prefetch.const.tensormap(ptr addrspace(4) %const_ptr) +declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) +declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) +declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %const_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr) @@ -70,9 +71,22 @@ define void @prefetch_(ptr %ptr) { ret void } +define void @prefetchu_l1(ptr %ptr) { +; CHECK-PTX64-LABEL: prefetchu_l1( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetchu_l1_param_0]; +; CHECK-PTX64-NEXT: prefetchu.L1 [%rd1]; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.prefetchu.L1(ptr %ptr) + ret void +} + -define void @prefetch_generic_tensormap(ptr %ptr) { -; CHECK-PTX64-LABEL: prefetch_generic_tensormap( +define void @prefetch_tensormap(ptr %ptr) { +; CHECK-PTX64-LABEL: prefetch_tensormap( ; CHECK-PTX64: { ; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK-PTX64-EMPTY: @@ -80,7 +94,7 @@ define void @prefetch_generic_tensormap(ptr %ptr) { ; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_param_0]; ; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.generic.tensormap(ptr %ptr) + tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr) ret void } @@ -93,19 +107,19 @@ define void @prefetch_const_tensormap(ptr addrspace(4) %const_ptr) { ; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; ; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.const.tensormap(ptr addrspace(4) %const_ptr) + tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) ret void } -define void @prefetchu_l1(ptr %ptr) { -; CHECK-PTX64-LABEL: prefetchu_l1( +define void @prefetch_param_tensormap(ptr addrspace(101) %const_ptr) { +; CHECK-PTX64-LABEL: prefetch_param_tensormap( ; CHECK-PTX64: { ; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK-PTX64-EMPTY: ; CHECK-PTX64-NEXT: // %bb.0: -; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetchu_l1_param_0]; -; CHECK-PTX64-NEXT: prefetchu.L1 [%rd1]; +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; +; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetchu.L1(ptr %ptr) + tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %const_ptr) ret void } \ No newline at end of file From f00929175181a96cbc52ea5e2e8b5467bca10af1 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Fri, 4 Jul 2025 16:01:09 +0530 Subject: [PATCH 12/19] update docs --- llvm/docs/NVPTXUsage.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 2c7a531f34a8f..ab2d1b4e7eaca 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -971,8 +971,9 @@ Syntax: declare void @llvm.nvvm.prefetch.L1(ptr %ptr) declare void @llvm.nvvm.prefetch.L2(ptr %ptr) - declare void @llvm.nvvm.prefetch.generic.tensormap(ptr %ptr) - declare void @llvm.nvvm.prefetch.const.tensormap(ptr addrspace(4) %const_ptr) + declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) + declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr) From 2457b1a683b40e6bb18880d445853ba1650c51e3 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 7 Jul 2025 14:38:08 +0530 Subject: [PATCH 13/19] add isel --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 11 +++++++++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 25 +++------------------ 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index d9192fbfceff1..4ea75a69d5f58 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3973,6 +3973,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( return true; } + case Intrinsic::nvvm_prefetch_tensormap: { + auto &DL = I.getDataLayout(); + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = getPointerTy(DL); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable; + Info.align.reset(); + return true; + } + case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_p: { diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1fd7a94dd0f19..43878b52f732c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -753,30 +753,11 @@ foreach dim = [1, 2, 3, 4, 5] in { //Prefetchu and Prefetch class PREFETCH_CONST_CHK - : PatFraggetOperand(2).getNode(); - auto *MemNode = dyn_cast(Addr); - bool result = MemNode->getMemOperand()->getAddrSpace() == llvm::ADDRESS_SPACE_CONST; - return result; - }]>; - - + : PatFrag; class PREFETCH_GENERIC_CHK - : PatFraggetOperand(2).getNode(); - auto *MemNode = dyn_cast(Addr); - bool result= MemNode->getMemOperand()->getAddrSpace() == llvm::ADDRESS_SPACE_GENERIC; - return result; - }]>; - - + : PatFrag; class PREFETCH_PARAM_CHK - : PatFraggetOperand(2).getNode(); - auto *MemNode = dyn_cast(Addr); - bool result = MemNode->getMemOperand()->getAddrSpace() == llvm::ADDRESS_SPACE_PARAM; - return result; - }]>; + : PatFrag; defvar frag_pat = (int_nvvm_prefetch_tensormap node:$addr); From ee0e82c2bdeae915778116901e5f9c4352f4df7b Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 7 Jul 2025 15:45:28 +0530 Subject: [PATCH 14/19] add InferAS --- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 3ae2d9d5181a3..a825c16d5b07e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -564,7 +564,8 @@ bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl &OpIndexes, case Intrinsic::nvvm_isspacep_global: case Intrinsic::nvvm_isspacep_local: case Intrinsic::nvvm_isspacep_shared: - case Intrinsic::nvvm_isspacep_shared_cluster: { + case Intrinsic::nvvm_isspacep_shared_cluster: + case Intrinsic::nvvm_prefetch_tensormap:{ OpIndexes.push_back(0); return true; } @@ -587,6 +588,9 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, return ConstantInt::get(II->getType(), *R); return nullptr; } + case Intrinsic::nvvm_prefetch_tensormap: { + return nullptr; + } } return nullptr; } From 45f5af020253997267f972a6e331b84d937b43ad Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 7 Jul 2025 17:45:30 +0530 Subject: [PATCH 15/19] refresh tests --- llvm/test/CodeGen/NVPTX/prefetch.ll | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index 4c1a6d94297e5..862e26d704679 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -14,7 +14,7 @@ declare void @llvm.nvvm.prefetch.L2(ptr %ptr) declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) -declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %const_ptr) +declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr) declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr) @@ -84,17 +84,16 @@ define void @prefetchu_l1(ptr %ptr) { ret void } - define void @prefetch_tensormap(ptr %ptr) { ; CHECK-PTX64-LABEL: prefetch_tensormap( ; CHECK-PTX64: { ; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK-PTX64-EMPTY: ; CHECK-PTX64-NEXT: // %bb.0: -; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_param_0]; +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_tensormap_param_0]; ; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr) + tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) ret void } @@ -107,19 +106,19 @@ define void @prefetch_const_tensormap(ptr addrspace(4) %const_ptr) { ; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; ; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) ret void } -define void @prefetch_param_tensormap(ptr addrspace(101) %const_ptr) { +define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) { ; CHECK-PTX64-LABEL: prefetch_param_tensormap( ; CHECK-PTX64: { ; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK-PTX64-EMPTY: ; CHECK-PTX64-NEXT: // %bb.0: -; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_param_0]; ; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %const_ptr) + tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr) ret void } \ No newline at end of file From ec1e1a09ada786248ccacf11d54c51ab9eca8868 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 7 Jul 2025 19:25:34 +0530 Subject: [PATCH 16/19] clang format --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 9 +++++---- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 4ea75a69d5f58..f3b2329b302ae 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3974,16 +3974,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( } case Intrinsic::nvvm_prefetch_tensormap: { - auto &DL = I.getDataLayout(); + auto &DL = I.getDataLayout(); Info.opc = ISD::INTRINSIC_VOID; - Info.memVT = getPointerTy(DL); + Info.memVT = getPointerTy(DL); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable; + Info.flags = + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable; Info.align.reset(); return true; } - + case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_p: { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index a825c16d5b07e..f6e20fdc630a9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -565,7 +565,7 @@ bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl &OpIndexes, case Intrinsic::nvvm_isspacep_local: case Intrinsic::nvvm_isspacep_shared: case Intrinsic::nvvm_isspacep_shared_cluster: - case Intrinsic::nvvm_prefetch_tensormap:{ + case Intrinsic::nvvm_prefetch_tensormap: { OpIndexes.push_back(0); return true; } From 7256bc6d74cc8b3a7bf3c0d274b9d00e380bab8b Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 7 Jul 2025 19:42:36 +0530 Subject: [PATCH 17/19] clang-format --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 6 +++--- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index f3b2329b302ae..d6c9bff9dc42b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3974,12 +3974,12 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( } case Intrinsic::nvvm_prefetch_tensormap: { - auto &DL = I.getDataLayout(); + auto &DL = I.getDataLayout(); Info.opc = ISD::INTRINSIC_VOID; - Info.memVT = getPointerTy(DL); + Info.memVT = getPointerTy(DL); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.flags = + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable; Info.align.reset(); return true; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index f6e20fdc630a9..74b6b83f513da 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -564,7 +564,7 @@ bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl &OpIndexes, case Intrinsic::nvvm_isspacep_global: case Intrinsic::nvvm_isspacep_local: case Intrinsic::nvvm_isspacep_shared: - case Intrinsic::nvvm_isspacep_shared_cluster: + case Intrinsic::nvvm_isspacep_shared_cluster: case Intrinsic::nvvm_prefetch_tensormap: { OpIndexes.push_back(0); return true; From 4745db5f4974fa019a44e3601a7b6e44d60350c8 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Mon, 7 Jul 2025 23:58:30 +0530 Subject: [PATCH 18/19] refresh 1 --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 28 ++++++++++-------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 43878b52f732c..c0cedfda6feff 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -752,30 +752,26 @@ foreach dim = [1, 2, 3, 4, 5] in { //Prefetchu and Prefetch -class PREFETCH_CONST_CHK - : PatFrag; -class PREFETCH_GENERIC_CHK - : PatFrag; -class PREFETCH_PARAM_CHK - : PatFrag; - defvar frag_pat = (int_nvvm_prefetch_tensormap node:$addr); -def prefetch_tensormap_const : PREFETCH_CONST_CHK; -def prefetch_tensormap_gen : PREFETCH_GENERIC_CHK; -def prefetch_tensormap_param : PREFETCH_PARAM_CHK; +def prefetch_tensormap_const : PatFrag; +def prefetch_tensormap_gen : PatFrag; +def prefetch_tensormap_param : PatFrag; -def PREFETCH_CONST_TENSORMAP : NVPTXInst<(outs), (ins ADDR:$addr), +def PREFETCH_CONST_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), "prefetch.const.tensormap [$addr];", - [(prefetch_tensormap_const addr:$addr)]>; + [(prefetch_tensormap_const addr:$addr)]>, + Requires<[hasPTX<80>, hasSM<90>]>; -def PREFETCH_GENERIC_TENSORMAP : NVPTXInst<(outs), (ins ADDR:$addr), +def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), "prefetch.tensormap [$addr];", - [(prefetch_tensormap_gen addr:$addr)]>; + [(prefetch_tensormap_gen addr:$addr)]>, + Requires<[hasPTX<80>, hasSM<90>]>; -def PREFETCH_PARAM_TENSORMAP : NVPTXInst<(outs), (ins ADDR:$addr), +def PREFETCH_PARAM_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), "prefetch.param.tensormap [$addr];", - [(prefetch_tensormap_param addr:$addr)]>; + [(prefetch_tensormap_param addr:$addr)]>, + Requires<[hasPTX<80>, hasSM<90>]>; class PREFETCH_INTRS : From 1102a58fc1bb83cf973e1c552a7f25f03d6a0588 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 8 Jul 2025 12:13:14 +0530 Subject: [PATCH 19/19] refresh 2 --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 24 ++++++++----------- .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 8 ++++++- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index c0cedfda6feff..4a306ce87fcc7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -758,21 +758,17 @@ def prefetch_tensormap_const : PatFrag; def prefetch_tensormap_param : PatFrag; -def PREFETCH_CONST_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.const.tensormap [$addr];", - [(prefetch_tensormap_const addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; - -def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.tensormap [$addr];", - [(prefetch_tensormap_gen addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; - -def PREFETCH_PARAM_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr), - "prefetch.param.tensormap [$addr];", - [(prefetch_tensormap_param addr:$addr)]>, - Requires<[hasPTX<80>, hasSM<90>]>; +multiclass PREFETCH_TENSORMAP_INST { + def "" : BasicNVPTXInst<(outs), (ins ADDR:$addr), + !strconcat("prefetch", !if(!eq(addrspace_name, ""), "", + !strconcat(".", addrspace_name)), ".tensormap [$addr];"), + [(pattern_frag addr:$addr)]>, + Requires<[hasPTX<80>, hasSM<90>]>; +} +defm PREFETCH_CONST_TENSORMAP : PREFETCH_TENSORMAP_INST<"const", prefetch_tensormap_const>; +defm PREFETCH_GENERIC_TENSORMAP : PREFETCH_TENSORMAP_INST<"", prefetch_tensormap_gen>; +defm PREFETCH_PARAM_TENSORMAP : PREFETCH_TENSORMAP_INST<"param", prefetch_tensormap_param>; class PREFETCH_INTRS : BasicNVPTXInst<(outs), (ins ADDR:$addr), diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 74b6b83f513da..28cb4ad17580f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -589,7 +589,13 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, return nullptr; } case Intrinsic::nvvm_prefetch_tensormap: { - return nullptr; + IRBuilder<> Builder(II); + Module *M = II->getModule(); + Function *NewIntrinsic = Intrinsic::getDeclaration( + M, Intrinsic::nvvm_prefetch_tensormap, {NewV->getType()}); + CallInst *NewCall = Builder.CreateCall(NewIntrinsic, {NewV}); + NewCall->setAttributes(II->getAttributes()); + return NewCall; } } return nullptr;