Skip to content

Commit 472e239

Browse files
durga4githubtomtor
authored andcommitted
[NVPTX] Add pm_event intrinsics (llvm#141278)
This patch adds the pm_event.mask intrinsic and its clang-builtin. Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
1 parent e5053b7 commit 472e239

File tree

6 files changed

+61
-0
lines changed

6 files changed

+61
-0
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ let Attributes = [NoReturn] in {
177177
}
178178
let Attributes = [NoThrow] in {
179179
def __nvvm_nanosleep : NVPTXBuiltinSMAndPTX<"void(unsigned int)", SM_70, PTX63>;
180+
def __nvvm_pm_event_mask : NVPTXBuiltin<"void(_Constant unsigned short)">;
180181
}
181182

182183
// Min Max

clang/test/CodeGen/builtins-nvptx.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,13 @@ __device__ void nvvm_vote(int pred) {
883883
// CHECK: ret void
884884
}
885885

886+
// CHECK-LABEL: nvvm_pm_event_mask
887+
__device__ void nvvm_pm_event_mask() {
888+
// CHECK: call void @llvm.nvvm.pm.event.mask(i16 255)
889+
__nvvm_pm_event_mask(255);
890+
// CHECK: ret void
891+
}
892+
886893
// CHECK-LABEL: nvvm_nanosleep
887894
__device__ void nvvm_nanosleep(int d) {
888895
#if __CUDA_ARCH__ >= 700

llvm/docs/NVPTXUsage.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,6 +1868,29 @@ If the request failed, the behavior of these intrinsics is undefined.
18681868

18691869
For more information, refer `PTX ISA <https://docs.nvidia.com/cuda/parallel-thread-execution/?a#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-query-cancel>`__.
18701870

1871+
Perf Monitor Event Intrinsics
1872+
-----------------------------
1873+
1874+
'``llvm.nvvm.pm.event.mask``' Intrinsic
1875+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1876+
1877+
Syntax:
1878+
"""""""
1879+
1880+
.. code-block:: llvm
1881+
1882+
declare void @llvm.nvvm.pm.event.mask(i16 immarg %mask_val)
1883+
1884+
Overview:
1885+
"""""""""
1886+
1887+
The '``llvm.nvvm.pm.event.mask``' intrinsic triggers one or more
1888+
performance monitor events. Each bit in the 16-bit immediate operand
1889+
``%mask_val`` controls an event.
1890+
1891+
For more information on the pmevent instructions, refer to the PTX ISA
1892+
`<https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent>`_.
1893+
18711894
Other Intrinsics
18721895
----------------
18731896

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,11 @@ let TargetPrefix = "nvvm" in {
768768
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
769769
[IntrConvergent, IntrNoMem, IntrHasSideEffects]>;
770770

771+
// Performance Monitor Events (pm events) intrinsics
772+
def int_nvvm_pm_event_mask : NVVMBuiltin,
773+
DefaultAttrsIntrinsic<[], [llvm_i16_ty],
774+
[IntrConvergent, IntrNoMem, IntrHasSideEffects,
775+
ImmArg<ArgIndex<0>>]>;
771776
//
772777
// Min Max
773778
//

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,16 @@ def INT_NVVM_NANOSLEEP_I : BasicNVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u3
997997
def INT_NVVM_NANOSLEEP_R : BasicNVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32",
998998
[(int_nvvm_nanosleep i32:$i)]>,
999999
Requires<[hasPTX<63>, hasSM<70>]>;
1000+
1001+
let hasSideEffects = 1 in {
1002+
// Performance Monitor events
1003+
def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs),
1004+
(ins i16imm:$mask),
1005+
"pmevent.mask",
1006+
[(int_nvvm_pm_event_mask timm:$mask)]>,
1007+
Requires<[hasSM<20>, hasPTX<30>]>;
1008+
} // hasSideEffects
1009+
10001010
//
10011011
// Min Max
10021012
//

llvm/test/CodeGen/NVPTX/pm-event.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
2+
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
3+
4+
declare void @llvm.nvvm.pm.event.mask(i16 %mask)
5+
6+
; CHECK-LABEL: test_pm_event
7+
define void @test_pm_event() {
8+
; CHECK: pmevent.mask 255;
9+
call void @llvm.nvvm.pm.event.mask(i16 u0xff)
10+
11+
; CHECK: pmevent.mask 4096;
12+
call void @llvm.nvvm.pm.event.mask(i16 u0x1000)
13+
14+
ret void
15+
}

0 commit comments

Comments
 (0)