File tree Expand file tree Collapse file tree 2 files changed +19
-5
lines changed Expand file tree Collapse file tree 2 files changed +19
-5
lines changed Original file line number Diff line number Diff line change @@ -112,3 +112,11 @@ More information about PMT can be found here: https://git.astron.nl/RD/pmt/
112
112
113
113
114
114
115
+ NCUObserver
116
+ ~~~~~~~~~~~
117
+
118
+ The NCUObserver can be used to automatically extract performance counters during tuning using NVIDIA's Nsight Compute profiler.
119
+ The NCUObserver relies on an intermediate library, which can be found here: https://github.com/nlesc-recruit/nvmetrics
120
+
121
+ .. autoclass :: kernel_tuner.observers.ncu.NCUObserver
122
+
Original file line number Diff line number Diff line change 4
4
import nvmetrics
5
5
except (ImportError ):
6
6
nvmetrics = None
7
- pass
8
7
9
8
class NCUObserver (PrologueObserver ):
10
9
"""``NCUObserver`` measures performance counters.
11
10
12
- """
11
+ The exact performance counters supported differ per GPU, some examples:
13
12
14
- def __init__ (self , metrics = None , device = 0 ):
15
- """Create a new ``NCUObserver``.
13
+ * "dram__bytes.sum", # Counter byte # of bytes accessed in DRAM
14
+ * "dram__bytes_read.sum", # Counter byte # of bytes read from DRAM
15
+ * "dram__bytes_write.sum", # Counter byte # of bytes written to DRAM
16
+ * "smsp__sass_thread_inst_executed_op_fadd_pred_on.sum", # Counter inst # of FADD thread instructions executed where all predicates were true
17
+ * "smsp__sass_thread_inst_executed_op_ffma_pred_on.sum", # Counter inst # of FFMA thread instructions executed where all predicates were true
18
+ * "smsp__sass_thread_inst_executed_op_fmul_pred_on.sum", # Counter inst # of FMUL thread instructions executed where all predicates were true
16
19
17
20
:param metrics: The metrics to observe. This should be a list of strings.
18
21
You can use ``ncu --query-metrics`` to get a list of valid metrics.
19
- """
22
+ :type metrics: list[str]
20
23
24
+ """
25
+
26
+ def __init__ (self , metrics = None , device = 0 ):
21
27
if not nvmetrics :
22
28
print ("NCUObserver is not available." )
23
29
You can’t perform that action at this time.
0 commit comments