Skip to content

Commit 4fe1843

Browse files
committed
Add NCUObserver
1 parent b4b5b33 commit 4fe1843

File tree

4 files changed

+58
-3
lines changed

4 files changed

+58
-3
lines changed

kernel_tuner/core.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
from kernel_tuner.backends.opencl import OpenCLFunctions
2424
from kernel_tuner.backends.hip import HipFunctions
2525
from kernel_tuner.observers.nvml import NVMLObserver
26-
from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver
26+
from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver, PrologueObserver
2727

2828
try:
2929
import torch
@@ -319,6 +319,7 @@ def __init__(
319319
self.use_nvml = False
320320
self.continuous_observers = []
321321
self.output_observers = []
322+
self.prologue_observers = []
322323
if observers:
323324
for obs in observers:
324325
if isinstance(obs, NVMLObserver):
@@ -328,7 +329,8 @@ def __init__(
328329
self.continuous_observers.append(obs.continuous_observer)
329330
if isinstance(obs, OutputObserver):
330331
self.output_observers.append(obs)
331-
332+
if isinstance(obs, PrologueObserver):
333+
self.prologue_observers.append(obs)
332334

333335
self.iterations = iterations
334336

@@ -346,6 +348,12 @@ def benchmark_default(self, func, gpu_args, threads, grid, result):
346348
obs for obs in self.dev.observers if not isinstance(obs, ContinuousObserver)
347349
]
348350

351+
for obs in self.prologue_observers:
352+
obs.before_start()
353+
self.dev.run_kernel(func, gpu_args, threads, grid)
354+
self.dev.synchronize()
355+
obs.after_finish()
356+
349357
self.dev.synchronize()
350358
for _ in range(self.iterations):
351359
for obs in observers:

kernel_tuner/observers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
from .observer import BenchmarkObserver, IterationObserver, ContinuousObserver, OutputObserver
1+
from .observer import BenchmarkObserver, IterationObserver, ContinuousObserver, OutputObserver, PrologueObserver

kernel_tuner/observers/ncu.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
from kernel_tuner.observers import PrologueObserver
2+
3+
try:
4+
import nvmetrics
5+
except (ImportError):
6+
nvmetrics = None
7+
pass
8+
9+
class NCUObserver(PrologueObserver):
    """``NCUObserver`` measures performance counters.

    The measurement is delegated to the optional ``nvmetrics`` module. When
    that module could not be imported, the observer prints a notice on
    construction, its hooks do nothing, and ``get_results`` returns an empty
    dictionary.
    """

    def __init__(self, metrics=None):
        """Create a new ``NCUObserver``.

        :param metrics: The metrics to observe. This should be a list of strings.
            You can use ``ncu --query-metrics`` to get a list of valid metrics.
        """
        if not nvmetrics:
            print("NCUObserver is not available.")

        self.metrics = metrics
        # Holds whatever ``nvmetrics.measureMetricsStop()`` returned last;
        # stays empty until a measurement has completed.
        self.results = dict()

    def before_start(self):
        """Start measuring the requested metrics, if ``nvmetrics`` is available."""
        if nvmetrics:
            nvmetrics.measureMetricsStart(self.metrics)

    def after_finish(self):
        """Stop measuring and store the values, if ``nvmetrics`` is available."""
        if nvmetrics:
            self.results = nvmetrics.measureMetricsStop()

    def get_results(self):
        """Return the measured values as a dictionary keyed by metric name.

        :returns: A dict that pairs each entry of ``metrics`` with the entry of
            the stored results at the same position. Empty when no metrics
            were requested or nothing has been measured.
        """
        # ``metrics`` defaults to None and zip(None, ...) raises TypeError, so
        # an observer created without metrics reports no results instead.
        if not self.metrics:
            return {}
        # NOTE(review): assumes measureMetricsStop() yields one value per
        # requested metric, in the same order - confirm against nvmetrics.
        return dict(zip(self.metrics, self.results))

kernel_tuner/observers/observer.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,4 +57,15 @@ def process_output(self, answer, output):
5757
"""
5858
pass
5959

60+
class PrologueObserver(BenchmarkObserver):
    """Observer that measures something in a separate kernel invocation prior to the normal benchmark.

    Subclasses implement both hooks below; they bracket that extra kernel
    invocation.
    """

    @abstractmethod
    def before_start(self):
        """Hook called before the kernel starts."""

    @abstractmethod
    def after_finish(self):
        """Hook called after the kernel has finished execution."""

0 commit comments

Comments
 (0)