Skip to content

Commit 75b74a2

Browse files
Merge pull request #243 from loostrum/tegra_observer
Add Tegra Observer to control clocks on Jetson devices
2 parents 4dbcb66 + 818e65a commit 75b74a2

File tree

2 files changed

+218
-3
lines changed

2 files changed

+218
-3
lines changed

kernel_tuner/core.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@
2020
from kernel_tuner.backends.nvcuda import CudaFunctions
2121
from kernel_tuner.backends.opencl import OpenCLFunctions
2222
from kernel_tuner.backends.compiler import CompilerFunctions
23-
from kernel_tuner.backends.opencl import OpenCLFunctions
24-
from kernel_tuner.backends.hip import HipFunctions
2523
from kernel_tuner.observers.nvml import NVMLObserver
24+
from kernel_tuner.observers.tegra import TegraObserver
2625
from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver
2726

2827
try:
@@ -315,15 +314,19 @@ def __init__(
315314
else:
316315
raise ValueError("Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet")
317316

318-
# look for NVMLObserver in observers, if present, enable special tunable parameters through nvml
317+
# look for NVMLObserver and TegraObserver in observers, if present, enable special tunable parameters through nvml/tegra
319318
self.use_nvml = False
319+
self.use_tegra = False
320320
self.continuous_observers = []
321321
self.output_observers = []
322322
if observers:
323323
for obs in observers:
324324
if isinstance(obs, NVMLObserver):
325325
self.nvml = obs.nvml
326326
self.use_nvml = True
327+
if isinstance(obs, TegraObserver):
328+
self.tegra = obs.tegra
329+
self.use_tegra = True
327330
if hasattr(obs, "continuous_observer"):
328331
self.continuous_observers.append(obs.continuous_observer)
329332
if isinstance(obs, OutputObserver):
@@ -409,6 +412,10 @@ def benchmark(self, func, gpu_args, instance, verbose, objective):
409412
if "nvml_mem_clock" in instance.params:
410413
self.nvml.mem_clock = instance.params["nvml_mem_clock"]
411414

415+
if self.use_tegra:
416+
if "tegra_gr_clock" in instance.params:
417+
self.tegra.gr_clock = instance.params["tegra_gr_clock"]
418+
412419
# Call the observers to register the configuration to be benchmarked
413420
for obs in self.dev.observers:
414421
obs.register_configuration(instance.params)

kernel_tuner/observers/tegra.py

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import subprocess
2+
import time
3+
from pathlib import Path
4+
5+
import numpy as np
6+
7+
from kernel_tuner.observers.observer import BenchmarkObserver
8+
9+
10+
class tegra:
    """Class that gathers the Tegra functionality for one device.

    The GPU core clock is queried and controlled through the devfreq control
    files found below ``/sys/class/devfreq``. Reading is done directly,
    writing is done by running ``sudo sh -c "echo <value> > <file>"``.
    """

    def __init__(self):
        """Create object to control GPU core clock on a Tegra device.

        :raises FileNotFoundError: if no internal Tegra GPU is found.
        """
        # Assigned first so that __del__ stays safe if the probing below raises
        self.has_changed_clocks = False

        self.dev_path = self.get_dev_path()
        self.default_min_gr_clock = self._read_clock_file("min_freq")
        self.default_max_gr_clock = self._read_clock_file("max_freq")
        # _read_clock_file returns a bare int when the file holds one value;
        # keep this attribute an array so `in` and len() always work
        self.supported_gr_clocks = np.atleast_1d(self._read_clock_file("available_frequencies"))

        self.default_railgate_status = self._read_railgate_file()

    @staticmethod
    def get_dev_path():
        """Get the path to device core clock control in /sys.

        :returns: the first entry of /sys/class/devfreq whose
            ``device/of_node/name`` is one of the known GPU names.
        :rtype: pathlib.Path
        :raises FileNotFoundError: if no entry matches.
        """
        # loop to find GPU device name based on jetson_clocks
        for dev in Path("/sys/class/devfreq").iterdir():
            try:
                with open(dev / Path("device/of_node/name")) as fp:
                    name = fp.read().strip().rstrip("\x00")
            except OSError:
                # An entry without a readable name file cannot be identified
                # as the GPU; keep looking instead of aborting the search
                continue
            if name in ("gv11b", "gp10b", "ga10b", "gpu"):
                return dev
        raise FileNotFoundError("No internal tegra GPU found")

    def _read_railgate_file(self):
        """Read railgate status as an int from ``device/railgate_enable``."""
        with open(self.dev_path / Path("device/railgate_enable")) as fp:
            data = int(fp.read().strip())
        return data

    def _write_railgate_file(self, value):
        """Set railgate status.

        :param value: new railgate status, must be 0 or 1.
        :raises ValueError: if value is not 0 or 1.
        :raises subprocess.CalledProcessError: if the sudo write fails.
        """
        if value not in (0, 1):
            raise ValueError(f"Illegal railgate value {value}, must be 0 or 1")
        full_path = self.dev_path / Path("device/railgate_enable")
        args = [
            "sudo",
            "sh",
            "-c",
            f"echo {value} > {str(full_path)}"
        ]
        subprocess.run(args, check=True)

    def _read_clock_file(self, fname):
        """Read current or available frequency value(s) from a frequency control file.

        :param fname: name of the file, relative to the device path.
        :returns: an int when the file holds a single value, otherwise a
            numpy integer array with one entry per whitespace-separated value.
        :raises ValueError: if the file holds no value at all.
        """
        full_path = self.dev_path / Path(fname)
        with open(full_path) as fp:
            fields = fp.read().split()
        if not fields:
            raise ValueError(f"No frequency value found in {full_path}")
        if len(fields) > 1:
            return np.array(fields).astype(int)
        # Convert the single field itself; int() on a 1-element array is deprecated
        return int(fields[0])

    def _write_clock_file(self, fname, value):
        """Write a frequency value to a core clock control file.

        :param fname: "min_freq" or "max_freq".
        :param value: frequency, must be one of the supported frequencies.
        :raises ValueError: on an illegal file name or frequency.
        :raises subprocess.CalledProcessError: if the sudo write fails.
        """
        self.has_changed_clocks = True
        available_files = ("min_freq", "max_freq")
        if fname not in available_files:
            raise ValueError(f"Illegal filename value: {fname}, must be one of {available_files}")

        if value not in self.supported_gr_clocks:
            raise ValueError(f"Illegal frequency value {value}, must be one of {self.supported_gr_clocks}")

        full_path = self.dev_path / Path(fname)
        args = [
            "sudo",
            "sh",
            "-c",
            f"echo {value} > {str(full_path)}"
        ]
        subprocess.run(args, check=True)

    @property
    def gr_clock(self):
        """Control the core clock frequency.

        Reading returns the value of ``cur_freq``. Assigning writes 0 to the
        railgate file, sets both ``min_freq`` and ``max_freq`` to the new
        value and blocks until ``cur_freq`` reports that value.
        """
        return self._read_clock_file("cur_freq")

    @gr_clock.setter
    def gr_clock(self, new_clock):
        self._write_railgate_file(0)
        cur_clock = self._read_clock_file("cur_freq")
        if new_clock > cur_clock:
            # going up: raise the ceiling first so that min never exceeds max
            self._write_clock_file("max_freq", new_clock)
            self._write_clock_file("min_freq", new_clock)
        else:
            # going down, or already at the requested frequency: lower the
            # floor first. The limits are also written when the current
            # frequency already matches, otherwise the clock is not pinned
            self._write_clock_file("min_freq", new_clock)
            self._write_clock_file("max_freq", new_clock)
        # wait for the new clock to be applied
        while (self._read_clock_file("cur_freq") != new_clock):
            time.sleep(.001)

    def reset_clock(self):
        """Reset the core clock frequency and railgate status to the original values"""
        self._write_clock_file("min_freq", self.default_min_gr_clock)
        self._write_clock_file("max_freq", self.default_max_gr_clock)
        self._write_railgate_file(self.default_railgate_status)

    def __del__(self):
        # restore original core clocks, if changed; getattr keeps this safe
        # for objects whose __init__ did not run to completion
        if getattr(self, "has_changed_clocks", False):
            self.reset_clock()
116+
117+
118+
class TegraObserver(BenchmarkObserver):
    """Observer that monitors and controls the graphics clock of a Tegra device through /sys/.

    :param observables: Names of the quantities to record during tuning, the only supported name is "core_freq"
    :type observables: list of strings

    :param save_all: If True, the values collected for the individual benchmark iterations are returned
        next to their average. If False, only the average over the iterations is returned. False by default.
    :type save_all: boolean

    """

    def __init__(self, observables, save_all=False):
        """Create a TegraObserver"""
        self.tegra = tegra()
        self.save_all = save_all

        supported = ["core_freq"]
        for name in observables:
            if name in supported:
                continue
            raise ValueError(f"Observable {name} not in supported: {supported}")
        self.observables = observables

        # one list per observable, filled with one value per benchmark iteration
        self.results = {name + "s": [] for name in self.observables}

        # observables that are sampled while the kernel is running
        self.during_obs = [name for name in observables if name in ["core_freq"]]

        self.iteration = {name: [] for name in self.during_obs}

    def before_start(self):
        # start every measurement with empty sample lists
        self.iteration = {name: [] for name in self.during_obs}

    def after_start(self):
        # take at least one sample, even if during() is never called again
        self.during()

    def during(self):
        if "core_freq" not in self.observables:
            return
        self.iteration["core_freq"].append(self.tegra.gr_clock)

    def after_finish(self):
        if "core_freq" not in self.observables:
            return
        samples = self.iteration["core_freq"]
        self.results["core_freqs"].append(np.average(samples))

    def get_results(self):
        collected = {}

        for name in self.observables:
            per_iteration = self.results[name + "s"]
            # the per-iteration values are only included on request
            if self.save_all:
                collected[name + "s"] = per_iteration
            # the average is always included
            collected[name] = np.average(per_iteration)

        # start the next round with empty result lists
        for name in self.observables:
            self.results[name + "s"] = []

        return collected
189+
190+
191+
# High-level Helper functions
192+
193+
194+
def get_tegra_gr_clocks(n=None, quiet=False):
    """Get tunable parameter for Tegra graphics clock, n is desired number of values.

    :param n: Desired number of frequencies. When given and smaller than the
        number of supported frequencies, n values spread evenly over the
        supported ones are selected; otherwise all supported frequencies are used.
    :type n: int

    :param quiet: If True, do not print the selected frequencies. False by default.
    :type quiet: boolean

    :returns: A dictionary with the single key "tegra_gr_clock" holding the
        selected frequencies as a list of Python ints.
    :rtype: dict
    """
    device = tegra()
    # atleast_1d: a single supported frequency may be stored as a bare int,
    # which would break len() and the fancy indexing below
    gr_clocks = np.atleast_1d(device.supported_gr_clocks)

    if n and (len(gr_clocks) > n):
        indices = np.array(np.ceil(np.linspace(0, len(gr_clocks) - 1, n)), dtype=int)
        gr_clocks = gr_clocks[indices]

    # plain ints instead of numpy scalars, so the values print cleanly
    tune_params = {"tegra_gr_clock": [int(clock) for clock in gr_clocks]}

    if not quiet:
        print("Using gr frequencies:", tune_params["tegra_gr_clock"])
    return tune_params

0 commit comments

Comments
 (0)