Skip to content

Commit 24dd06d

Browse files
committed
Added temperature and energy readings + continuous observer
1 parent c550b33 commit 24dd06d

File tree

1 file changed

+177
-14
lines changed

1 file changed

+177
-14
lines changed

kernel_tuner/observers/tegra.py

Lines changed: 177 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,40 @@
11
import subprocess
22
import time
33
from pathlib import Path
4+
import os
45

56
import numpy as np
67

7-
from kernel_tuner.observers.observer import BenchmarkObserver
8+
from kernel_tuner.observers.observer import BenchmarkObserver, ContinuousObserver
9+
from kernel_tuner.observers.pmt import PMTObserver
10+
from kernel_tuner.observers.powersensor import PowerSensorObserver
811

912

1013
class tegra:
1114
"""Class that gathers the Tegra functionality for one device."""
1215

13-
def __init__(self, powerPath="", tempPath=""):
    """Create object to control GPU core clock on a Tegra device.

    :param powerPath: Directory that holds the power sensor files. File
        names are appended to it, so it should end with ``/``. When empty
        (or ``None``) the directory is located with ``get_power_path()``.
    :param tempPath: Directory that holds the GPU ``temp`` file; it should
        end with ``/``. When empty (or ``None``) the directory is located
        with ``get_temp_path()``.
    """
    # Set this flag before anything that can raise: __del__ reads it even
    # when construction is aborted half-way.
    self.has_changed_clocks = False

    # Get paths
    self.dev_path = self.get_dev_path()
    self.gpu_temp_path = tempPath if tempPath else self.get_temp_path()
    self.gpu_power_path = powerPath if powerPath else self.get_power_path()
    self.gpu_channel = self.get_gpu_channel()

    # Read default clock values
    self.default_min_gr_clock = self._read_clock_file("min_freq")
    self.default_max_gr_clock = self._read_clock_file("max_freq")
    self.supported_gr_clocks = self._read_clock_file("available_frequencies")

    self.default_railgate_status = self._read_railgate_file()
24-
37+
2538
@staticmethod
2639
def get_dev_path():
2740
"""Get the path to device core clock control in /sys"""
@@ -36,6 +49,49 @@ def get_dev_path():
3649
raise FileNotFoundError("No internal tegra GPU found")
3750
return root_path
3851

52+
def get_temp_path(self, thermal_root="/sys/class/thermal"):
    """Find the directory which holds the GPU temperature.

    Scans the entries of ``thermal_root`` and picks the first one whose
    ``type`` file reads ``GPU-therm``.

    :param thermal_root: Directory whose entries are inspected.
    :returns: The matching directory as a string ending in ``/``.
    :raises FileNotFoundError: When no entry is labelled ``GPU-therm``.
    """
    for zone in Path(thermal_root).iterdir():
        try:
            with open(zone / "type") as fp:
                name = fp.read().strip()
        except (FileNotFoundError, NotADirectoryError):
            # Entry is not a directory with a "type" file; keep scanning.
            continue
        if name == "GPU-therm":
            # Return a str (Path + str is a TypeError) in the same
            # trailing-slash form a user-supplied tempPath is given in.
            return str(zone) + "/"

    raise FileNotFoundError("No GPU sensor for temperature found")
64+
65+
def get_power_path(self, start_path="/sys/bus/i2c/drivers/ina3221"):
    """Recursively search for the directory that holds power readings.

    Walks the tree below ``start_path`` depth-first, in ``os.listdir``
    order, looking for a file called ``curr1_input``.

    :param start_path: Directory where the search starts.
    :returns: The directory containing ``curr1_input`` with a trailing
        ``/``, or ``None`` when the search finds nothing.
    """
    for name in os.listdir(start_path):
        candidate = os.path.join(start_path, name)

        if name == "curr1_input" and os.path.isfile(candidate):
            return start_path + "/"

        # Entries whose name occurs anywhere in the current path string are
        # not descended into (presumably to avoid looping through links
        # that point back up the tree -- confirm before changing).
        if name in start_path:
            continue

        if not os.path.isdir(candidate):
            continue

        found = self.get_power_path(candidate)
        if found:
            return found

    return None
79+
80+
def get_gpu_channel(self):
    """Get the channel number of the sensor which measures the GPU power.

    Iterates over the ``channel@<n>`` directories in the ``of_node``
    directory of the power path and picks the first one whose ``label``
    file contains ``GPU``.

    :returns: ``<n> + 1`` as a string, the form in which it is inserted
        into the ``curr<N>_input`` / ``in<N>_input`` file names.
    :raises FileNotFoundError: When there is no power path, or no channel
        label mentions the GPU.
    """
    if not self.gpu_power_path:
        # get_power_path() returns None when its search finds nothing.
        raise FileNotFoundError(
            "No power sensor path available to look up the GPU channel"
        )

    # Path joining works whether or not the power path ends with "/".
    for channel_dir in (Path(self.gpu_power_path) / "of_node").iterdir():
        # Take everything after "channel@" so multi-digit indices are
        # parsed in full rather than by their last character only.
        _, marker, index = channel_dir.name.rpartition("channel@")
        if not marker or not index.isdigit():
            continue
        try:
            with open(channel_dir / "label") as fp:
                channel_label = fp.read().strip()
        except FileNotFoundError:
            # A channel without a label cannot be identified; try the next.
            continue
        if "GPU" in channel_label:
            return str(int(index) + 1)

    # If this statement is reached, no channel for the GPU was found
    raise FileNotFoundError("No channel found with GPU power readings")
94+
3995
def _read_railgate_file(self):
4096
"""Read railgate status"""
4197
with open(self.dev_path / Path("device/railgate_enable")) as fp:
@@ -115,7 +171,22 @@ def __del__(self):
115171
if self.has_changed_clocks:
116172
self.reset_clock()
117173

118-
174+
def read_gpu_temp(self):
    """Read GPU temperature.

    :returns: The integer stored in the ``temp`` file of the GPU
        temperature directory, divided by 1000 (presumably millidegrees
        Celsius converted to degrees Celsius -- confirm for the target
        board).
    """
    # Join as a path so the directory may be given with or without a
    # trailing "/".
    with open(Path(self.gpu_temp_path) / "temp") as fp:
        millis = int(fp.read())
    return millis / 1000
179+
180+
def read_gpu_power(self):
    """Read the current and voltage to calculate and return the power in watt.

    Both values are read with ``sudo cat`` from the ``curr<N>_input`` and
    ``in<N>_input`` files of the power path, where ``<N>`` is the GPU
    channel, and each is divided by 1000 before they are multiplied.
    """
    scaled = []
    # "curr" is read first, then "in" (voltage).
    for prefix in ("curr", "in"):
        completed = subprocess.run(
            ["sudo", "cat", f"{self.gpu_power_path}{prefix}{self.gpu_channel}_input"],
            capture_output=True,
            text=True,
        )
        scaled.append(int(completed.stdout.strip()) / 1000)

    current, voltage = scaled
    return current * voltage
189+
119190
class TegraObserver(BenchmarkObserver):
120191
"""Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device.
121192
@@ -131,46 +202,71 @@ class TegraObserver(BenchmarkObserver):
131202
def __init__(
    self,
    observables,
    save_all=False,
    powerPath="",
    tempPath=""
):
    """Create a TegraObserver

    :param observables: Names of the quantities to observe; each must be
        one of ``core_freq``, ``gpu_temp``, ``gpu_power``, ``gpu_energy``.
    :param save_all: Stored on the observer as ``save_all``.
    :param powerPath: Passed on to ``tegra`` as the power sensor directory.
    :param tempPath: Passed on to ``tegra`` as the temperature directory.
    :raises ValueError: When an observable is not supported.
    """
    self.tegra = tegra(powerPath=powerPath, tempPath=tempPath)
    self.save_all = save_all
    self._set_units = False

    supported = ["core_freq", "gpu_temp", "gpu_power", "gpu_energy"]
    for name in observables:
        if name not in supported:
            raise ValueError(f"Observable {name} not in supported: {supported}")
    self.observables = observables

    # Power and energy are handed to a separate continuous observer; this
    # observer keeps only the remaining observables for itself.
    self.needs_power = ["gpu_power", "gpu_energy"]
    power_observables = [name for name in observables if name in self.needs_power]
    self.measure_power = bool(power_observables)
    if self.measure_power:
        self.continuous_observer = tegraPowerObserver(
            power_observables, self, continous_duration=3
        )
        self.observables = [
            name for name in observables if name not in self.needs_power
        ]

    # One result list per remaining observable, keyed by its plural name.
    self.results = {name + "s": [] for name in self.observables}

    # Observables that are sampled while the kernel is running.
    sampled_while_running = ["core_freq", "gpu_temp"]
    self.during_obs = [
        name for name in observables if name in sampled_while_running
    ]

    self.iteration = {name: [] for name in self.during_obs}
243+
157244

158245
def before_start(self):
    """Reset the per-benchmark samples and register the power unit once."""
    # clear results of the observables for next measurement
    self.iteration = {obs: [] for obs in self.during_obs}

    if self._set_units:
        return

    # Set the power unit to Watts; only done on the first call.
    self.dev.units["power"] = "W"
    self._set_units = True
161252

162253
def after_start(self):
    """Record the start time in ``t0`` and take a first sample."""
    self.t0 = time.perf_counter()
    # Sample right away so that every benchmark has at least one
    # observation, even if during() is not called again.
    self.during()
165257

166258
def during(self):
    """Append one sample for each observed quantity to ``iteration``."""
    # Observable name -> how to take one sample of it.
    samplers = (
        ("core_freq", lambda: self.tegra.gr_clock),
        ("gpu_temp", lambda: self.tegra.read_gpu_temp()),
    )
    for name, take_sample in samplers:
        if name in self.observables:
            self.iteration[name].append(take_sample())
169263

170264
def after_finish(self):
    """Store the average of this benchmark's samples in ``results``."""
    for name in ("core_freq", "gpu_temp"):
        if name not in self.observables:
            continue
        # Results are kept under the plural form of the observable name.
        mean_value = np.average(self.iteration[name])
        self.results[name + "s"].append(mean_value)
269+
174270
def get_results(self):
175271
averaged_results = {}
176272

@@ -207,3 +303,70 @@ def get_tegra_gr_clocks(n=None, quiet=False):
207303
if not quiet:
208304
print("Using gr frequencies:", tune_params["tegra_gr_clock"])
209305
return tune_params
306+
307+
308+
class tegraPowerObserver(ContinuousObserver):
    """Observer that measures power using tegra and continuous benchmarking.

    Every hook first forwards to the parent observer and then handles the
    power readings, which are taken through ``parent.tegra``.
    """

    def __init__(self, observables, parent, continous_duration=1):
        """Create the power observer.

        :param observables: Subset of ``gpu_power`` and ``gpu_energy``.
        :param parent: Observer whose hooks are forwarded to and whose
            ``tegra`` attribute is used to read the power.
        :param continous_duration: Duration in seconds, stored as
            ``continuous_duration``.
        :raises ValueError: When an observable is not supported.
        """
        self.parent = parent

        supported = ["gpu_power", "gpu_energy"]
        unsupported = [name for name in observables if name not in supported]
        if unsupported:
            obs = unsupported[0]
            raise ValueError(f"Observable {obs} not in supported: {supported}")
        self.observables = observables

        # duration in seconds
        self.continuous_duration = continous_duration

        self.power = 0
        self.energy = 0
        self.power_readings = []
        self.t0 = 0

        # results from the last iteration-based benchmark
        self.results = None

    def before_start(self):
        """Forward to the parent and discard the previous measurements."""
        self.parent.before_start()
        self.power = 0
        self.energy = 0
        self.power_readings = []

    def after_start(self):
        """Forward to the parent and mark the start of the measurement."""
        self.parent.after_start()
        self.t0 = time.perf_counter()

    def during(self):
        """Forward to the parent and record a ``[timestamp, power]`` pair."""
        self.parent.during()
        power_usage = self.parent.tegra.read_gpu_power()
        timestamp = time.perf_counter() - self.t0

        # only store the result if we get a new measurement from tegra:
        # skip a reading equal to the previous one taken at most 0.01 s ago
        if self.power_readings:
            last_timestamp, last_power = self.power_readings[-1]
            if last_power == power_usage and timestamp - last_timestamp <= 0.01:
                return
        self.power_readings.append([timestamp, power_usage])

    def after_finish(self):
        """Forward to the parent and derive power and energy."""
        self.parent.after_finish()

        # safeguard in case we have no measurements, perhaps the kernel was
        # too short to measure anything
        if self.power_readings:
            # convert to seconds from milliseconds
            execution_time = self.results["time"] / 1e3
            self.power = np.median([reading[1] for reading in self.power_readings])
            self.energy = self.power * execution_time

    def get_results(self):
        """Return the parent's results, prefixed, plus the power results.

        Every key of the parent's results is renamed to ``pwr_<key>`` in the
        dict the parent returned; ``gpu_power`` and ``gpu_energy`` are added
        when they were requested.
        """
        results = self.parent.get_results()
        for key in list(results):
            results["pwr_" + key] = results.pop(key)

        if "gpu_power" in self.observables:
            results["gpu_power"] = self.power
        if "gpu_energy" in self.observables:
            results["gpu_energy"] = self.energy

        return results

0 commit comments

Comments
 (0)