1
1
import subprocess
2
2
import time
3
3
from pathlib import Path
4
+ import os
4
5
5
6
import numpy as np
6
7
7
- from kernel_tuner .observers .observer import BenchmarkObserver
8
+ from kernel_tuner .observers .observer import BenchmarkObserver , ContinuousObserver
9
+ from kernel_tuner .observers .pmt import PMTObserver
10
+ from kernel_tuner .observers .powersensor import PowerSensorObserver
8
11
9
12
10
13
class tegra :
11
14
"""Class that gathers the Tegra functionality for one device."""
12
15
13
def __init__(self, powerPath="", tempPath=""):
    """Create object to control GPU core clock on a Tegra device.

    :param powerPath: Directory holding the power sensor files, written
        with a trailing "/". When empty, get_power_path() is used to
        search for it.
    :param tempPath: Directory holding the GPU temperature file. When
        empty, get_temp_path() is used to search for it.
    :raises FileNotFoundError: If no directory with power readings is found.
    """
    # Set before anything that can raise: __del__ reads this attribute
    self.has_changed_clocks = False

    # Get paths
    self.dev_path = self.get_dev_path()
    self.gpu_temp_path = tempPath if tempPath else self.get_temp_path()
    self.gpu_power_path = powerPath if powerPath else self.get_power_path()
    if self.gpu_power_path is None:
        # get_power_path() returns None when its search finds nothing;
        # stop here instead of failing later on a None path
        raise FileNotFoundError("No sensor for GPU power readings found")
    self.gpu_channel = self.get_gpu_channel()

    # Read default clock values
    self.default_min_gr_clock = self._read_clock_file("min_freq")
    self.default_max_gr_clock = self._read_clock_file("max_freq")
    self.supported_gr_clocks = self._read_clock_file("available_frequencies")

    self.default_railgate_status = self._read_railgate_file()
25
38
@staticmethod
26
39
def get_dev_path ():
27
40
"""Get the path to device core clock control in /sys"""
@@ -36,6 +49,49 @@ def get_dev_path():
36
49
raise FileNotFoundError ("No internal tegra GPU found" )
37
50
return root_path
38
51
52
def get_temp_path(self, thermal_root="/sys/class/thermal"):
    """Find the directory which holds the GPU temperature.

    Looks through the entries of thermal_root for one whose "type" file
    contains "GPU-therm".

    :param thermal_root: Directory whose entries are searched.
    :returns: Path of the matching entry as a string with a trailing "/".
    :raises FileNotFoundError: If no entry reports the type "GPU-therm".
    """
    for zone in Path(thermal_root).iterdir():
        try:
            with open(zone / "type") as fp:
                name = fp.read().strip()
        except (FileNotFoundError, NotADirectoryError):
            # An entry without a "type" file cannot be the GPU sensor
            continue
        if name == "GPU-therm":
            # Path objects cannot be added to a str, so convert first
            return str(zone) + "/"

    raise FileNotFoundError("No GPU sensor for temperature found")
64
+
65
def get_power_path(self, start_path="/sys/bus/i2c/drivers/ina3221"):
    """Search start_path and its subdirectories for power readings.

    A directory qualifies when it directly contains a file named
    "curr1_input".

    :param start_path: Directory where the search begins.
    :returns: The qualifying directory followed by "/", or None when
        nothing was found below start_path.
    """
    for name in os.listdir(start_path):
        candidate = os.path.join(start_path, name)
        if name == "curr1_input" and os.path.isfile(candidate):
            return start_path + "/"
        # Entries whose name already occurs in the current path are not
        # followed, and anything that is not a directory cannot be searched
        if name in start_path or not os.path.isdir(candidate):
            continue
        found = self.get_power_path(candidate)
        if found:
            return found
    return None
79
+
80
def get_gpu_channel(self):
    """Get the channel number of the sensor which measures the GPU power.

    Looks through the "channel@<n>" entries in the "of_node" directory of
    the power path and picks the first one whose "label" file contains
    "GPU".

    :returns: The number <n> plus one, as a string.
    :raises FileNotFoundError: If no channel label contains "GPU".
    """
    for channel_dir in (Path(self.gpu_power_path) / "of_node").iterdir():
        if "channel@" not in channel_dir.name:
            continue
        with open(channel_dir / "label") as fp:
            channel_label = fp.read().strip()
        if "GPU" in channel_label:
            # Take everything after the "@" so that numbers with more than
            # one digit are read in full
            channel_index = int(channel_dir.name.rpartition("@")[2])
            # presumably +1 because the curr<n>_input / in<n>_input files
            # are numbered from one -- TODO confirm
            return str(channel_index + 1)

    # If this statement is reached, no channel for the GPU was found
    raise FileNotFoundError("No channel found with GPU power readings")
94
+
39
95
def _read_railgate_file (self ):
40
96
"""Read railgate status"""
41
97
with open (self .dev_path / Path ("device/railgate_enable" )) as fp :
@@ -115,7 +171,22 @@ def __del__(self):
115
171
if self .has_changed_clocks :
116
172
self .reset_clock ()
117
173
118
-
174
def read_gpu_temp(self):
    """Read GPU temperature.

    :returns: The integer stored in the "temp" file of the GPU temperature
        directory, divided by 1000 (presumably millidegrees Celsius
        converted to degrees -- TODO confirm).
    """
    # Joining through Path works whether or not the configured directory
    # ends in "/"
    with open(Path(self.gpu_temp_path) / "temp") as fp:
        return int(fp.read()) / 1000
179
+
180
def read_gpu_power(self):
    """Read the current and voltage to calculate and return the power in watt.

    Both values are read with "sudo cat" from the curr<channel>_input and
    in<channel>_input files in the power path. Each reading is divided by
    1000 before the two are multiplied (presumably mA and mV converted to
    A and V -- TODO confirm).

    :returns: Product of the scaled current and voltage readings.
    :raises subprocess.CalledProcessError: If one of the reads exits with a
        non-zero status.
    """
    # check=True makes a failed read raise here, instead of surfacing as a
    # ValueError from int("") on the empty output
    result_cur = subprocess.run(
        ["sudo", "cat", f"{self.gpu_power_path}curr{self.gpu_channel}_input"],
        capture_output=True,
        text=True,
        check=True,
    )
    current = int(result_cur.stdout.strip()) / 1000
    result_vol = subprocess.run(
        ["sudo", "cat", f"{self.gpu_power_path}in{self.gpu_channel}_input"],
        capture_output=True,
        text=True,
        check=True,
    )
    voltage = int(result_vol.stdout.strip()) / 1000

    return current * voltage
189
+
119
190
class TegraObserver (BenchmarkObserver ):
120
191
"""Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device.
121
192
@@ -131,46 +202,71 @@ class TegraObserver(BenchmarkObserver):
131
202
def __init__(
    self,
    observables,
    save_all=False,
    powerPath="",
    tempPath="",
):
    """Create a TegraObserver

    :param observables: Names of the quantities to observe; each must be
        one of "core_freq", "gpu_temp", "gpu_power" or "gpu_energy".
    :param save_all: Stored on the observer as save_all.
    :param powerPath: Passed on to tegra as powerPath.
    :param tempPath: Passed on to tegra as tempPath.
    :raises ValueError: If an observable is not supported.
    """
    self.tegra = tegra(powerPath=powerPath, tempPath=tempPath)
    self.save_all = save_all
    self._set_units = False

    supported = ["core_freq", "gpu_temp", "gpu_power", "gpu_energy"]
    for requested in observables:
        if requested not in supported:
            raise ValueError(f"Observable {requested} not in supported: {supported}")
    self.observables = observables

    # Power related observables are handed to a continuous observer and
    # taken out of the list handled by this observer
    self.needs_power = ["gpu_power", "gpu_energy"]
    power_observables = [name for name in observables if name in self.needs_power]
    self.measure_power = bool(power_observables)
    if self.measure_power:
        # keyword spelling follows the tegraPowerObserver signature
        self.continuous_observer = tegraPowerObserver(
            power_observables, self, continous_duration=3
        )
        self.observables = [
            name for name in observables if name not in self.needs_power
        ]

    # One result list per remaining observable, keyed by its plural name
    self.results = {name + "s": [] for name in self.observables}

    # Observables that are sampled while the kernel is running
    self.during_obs = [
        name for name in observables if name in ["core_freq", "gpu_temp"]
    ]

    self.iteration = {name: [] for name in self.during_obs}
243
+
157
244
158
245
def before_start(self):
    """Reset the per-run readings and, on the first call, set the power unit."""
    # clear results of the observables for next measurement
    self.iteration = {obs: [] for obs in self.during_obs}
    # Set the power unit to Watts; the flag makes sure this happens only once
    if not self._set_units:
        self.dev.units["power"] = "W"
        self._set_units = True
161
252
162
253
def after_start(self):
    """Store the start time in t0 and take a first reading."""
    started_at = time.perf_counter()
    self.t0 = started_at
    # one explicit call so that at least one reading exists for this run
    self.during()
165
257
166
258
def during(self):
    """Append the current reading of every requested observable."""
    requested = self.observables
    readings = self.iteration
    if "core_freq" in requested:
        readings["core_freq"].append(self.tegra.gr_clock)
    if "gpu_temp" in requested:
        readings["gpu_temp"].append(self.tegra.read_gpu_temp())
169
263
170
264
def after_finish(self):
    """Store the average of this run's readings for each requested observable."""
    # (observable name, key of its list in self.results)
    targets = (("core_freq", "core_freqs"), ("gpu_temp", "gpu_temps"))
    for observable, result_key in targets:
        if observable in self.observables:
            run_average = np.average(self.iteration[observable])
            self.results[result_key].append(run_average)
269
+
174
270
def get_results (self ):
175
271
averaged_results = {}
176
272
@@ -207,3 +303,70 @@ def get_tegra_gr_clocks(n=None, quiet=False):
207
303
if not quiet :
208
304
print ("Using gr frequencies:" , tune_params ["tegra_gr_clock" ])
209
305
return tune_params
306
+
307
+
308
class tegraPowerObserver(ContinuousObserver):
    """Observer that measures power using tegra and continuous benchmarking."""

    def __init__(self, observables, parent, continous_duration=1):
        """Create a tegraPowerObserver.

        :param observables: Names to report; each must be "gpu_power" or
            "gpu_energy".
        :param parent: Observer whose hooks are called from the hooks of
            this one and whose tegra attribute is used to read the power.
        :param continous_duration: Stored as continuous_duration (seconds).
        :raises ValueError: If an observable is not supported.
        """
        self.parent = parent

        supported = ["gpu_power", "gpu_energy"]
        for requested in observables:
            if requested not in supported:
                raise ValueError(f"Observable {requested} not in supported: {supported}")
        self.observables = observables

        # duration in seconds
        self.continuous_duration = continous_duration

        self.power = 0
        self.energy = 0
        self.power_readings = []
        self.t0 = 0

        # results from the last iteration-based benchmark
        self.results = None

    def before_start(self):
        """Run the parent's hook and reset the values of the previous run."""
        self.parent.before_start()
        self.power = 0
        self.energy = 0
        self.power_readings = []

    def after_start(self):
        """Run the parent's hook and store the start time."""
        self.parent.after_start()
        self.t0 = time.perf_counter()

    def during(self):
        """Run the parent's hook and record a [timestamp, power] reading."""
        self.parent.during()
        reading = self.parent.tegra.read_gpu_power()
        elapsed = time.perf_counter() - self.t0
        if self.power_readings:
            last_time, last_power = self.power_readings[-1]
            # only store the result if we get a new measurement from tegra:
            # skip a repeated value that arrives within 0.01 s of the last one
            if last_power == reading and elapsed - last_time <= 0.01:
                return
        self.power_readings.append([elapsed, reading])

    def after_finish(self):
        """Run the parent's hook and compute power and energy for this run."""
        self.parent.after_finish()
        # Without any readings (perhaps the kernel was too short to measure
        # anything) power and energy keep their reset values
        if self.power_readings:
            # convert to seconds from milliseconds
            seconds = self.results["time"] / 1e3
            self.power = np.median([entry[1] for entry in self.power_readings])
            self.energy = self.power * seconds

    def get_results(self):
        """Return the parent's results with a "pwr_" prefix plus the power values."""
        results = self.parent.get_results()
        # Rename in place; iterate over a copy of the keys because the
        # dictionary changes during the loop
        for key in list(results):
            results["pwr_" + key] = results.pop(key)
        if "gpu_power" in self.observables:
            results["gpu_power"] = self.power
        if "gpu_energy" in self.observables:
            results["gpu_energy"] = self.energy

        return results
0 commit comments