13
13
class tegra :
14
14
"""Class that gathers the Tegra functionality for one device."""
15
15
16
- def __init__ (self , powerPath , tempPath ):
17
- self .has_changed_clocks = False
16
+ def __init__ (self , power_path , temp_path ):
18
17
"""Create object to control GPU core clock on a Tegra device."""
18
+ self .has_changed_clocks = False
19
+
19
20
# Get paths
20
21
self .dev_path = self .get_dev_path ()
21
- if tempPath == "" :
22
+ if temp_path == "" :
22
23
self .gpu_temp_path = self .get_temp_path ()
23
24
else :
24
- self .gpu_temp_path = tempPath
25
- if powerPath == "" :
25
+ self .gpu_temp_path = temp_path
26
+ if power_path == "" :
26
27
self .gpu_power_path = self .get_power_path ()
27
28
else :
28
- self .gpu_power_path = powerPath
29
+ self .gpu_power_path = power_path
29
30
self .gpu_channel = self .get_gpu_channel ()
30
-
31
+
31
32
# Read default clock values
32
33
self .default_min_gr_clock = self ._read_clock_file ("min_freq" )
33
34
self .default_max_gr_clock = self ._read_clock_file ("max_freq" )
34
35
self .supported_gr_clocks = self ._read_clock_file ("available_frequencies" )
35
36
36
37
self .default_railgate_status = self ._read_railgate_file ()
37
-
38
+
38
39
@staticmethod
39
40
def get_dev_path ():
40
41
"""Get the path to device core clock control in /sys"""
@@ -55,20 +56,20 @@ def get_temp_path(self):
55
56
with open (zone / Path ("type" )) as fp :
56
57
name = fp .read ().strip ()
57
58
if name == "GPU-therm" :
58
- gpu_temp_path = str (zone ) + "/"
59
+ gpu_temp_path = str (zone )
59
60
break
60
- else :
61
+
62
+ if gpu_temp_path is None :
61
63
raise FileNotFoundError ("No GPU sensor for temperature found" )
62
-
64
+
63
65
return gpu_temp_path
64
66
65
67
def get_power_path (self , start_path = "/sys/bus/i2c/drivers/ina3221" ):
66
- """Recursively search for a file which holds power readings
67
- starting from start_path."""
68
+ """Search for a file which holds power readings"""
68
69
for entry in os .listdir (start_path ):
69
70
path = os .path .join (start_path , entry )
70
71
if os .path .isfile (path ) and entry == "curr1_input" :
71
- return start_path + "/"
72
+ return start_path
72
73
elif entry in start_path :
73
74
continue
74
75
elif os .path .isdir (path ):
@@ -79,10 +80,9 @@ def get_power_path(self, start_path="/sys/bus/i2c/drivers/ina3221"):
79
80
80
81
def get_gpu_channel (self ):
81
82
"""Get the channel number of the sensor which measures the GPU power"""
82
-
83
- # Iterate over all channels in the of_node dir of the power path to
84
- # find the channel which holds GPU power information
85
- for channel_dir in Path (self .gpu_power_path + "of_node/" ).iterdir ():
83
+ # Iterate over all channels in the of_node dir of the power path to
84
+ # find the channel which holds GPU power information
85
+ for channel_dir in Path (self .gpu_power_path + "/of_node/" ).iterdir ():
86
86
if ("channel@" in channel_dir .name ):
87
87
with open (channel_dir / Path ("label" )) as fp :
88
88
channel_label = fp .read ().strip ()
@@ -173,18 +173,18 @@ def __del__(self):
173
173
174
174
def read_gpu_temp (self ):
175
175
"""Read GPU temperature"""
176
- with open (self .gpu_temp_path + "temp" ) as fp :
176
+ with open (self .gpu_temp_path + "/ temp" ) as fp :
177
177
temp = int (fp .read ())
178
178
return temp / 1000
179
-
179
+
180
180
def read_gpu_power (self ):
181
181
"""Read the current and voltage to calculate and return the power int watt"""
182
-
183
- result_cur = subprocess .run (["sudo" , "cat" , f"{ self .gpu_power_path } curr{ self .gpu_channel } _input" ], capture_output = True , text = True )
182
+
183
+ result_cur = subprocess .run (["sudo" , "cat" , f"{ self .gpu_power_path } / curr{ self .gpu_channel } _input" ], capture_output = True , text = True )
184
184
current = int (result_cur .stdout .strip ()) / 1000
185
- result_vol = subprocess .run (["sudo" , "cat" , f"{ self .gpu_power_path } in{ self .gpu_channel } _input" ], capture_output = True , text = True )
185
+ result_vol = subprocess .run (["sudo" , "cat" , f"{ self .gpu_power_path } / in{ self .gpu_channel } _input" ], capture_output = True , text = True )
186
186
voltage = int (result_vol .stdout .strip ()) / 1000
187
-
187
+
188
188
return current * voltage
189
189
190
190
class TegraObserver (BenchmarkObserver ):
@@ -203,20 +203,20 @@ def __init__(
203
203
self ,
204
204
observables ,
205
205
save_all = False ,
206
- powerPath = "" ,
207
- tempPath = ""
206
+ power_path = "" ,
207
+ temp_path = ""
208
208
):
209
209
"""Create a TegraObserver"""
210
- self .tegra = tegra (powerPath = powerPath , tempPath = tempPath )
210
+ self .tegra = tegra (power_path = power_path , temp_path = temp_path )
211
211
self .save_all = save_all
212
212
self ._set_units = False
213
-
213
+
214
214
supported = ["core_freq" , "gpu_temp" , "gpu_power" , "gpu_energy" ]
215
215
for obs in observables :
216
216
if obs not in supported :
217
217
raise ValueError (f"Observable { obs } not in supported: { supported } " )
218
218
self .observables = observables
219
-
219
+
220
220
# Observe power measurements with the continuous observer
221
221
self .measure_power = False
222
222
self .needs_power = ["gpu_power" , "gpu_energy" ]
@@ -228,7 +228,7 @@ def __init__(
228
228
)
229
229
# remove power observables
230
230
self .observables = [obs for obs in observables if obs not in self .needs_power ]
231
-
231
+
232
232
self .results = {}
233
233
for obs in self .observables :
234
234
self .results [obs + "s" ] = []
@@ -309,13 +309,13 @@ class tegraPowerObserver(ContinuousObserver):
309
309
"""Observer that measures power using tegra and continuous benchmarking."""
310
310
def __init__ (self , observables , parent , continous_duration = 1 ):
311
311
self .parent = parent
312
-
312
+
313
313
supported = ["gpu_power" , "gpu_energy" ]
314
314
for obs in observables :
315
315
if obs not in supported :
316
316
raise ValueError (f"Observable { obs } not in supported: { supported } " )
317
317
self .observables = observables
318
-
318
+
319
319
# duration in seconds
320
320
self .continuous_duration = continous_duration
321
321
@@ -326,17 +326,17 @@ def __init__(self, observables, parent, continous_duration=1):
326
326
327
327
# results from the last iteration-based benchmark
328
328
self .results = None
329
-
329
+
330
330
def before_start (self ):
331
331
self .parent .before_start ()
332
332
self .power = 0
333
333
self .energy = 0
334
334
self .power_readings = []
335
-
335
+
336
336
def after_start (self ):
337
337
self .parent .after_start ()
338
338
self .t0 = time .perf_counter ()
339
-
339
+
340
340
def during (self ):
341
341
self .parent .during ()
342
342
power_usage = self .parent .tegra .read_gpu_power ()
@@ -347,7 +347,7 @@ def during(self):
347
347
or timestamp - self .power_readings [- 1 ][0 ] > 0.01
348
348
):
349
349
self .power_readings .append ([timestamp , power_usage ])
350
-
350
+
351
351
def after_finish (self ):
352
352
self .parent .after_finish ()
353
353
# safeguard in case we have no measurements, perhaps the kernel was too short to measure anything
@@ -358,7 +358,7 @@ def after_finish(self):
358
358
execution_time = self .results ["time" ] / 1e3
359
359
self .power = np .median ([d [1 ] for d in self .power_readings ])
360
360
self .energy = self .power * execution_time
361
-
361
+
362
362
def get_results (self ):
363
363
results = self .parent .get_results ()
364
364
keys = list (results .keys ())
@@ -368,5 +368,5 @@ def get_results(self):
368
368
results ["gpu_power" ] = self .power
369
369
if "gpu_energy" in self .observables :
370
370
results ["gpu_energy" ] = self .energy
371
-
371
+
372
372
return results
0 commit comments