|
| 1 | +import subprocess |
| 2 | +import time |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | +import numpy as np |
| 6 | + |
| 7 | +from kernel_tuner.observers.observer import BenchmarkObserver |
| 8 | + |
| 9 | + |
class tegra:
    """Class that gathers the Tegra functionality for one device.

    Reads the devfreq control files of the integrated GPU to query the core
    clock, and writes them (through ``sudo``) to pin the clock to a requested
    value. The min/max frequency and railgate setting found at construction
    time are remembered so that :meth:`reset_clock` can restore them.
    """

    def __init__(self):
        """Create object to control GPU core clock on a Tegra device.

        :raises FileNotFoundError: if no supported GPU devfreq entry is found.
        """
        # Set first so __del__ stays safe when one of the reads below raises.
        self.has_changed_clocks = False

        self.dev_path = self.get_dev_path()
        self.default_min_gr_clock = self._read_clock_file("min_freq")
        self.default_max_gr_clock = self._read_clock_file("max_freq")
        # _read_clock_file returns a bare int for a single value; keep this
        # attribute an array so membership tests and len() always work.
        self.supported_gr_clocks = np.atleast_1d(self._read_clock_file("available_frequencies"))

        self.default_railgate_status = self._read_railgate_file()

    @staticmethod
    def get_dev_path(devfreq_root="/sys/class/devfreq"):
        """Get the path to device core clock control in /sys

        :param devfreq_root: Directory whose entries are scanned for the GPU.
        :type devfreq_root: str or Path

        :returns: The first entry whose ``device/of_node/name`` is a known GPU name.
        :rtype: Path

        :raises FileNotFoundError: if ``devfreq_root`` does not exist or no
            entry matches a known GPU name.
        """
        gpu_names = ("gv11b", "gp10b", "ga10b", "gpu")
        # loop to find GPU device name based on jetson_clocks
        for dev in Path(devfreq_root).iterdir():
            try:
                with open(dev / "device" / "of_node" / "name") as fp:
                    # the name may carry a trailing NUL and/or whitespace
                    name = fp.read().strip("\x00 \t\r\n")
            except OSError:
                # entry without a readable name file cannot be the GPU; keep looking
                continue
            if name in gpu_names:
                return dev
        raise FileNotFoundError("No internal tegra GPU found")

    @staticmethod
    def _sudo_write(full_path, value):
        """Write ``value`` to the control file ``full_path`` via ``sudo sh -c``.

        :raises subprocess.CalledProcessError: if the command exits non-zero.
        """
        args = [
            "sudo",
            "sh",
            "-c",
            f"echo {value} > {str(full_path)}"
        ]
        subprocess.run(args, check=True)

    def _read_railgate_file(self):
        """Read railgate status

        :returns: Integer content of ``device/railgate_enable``.
        :rtype: int
        """
        with open(self.dev_path / Path("device/railgate_enable")) as fp:
            data = int(fp.read().strip())
        return data

    def _write_railgate_file(self, value):
        """Set railgate status

        :param value: New railgate setting, 0 or 1.
        :raises ValueError: if ``value`` is neither 0 nor 1.
        """
        if value not in (0, 1):
            raise ValueError(f"Illegal railgate value {value}, must be 0 or 1")
        self._sudo_write(self.dev_path / Path("device/railgate_enable"), value)

    def _read_clock_file(self, fname):
        """Read current or available frequency value(s) from a frequency control file

        :param fname: File name relative to the device path.
        :returns: An ``int`` when the file holds one value, otherwise an
            integer NumPy array with one element per whitespace-separated value.
        :raises ValueError: if the file holds no value or a non-integer value.
        """
        with open(self.dev_path / Path(fname)) as fp:
            fields = fp.read().split()
        if not fields:
            raise ValueError(f"No frequency value found in {fname}")
        if len(fields) > 1:
            return np.array([int(field) for field in fields])
        # convert the single token directly instead of int() on a 1-element array
        return int(fields[0])

    def _write_clock_file(self, fname, value):
        """Write a frequency value to a core clock control file

        :param fname: Either "min_freq" or "max_freq".
        :param value: Frequency to write; must be one of ``supported_gr_clocks``.
        :raises ValueError: if ``fname`` or ``value`` is not allowed.
        """
        available_files = ("min_freq", "max_freq")
        if fname not in available_files:
            raise ValueError(f"Illegal filename value: {fname}, must be one of {available_files}")

        if value not in self.supported_gr_clocks:
            raise ValueError(f"Illegal frequency value {value}, must be one of {self.supported_gr_clocks}")

        # flag only once a write is really attempted, so a rejected request
        # does not trigger a clock reset on deletion
        self.has_changed_clocks = True
        self._sudo_write(self.dev_path / Path(fname), value)

    @property
    def gr_clock(self):
        """Control the core clock frequency"""
        return self._read_clock_file("cur_freq")

    @gr_clock.setter
    def gr_clock(self, new_clock):
        self._write_railgate_file(0)
        cur_clock = self._read_clock_file("cur_freq")
        if new_clock > cur_clock:
            # going up: raise the ceiling first, then the floor
            self._write_clock_file("max_freq", new_clock)
            self._write_clock_file("min_freq", new_clock)
        else:
            # going down: lower the floor first, then the ceiling. Also taken
            # when the clock already equals the target, so that min and max
            # are still pinned to it.
            self._write_clock_file("min_freq", new_clock)
            self._write_clock_file("max_freq", new_clock)
        # wait for the new clock to be applied
        # NOTE(review): no timeout; this spins until cur_freq reports new_clock
        while self._read_clock_file("cur_freq") != new_clock:
            time.sleep(.001)

    def reset_clock(self):
        """Reset the core clock frequency to the original values"""
        self._write_clock_file("min_freq", self.default_min_gr_clock)
        self._write_clock_file("max_freq", self.default_max_gr_clock)
        self._write_railgate_file(self.default_railgate_status)

    def __del__(self):
        # restore original core clocks, if changed; getattr covers an object
        # whose __init__ never ran or failed before the flag was set
        if getattr(self, "has_changed_clocks", False):
            self.reset_clock()
| 116 | + |
| 117 | + |
class TegraObserver(BenchmarkObserver):
    """Observer that uses /sys/ to monitor and control graphics clock frequencies on a Tegra device.

    :param observables: List of quantities that should be observed during tuning, supported is: "core_freq"
    :type observables: list of strings

    :param save_all: If set to True, all data collected by the TegraObserver for every iteration during
        benchmarking will be returned. If set to False, data will be aggregated over multiple iterations
        during benchmarking. False by default.
    :type save_all: boolean

    """

    def __init__(self, observables, save_all=False):
        """Create a TegraObserver"""
        self.tegra = tegra()
        self.save_all = save_all

        # reject any requested quantity this observer cannot provide
        supported = ["core_freq"]
        for requested in observables:
            if requested not in supported:
                raise ValueError(f"Observable {requested} not in supported: {supported}")
        self.observables = observables

        # one list of per-benchmark values per observable, stored under "<name>s"
        self.results = {name + "s": [] for name in self.observables}

        # quantities that are sampled while the kernel is running
        self.during_obs = [name for name in observables if name in ["core_freq"]]

        # samples gathered within the current benchmark run
        self.iteration = {name: [] for name in self.during_obs}

    def before_start(self):
        """Start each measurement with empty sample lists."""
        self.iteration = {name: [] for name in self.during_obs}

    def after_start(self):
        """Take one sample right away so that at least one exists."""
        self.during()

    def during(self):
        """Record the current core clock, if it is being observed."""
        if "core_freq" not in self.observables:
            return
        self.iteration["core_freq"].append(self.tegra.gr_clock)

    def after_finish(self):
        """Store the average of this run's samples."""
        if "core_freq" not in self.observables:
            return
        run_average = np.average(self.iteration["core_freq"])
        self.results["core_freqs"].append(run_average)

    def get_results(self):
        """Return the collected results and clear them for the next round.

        The average over all runs is stored under each observable's name; when
        ``save_all`` is set, the per-run values are also stored under "<name>s".
        """
        averaged_results = {}

        for name in self.observables:
            per_run = self.results[name + "s"]
            # save all information, if the user requested
            if self.save_all:
                averaged_results[name + "s"] = per_run
            # save averaged results, default
            averaged_results[name] = np.average(per_run)

        # clear results for next round
        for name in self.observables:
            self.results[name + "s"] = []

        return averaged_results
| 189 | + |
| 190 | + |
| 191 | +# High-level Helper functions |
| 192 | + |
| 193 | + |
def get_tegra_gr_clocks(n=None, quiet=False):
    """Get tunable parameter for Tegra graphics clock, n is desired number of values.

    :param n: Desired number of clock values. When falsy, or not smaller than
        the number of supported clocks, all supported clocks are returned.
    :type n: int or None

    :param quiet: If True, do not print the selected frequencies.
    :type quiet: boolean

    :returns: Dictionary with the single key "tegra_gr_clock" holding the
        selected frequencies as plain Python ints.
    :rtype: dict
    """
    d = tegra()
    # a device listing a single frequency may expose a scalar; normalise to 1-D
    gr_clocks = np.atleast_1d(d.supported_gr_clocks)

    if n and (len(gr_clocks) > n):
        # n evenly spread positions over the supported clocks, rounded up
        indices = np.array(np.ceil(np.linspace(0, len(gr_clocks) - 1, n)), dtype=int)
        gr_clocks = gr_clocks[indices]

    # plain ints rather than NumPy scalars, so the values print and serialise cleanly
    tune_params = {"tegra_gr_clock": [int(clock) for clock in gr_clocks]}

    if not quiet:
        print("Using gr frequencies:", tune_params["tegra_gr_clock"])
    return tune_params
0 commit comments