|
| 1 | +//===--------- latency_tracker.cpp - common ------------------------------===// |
| 2 | +// |
| 3 | +// Copyright (C) 2024 Intel Corporation |
| 4 | +// |
| 5 | +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM |
| 6 | +// Exceptions. See LICENSE.TXT |
| 7 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 8 | +// |
| 9 | +//===----------------------------------------------------------------------===// |
| 10 | + |
| 11 | +#pragma once |
| 12 | + |
| 13 | +#include <chrono> |
| 14 | +#include <cstdint> |
| 15 | +#include <limits> |
| 16 | +#include <memory> |
| 17 | + |
| 18 | +#include "logger/ur_logger.hpp" |
| 19 | + |
| 20 | +#if defined(UR_ENABLE_LATENCY_HISTOGRAM) |
| 21 | + |
| 22 | +#include <hdr/hdr_histogram.h> |
| 23 | + |
| 24 | +static inline bool trackLatency = []() { |
| 25 | + try { |
| 26 | + auto map = getenv_to_map("UR_LOG_LATENCY"); |
| 27 | + |
| 28 | + if (!map.has_value()) { |
| 29 | + return false; |
| 30 | + } |
| 31 | + |
| 32 | + auto it = map->find("level"); |
| 33 | + return it != map->end() && |
| 34 | + logger::str_to_level(it->second.front()) != logger::Level::QUIET; |
| 35 | + } catch (...) { |
| 36 | + return false; |
| 37 | + } |
| 38 | +}(); |
| 39 | + |
| 40 | +static constexpr size_t numPercentiles = 7; |
| 41 | +static constexpr double percentiles[numPercentiles] = { |
| 42 | + 50.0, 90.0, 99.0, 99.9, 99.99, 99.999, 99.9999}; |
| 43 | + |
| 44 | +struct latencyValues { |
| 45 | + int64_t min; |
| 46 | + int64_t max; |
| 47 | + int64_t mean; |
| 48 | + int64_t stddev; |
| 49 | + int64_t percentileValues[numPercentiles]; |
| 50 | +}; |
| 51 | + |
| 52 | +using histogram_ptr = |
| 53 | + std::unique_ptr<struct hdr_histogram, decltype(&hdr_close)>; |
| 54 | + |
| 55 | +static inline latencyValues getValues(const struct hdr_histogram *histogram) { |
| 56 | + latencyValues values; |
| 57 | + values.max = hdr_max(histogram); |
| 58 | + values.min = hdr_min(histogram); |
| 59 | + values.mean = static_cast<int64_t>(hdr_mean(histogram)); |
| 60 | + values.stddev = static_cast<int64_t>(hdr_stddev(histogram)); |
| 61 | + |
| 62 | + auto ret = hdr_value_at_percentiles( |
| 63 | + histogram, percentiles, values.percentileValues, numPercentiles); |
| 64 | + if (ret != 0) { |
| 65 | + logger::error("Failed to get percentiles from latency histogram"); |
| 66 | + } |
| 67 | + |
| 68 | + return values; |
| 69 | +} |
| 70 | + |
| 71 | +class latency_printer { |
| 72 | + public: |
| 73 | + latency_printer() : logger(logger::create_logger("latency", true, false)) {} |
| 74 | + |
| 75 | + inline void publishLatency(const std::string &name, |
| 76 | + histogram_ptr histogram) { |
| 77 | + auto [it, inserted] = values.try_emplace(name, std::move(histogram)); |
| 78 | + if (!inserted) { |
| 79 | + // combine histograms |
| 80 | + hdr_add(it->second.get(), histogram.get()); |
| 81 | + } |
| 82 | + } |
| 83 | + |
| 84 | + inline ~latency_printer() { |
| 85 | + if (trackLatency) { |
| 86 | + print(); |
| 87 | + } |
| 88 | + } |
| 89 | + |
| 90 | + inline void print() { |
| 91 | + printHeader(); |
| 92 | + |
| 93 | + for (auto &[name, histogram] : values) { |
| 94 | + auto value = getValues(histogram.get()); |
| 95 | + logger.log(logger::Level::INFO, |
| 96 | + "{},{},{},{},{},{},{},{},{},{},{},{},ns", name, |
| 97 | + value.min, value.max, value.mean, value.stddev, |
| 98 | + value.percentileValues[0], value.percentileValues[1], |
| 99 | + value.percentileValues[2], value.percentileValues[3], |
| 100 | + value.percentileValues[4], value.percentileValues[5], |
| 101 | + value.percentileValues[6]); |
| 102 | + } |
| 103 | + } |
| 104 | + |
| 105 | + private: |
| 106 | + inline void printHeader() { |
| 107 | + logger.log(logger::Level::INFO, "Latency histogram:"); |
| 108 | + logger.log(logger::Level::INFO, |
| 109 | + "name,min,max,mean,stdev,p{},p{},p{},p{},p{},p{},p{},unit", |
| 110 | + percentiles[0], percentiles[1], percentiles[2], |
| 111 | + percentiles[3], percentiles[4], percentiles[5], |
| 112 | + percentiles[6]); |
| 113 | + } |
| 114 | + |
| 115 | + std::map<std::string, histogram_ptr> values; |
| 116 | + logger::Logger logger; |
| 117 | +}; |
| 118 | + |
| 119 | +inline latency_printer &globalLatencyPrinter() { |
| 120 | + static latency_printer printer; |
| 121 | + return printer; |
| 122 | +} |
| 123 | + |
| 124 | +class latency_histogram { |
| 125 | + public: |
| 126 | + inline latency_histogram(const char *name, |
| 127 | + latency_printer &printer = globalLatencyPrinter(), |
| 128 | + int64_t lowestDiscernibleValue = 1, |
| 129 | + int64_t highestTrackableValue = 100'000'000'000, |
| 130 | + int significantFigures = 3) |
| 131 | + : name(name), histogram(nullptr, nullptr), printer(printer) { |
| 132 | + if (trackLatency) { |
| 133 | + struct hdr_histogram *cHistogram; |
| 134 | + auto ret = hdr_init(lowestDiscernibleValue, highestTrackableValue, |
| 135 | + significantFigures, &cHistogram); |
| 136 | + if (ret != 0) { |
| 137 | + logger::error("Failed to initialize latency histogram"); |
| 138 | + } |
| 139 | + histogram = |
| 140 | + std::unique_ptr<struct hdr_histogram, decltype(&hdr_close)>( |
| 141 | + cHistogram, &hdr_close); |
| 142 | + } |
| 143 | + } |
| 144 | + |
| 145 | + latency_histogram(const latency_histogram &) = delete; |
| 146 | + latency_histogram(latency_histogram &&) = delete; |
| 147 | + |
| 148 | + inline ~latency_histogram() { |
| 149 | + if (!trackLatency || !histogram) { |
| 150 | + return; |
| 151 | + } |
| 152 | + |
| 153 | + if (hdr_min(histogram.get()) == std::numeric_limits<int64_t>::max()) { |
| 154 | + logger::info("[{}] latency: no data", name); |
| 155 | + return; |
| 156 | + } |
| 157 | + |
| 158 | + printer.publishLatency(name, std::move(histogram)); |
| 159 | + } |
| 160 | + |
| 161 | + inline void trackValue(int64_t value) { |
| 162 | + hdr_record_value(histogram.get(), value); |
| 163 | + } |
| 164 | + |
| 165 | + private: |
| 166 | + const char *name; |
| 167 | + histogram_ptr histogram; |
| 168 | + latency_printer &printer; |
| 169 | +}; |
| 170 | + |
| 171 | +class latency_tracker { |
| 172 | + public: |
| 173 | + inline explicit latency_tracker(latency_histogram &stats) |
| 174 | + : stats(trackLatency ? &stats : nullptr), begin() { |
| 175 | + if (trackLatency) { |
| 176 | + begin = std::chrono::steady_clock::now(); |
| 177 | + } |
| 178 | + } |
| 179 | + inline latency_tracker() {} |
| 180 | + inline ~latency_tracker() { |
| 181 | + if (stats) { |
| 182 | + auto tp = std::chrono::steady_clock::now(); |
| 183 | + auto diffNanos = |
| 184 | + std::chrono::duration_cast<std::chrono::nanoseconds>(tp - begin) |
| 185 | + .count(); |
| 186 | + stats->trackValue(static_cast<int64_t>(diffNanos)); |
| 187 | + } |
| 188 | + } |
| 189 | + |
| 190 | + latency_tracker(const latency_tracker &) = delete; |
| 191 | + latency_tracker &operator=(const latency_tracker &) = delete; |
| 192 | + |
| 193 | + inline latency_tracker(latency_tracker &&rhs) noexcept |
| 194 | + : stats(rhs.stats), begin(rhs.begin) { |
| 195 | + rhs.stats = nullptr; |
| 196 | + } |
| 197 | + |
| 198 | + inline latency_tracker &operator=(latency_tracker &&rhs) noexcept { |
| 199 | + if (this != &rhs) { |
| 200 | + this->~latency_tracker(); |
| 201 | + new (this) latency_tracker(std::move(rhs)); |
| 202 | + } |
| 203 | + return *this; |
| 204 | + } |
| 205 | + |
| 206 | + private: |
| 207 | + latency_histogram *stats{nullptr}; |
| 208 | + std::chrono::time_point<std::chrono::steady_clock> begin; |
| 209 | +}; |
| 210 | + |
| 211 | +// To resolve __COUNTER__ |
| 212 | +#define CONCAT(a, b) a##b |
| 213 | + |
| 214 | +// Each tracker has it's own thread-local histogram. |
| 215 | +// At program exit, all histograms for the same scope are |
| 216 | +// aggregated. |
| 217 | +#define TRACK_SCOPE_LATENCY_CNT(name, cnt) \ |
| 218 | + static thread_local latency_histogram CONCAT(histogram, cnt)(name); \ |
| 219 | + latency_tracker CONCAT(tracker, cnt)(CONCAT(histogram, cnt)); |
| 220 | +#define TRACK_SCOPE_LATENCY(name) TRACK_SCOPE_LATENCY_CNT(name, __COUNTER__) |
| 221 | + |
| 222 | +#else // UR_ENABLE_LATENCY_HISTOGRAM |
| 223 | + |
| 224 | +#define TRACK_SCOPE_LATENCY(name) |
| 225 | + |
| 226 | +#endif // UR_ENABLE_LATENCY_HISTOGRAM |
0 commit comments