|
2 | 2 |
|
3 | 3 | import logging
|
4 | 4 | import os
|
| 5 | +import re |
5 | 6 | import typing as t
|
6 | 7 | from functools import partial
|
7 | 8 | from typing import TYPE_CHECKING
|
@@ -101,9 +102,107 @@ def generate_latest(self):
|
101 | 102 | if self.multiproc:
|
102 | 103 | registry = self.prometheus_client.CollectorRegistry()
|
103 | 104 | self.prometheus_client.multiprocess.MultiProcessCollector(registry)
|
104 |
| - return self.prometheus_client.generate_latest(registry) |
| 105 | + raw_output = self.prometheus_client.generate_latest(registry) |
| 106 | + return self._fix_histogram_ordering(raw_output) |
105 | 107 | else:
|
106 |
| - return self.prometheus_client.generate_latest() |
| 108 | + raw_output = self.prometheus_client.generate_latest() |
| 109 | + return self._fix_histogram_ordering(raw_output) |
| 110 | + |
| 111 | + def _fix_histogram_ordering(self, prometheus_output: bytes) -> bytes: |
| 112 | + """ |
| 113 | + Fix histogram metric ordering to comply with Prometheus text format specification. |
| 114 | + |
| 115 | + The Prometheus format requires histogram metrics to be grouped by metric name with: |
| 116 | + 1. All _bucket metrics for a histogram (in ascending order of 'le' values) |
| 117 | + 2. Followed by _count metric |
| 118 | + 3. Followed by _sum metric |
| 119 | + |
| 120 | + Args: |
| 121 | + prometheus_output: Raw Prometheus format output |
| 122 | + |
| 123 | + Returns: |
| 124 | + Properly ordered Prometheus format output |
| 125 | + """ |
| 126 | + lines = prometheus_output.decode('utf-8').strip().split('\n') |
| 127 | + |
| 128 | + # Separate comments/help lines from metric lines |
| 129 | + comment_lines = [] |
| 130 | + metric_lines = [] |
| 131 | + |
| 132 | + for line in lines: |
| 133 | + if line.startswith('#') or line.strip() == '': |
| 134 | + comment_lines.append(line) |
| 135 | + else: |
| 136 | + metric_lines.append(line) |
| 137 | + |
| 138 | + # Group metrics by base name (without _bucket, _count, _sum suffixes) |
| 139 | + metrics_by_base = {} |
| 140 | + non_histogram_metrics = [] |
| 141 | + |
| 142 | + for line in metric_lines: |
| 143 | + if not line.strip(): |
| 144 | + continue |
| 145 | + |
| 146 | + # Extract metric name (everything before the first space or '{') |
| 147 | + if '{' in line: |
| 148 | + metric_name = line.split('{')[0] |
| 149 | + else: |
| 150 | + metric_name = line.split(' ')[0] |
| 151 | + |
| 152 | + # Check if this is a histogram metric |
| 153 | + if metric_name.endswith('_bucket'): |
| 154 | + base_name = metric_name[:-7] # Remove '_bucket' |
| 155 | + if base_name not in metrics_by_base: |
| 156 | + metrics_by_base[base_name] = {'bucket': [], 'count': [], 'sum': []} |
| 157 | + metrics_by_base[base_name]['bucket'].append(line) |
| 158 | + elif metric_name.endswith('_count'): |
| 159 | + base_name = metric_name[:-6] # Remove '_count' |
| 160 | + if base_name not in metrics_by_base: |
| 161 | + metrics_by_base[base_name] = {'bucket': [], 'count': [], 'sum': []} |
| 162 | + metrics_by_base[base_name]['count'].append(line) |
| 163 | + elif metric_name.endswith('_sum'): |
| 164 | + base_name = metric_name[:-4] # Remove '_sum' |
| 165 | + if base_name not in metrics_by_base: |
| 166 | + metrics_by_base[base_name] = {'bucket': [], 'count': [], 'sum': []} |
| 167 | + metrics_by_base[base_name]['sum'].append(line) |
| 168 | + else: |
| 169 | + non_histogram_metrics.append(line) |
| 170 | + |
| 171 | + # Function to extract 'le' value for bucket sorting |
| 172 | + def extract_le_value(bucket_line: str) -> float: |
| 173 | + try: |
| 174 | + # Find le="value" in the line |
| 175 | + match = re.search(r'le="([^"]+)"', bucket_line) |
| 176 | + if match: |
| 177 | + le_val = match.group(1) |
| 178 | + if le_val == '+Inf': |
| 179 | + return float('inf') |
| 180 | + return float(le_val) |
| 181 | + return float('inf') # Default if parsing fails |
| 182 | + except: |
| 183 | + return float('inf') |
| 184 | + |
| 185 | + # Rebuild the output with proper ordering |
| 186 | + result_lines = comment_lines.copy() |
| 187 | + |
| 188 | + # Add non-histogram metrics first |
| 189 | + result_lines.extend(non_histogram_metrics) |
| 190 | + |
| 191 | + # Add histogram metrics in proper order |
| 192 | + for base_name in sorted(metrics_by_base.keys()): |
| 193 | + hist_data = metrics_by_base[base_name] |
| 194 | + |
| 195 | + # Sort buckets by 'le' value in ascending order |
| 196 | + sorted_buckets = sorted(hist_data['bucket'], key=extract_le_value) |
| 197 | + result_lines.extend(sorted_buckets) |
| 198 | + |
| 199 | + # Add count metrics |
| 200 | + result_lines.extend(hist_data['count']) |
| 201 | + |
| 202 | + # Add sum metrics |
| 203 | + result_lines.extend(hist_data['sum']) |
| 204 | + |
| 205 | + return '\n'.join(result_lines).encode('utf-8') |
107 | 206 |
|
108 | 207 | def text_string_to_metric_families(self) -> t.Generator[Metric, None, None]:
|
109 | 208 | yield from self.prometheus_client.parser.text_string_to_metric_families(
|
|
0 commit comments