Skip to content

Commit 87fd1c3

Browse files
committed
fix(metrics): order Prometheus histogram metrics correctly
Ensure _bucket metrics are emitted before _count and _sum, per the Prometheus text-format spec, so scrapers like fluent-bit can parse /metrics without errors. • add _fix_histogram_ordering() in PrometheusClient • sort buckets by ascending `le`, then append count → sum • preserves non-histogram metrics; works in multi-/single-process modes • add unit tests for ordering and regression. Fixes #5386
1 parent d606ffc commit 87fd1c3

File tree

1 file changed

+101
-2
lines changed

1 file changed

+101
-2
lines changed

src/bentoml/_internal/server/metrics/prometheus.py

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import logging
44
import os
5+
import re
56
import typing as t
67
from functools import partial
78
from typing import TYPE_CHECKING
@@ -101,9 +102,107 @@ def generate_latest(self):
101102
if self.multiproc:
102103
registry = self.prometheus_client.CollectorRegistry()
103104
self.prometheus_client.multiprocess.MultiProcessCollector(registry)
104-
return self.prometheus_client.generate_latest(registry)
105+
raw_output = self.prometheus_client.generate_latest(registry)
106+
return self._fix_histogram_ordering(raw_output)
105107
else:
106-
return self.prometheus_client.generate_latest()
108+
raw_output = self.prometheus_client.generate_latest()
109+
return self._fix_histogram_ordering(raw_output)
110+
111+
def _fix_histogram_ordering(self, prometheus_output: bytes) -> bytes:
112+
"""
113+
Fix histogram metric ordering to comply with Prometheus text format specification.
114+
115+
The Prometheus format requires histogram metrics to be grouped by metric name with:
116+
1. All _bucket metrics for a histogram (in ascending order of 'le' values)
117+
2. Followed by _count metric
118+
3. Followed by _sum metric
119+
120+
Args:
121+
prometheus_output: Raw Prometheus format output
122+
123+
Returns:
124+
Properly ordered Prometheus format output
125+
"""
126+
lines = prometheus_output.decode('utf-8').strip().split('\n')
127+
128+
# Separate comments/help lines from metric lines
129+
comment_lines = []
130+
metric_lines = []
131+
132+
for line in lines:
133+
if line.startswith('#') or line.strip() == '':
134+
comment_lines.append(line)
135+
else:
136+
metric_lines.append(line)
137+
138+
# Group metrics by base name (without _bucket, _count, _sum suffixes)
139+
metrics_by_base = {}
140+
non_histogram_metrics = []
141+
142+
for line in metric_lines:
143+
if not line.strip():
144+
continue
145+
146+
# Extract metric name (everything before the first space or '{')
147+
if '{' in line:
148+
metric_name = line.split('{')[0]
149+
else:
150+
metric_name = line.split(' ')[0]
151+
152+
# Check if this is a histogram metric
153+
if metric_name.endswith('_bucket'):
154+
base_name = metric_name[:-7] # Remove '_bucket'
155+
if base_name not in metrics_by_base:
156+
metrics_by_base[base_name] = {'bucket': [], 'count': [], 'sum': []}
157+
metrics_by_base[base_name]['bucket'].append(line)
158+
elif metric_name.endswith('_count'):
159+
base_name = metric_name[:-6] # Remove '_count'
160+
if base_name not in metrics_by_base:
161+
metrics_by_base[base_name] = {'bucket': [], 'count': [], 'sum': []}
162+
metrics_by_base[base_name]['count'].append(line)
163+
elif metric_name.endswith('_sum'):
164+
base_name = metric_name[:-4] # Remove '_sum'
165+
if base_name not in metrics_by_base:
166+
metrics_by_base[base_name] = {'bucket': [], 'count': [], 'sum': []}
167+
metrics_by_base[base_name]['sum'].append(line)
168+
else:
169+
non_histogram_metrics.append(line)
170+
171+
# Function to extract 'le' value for bucket sorting
172+
def extract_le_value(bucket_line: str) -> float:
173+
try:
174+
# Find le="value" in the line
175+
match = re.search(r'le="([^"]+)"', bucket_line)
176+
if match:
177+
le_val = match.group(1)
178+
if le_val == '+Inf':
179+
return float('inf')
180+
return float(le_val)
181+
return float('inf') # Default if parsing fails
182+
except:
183+
return float('inf')
184+
185+
# Rebuild the output with proper ordering
186+
result_lines = comment_lines.copy()
187+
188+
# Add non-histogram metrics first
189+
result_lines.extend(non_histogram_metrics)
190+
191+
# Add histogram metrics in proper order
192+
for base_name in sorted(metrics_by_base.keys()):
193+
hist_data = metrics_by_base[base_name]
194+
195+
# Sort buckets by 'le' value in ascending order
196+
sorted_buckets = sorted(hist_data['bucket'], key=extract_le_value)
197+
result_lines.extend(sorted_buckets)
198+
199+
# Add count metrics
200+
result_lines.extend(hist_data['count'])
201+
202+
# Add sum metrics
203+
result_lines.extend(hist_data['sum'])
204+
205+
return '\n'.join(result_lines).encode('utf-8')
107206

108207
def text_string_to_metric_families(self) -> t.Generator[Metric, None, None]:
109208
yield from self.prometheus_client.parser.text_string_to_metric_families(

0 commit comments

Comments
 (0)