|
1 | 1 | # SPDX-License-Identifier: Apache-2.0
|
2 | 2 |
|
| 3 | +from copy import copy |
3 | 4 | import time
|
4 | 5 | from typing import TYPE_CHECKING
|
5 | 6 | from typing import Counter as CollectionsCounter
|
@@ -669,20 +670,33 @@ def log(self, stats: Stats):
|
669 | 670 | if local_interval_elapsed(stats.now, self.last_local_log,
|
670 | 671 | self.local_interval):
|
671 | 672 | if self.spec_decode_metrics is not None:
|
| 673 | + # The counters in self.spec_decode_metrics are aggregates. |
| 674 | + # The Prometheus Counters must be incremented with deltas. |
| 675 | + # Keep track of the previously seen value so we can compute deltas. |
| 676 | + if self.last_spec_decode_metrics is None: |
| 677 | + self.last_spec_decode_metrics = copy(self.spec_decode_metrics) |
| 678 | + self.last_spec_decode_metrics.accepted_tokens = 0 |
| 679 | + self.last_spec_decode_metrics.draft_tokens = 0 |
| 680 | + self.last_spec_decode_metrics.emitted_tokens = 0 |
| 681 | + |
| 682 | + snapshot = copy(self.spec_decode_metrics) |
| 683 | + |
672 | 684 | self._log_gauge(
|
673 | 685 | self.metrics.gauge_spec_decode_draft_acceptance_rate,
|
674 | 686 | self.spec_decode_metrics.draft_acceptance_rate)
|
675 | 687 | self._log_gauge(self.metrics.gauge_spec_decode_efficiency,
|
676 | 688 | self.spec_decode_metrics.system_efficiency)
|
677 | 689 | self._log_counter(
|
678 | 690 | self.metrics.counter_spec_decode_num_accepted_tokens,
|
679 |
| - self.spec_decode_metrics.accepted_tokens) |
| 691 | + snapshot.accepted_tokens - self.last_spec_decode_metrics.accepted_tokens) |
680 | 692 | self._log_counter(
|
681 | 693 | self.metrics.counter_spec_decode_num_draft_tokens,
|
682 |
| - self.spec_decode_metrics.draft_tokens) |
| 694 | + snapshot.draft_tokens - self.last_spec_decode_metrics.draft_tokens) |
683 | 695 | self._log_counter(
|
684 | 696 | self.metrics.counter_spec_decode_num_emitted_tokens,
|
685 |
| - self.spec_decode_metrics.emitted_tokens) |
| 697 | + snapshot.emitted_tokens - self.last_spec_decode_metrics.emitted_tokens) |
| 698 | + |
| 699 | + self.last_spec_decode_metrics = snapshot |
686 | 700 |
|
687 | 701 | # Reset tracked stats for next interval.
|
688 | 702 | self.num_prompt_tokens = []
|
|
0 commit comments