Skip to content

Commit f75f82c

Browse files
authored
Customer Facing Statsbeat: Added logic for dropped item count (#41950)
* Added logic for dropped item count
* Addressed review comments
* Updated CHANGELOG
1 parent 27ed0de commit f75f82c

File tree

5 files changed

+318
-8
lines changed

5 files changed

+318
-8
lines changed

sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
([#41733](https://github.com/Azure/azure-sdk-for-python/pull/41733))
99
- Added customer-facing statsbeat preview.
1010
([#41669](https://github.com/Azure/azure-sdk-for-python/pull/41669))
11+
- Customer Facing Statsbeat: Added logic for dropped item count
12+
([#41950](https://github.com/Azure/azure-sdk-for-python/pull/41950))
1113

1214
### Breaking Changes
1315

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# cSpell:disable
44

55
from enum import Enum
6+
from typing import Union
67
from opentelemetry.semconv.metrics import MetricInstruments
78
from opentelemetry.semconv.metrics.http_metrics import (
89
HTTP_CLIENT_REQUEST_DURATION,
@@ -64,7 +65,6 @@
6465
_MESSAGE_ENVELOPE_NAME = "Microsoft.ApplicationInsights.Message"
6566
_REQUEST_ENVELOPE_NAME = "Microsoft.ApplicationInsights.Request"
6667
_REMOTE_DEPENDENCY_ENVELOPE_NAME = "Microsoft.ApplicationInsights.RemoteDependency"
67-
_REMOTE_DEPENDENCY_ENVELOPE_DATA = "Microsoft.ApplicationInsights.RemoteDependencyData"
6868
_EVENT_ENVELOPE_NAME = "Microsoft.ApplicationInsights.Event"
6969
_PAGE_VIEW_ENVELOPE_NAME = "Microsoft.ApplicationInsights.PageView"
7070
_PERFORMANCE_COUNTER_ENVELOPE_NAME = "Microsoft.ApplicationInsights.PerformanceCounter"
@@ -145,6 +145,8 @@ class DropCode(str, Enum, metaclass=CaseInsensitiveEnumMeta):
145145
CLIENT_PERSISTENCE_CAPACITY = "CLIENT_PERSISTENCE_CAPACITY"
146146
UNKNOWN = "UNKNOWN"
147147

148+
DropCodeType = Union[DropCode, int]
149+
148150
class RetryCode(str, Enum, metaclass=CaseInsensitiveEnumMeta):
149151
CLIENT_TIMEOUT = "CLIENT_TIMEOUT"
150152
UNKNOWN = "UNKNOWN"
@@ -167,7 +169,7 @@ def __init__(self, language: str, version: str, compute_type: str):
167169
_TYPE_MAP = {
168170
_EVENT_ENVELOPE_NAME: _CUSTOM_EVENT,
169171
_METRIC_ENVELOPE_NAME: _CUSTOM_METRIC,
170-
_REMOTE_DEPENDENCY_ENVELOPE_DATA: _DEPENDENCY,
172+
_REMOTE_DEPENDENCY_ENVELOPE_NAME: _DEPENDENCY,
171173
_EXCEPTION_ENVELOPE_NAME: _EXCEPTION,
172174
_PAGE_VIEW_ENVELOPE_NAME: _PAGE_VIEW,
173175
_MESSAGE_ENVELOPE_NAME: _TRACE,

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/statsbeat/_customer_statsbeat.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
metrics that track the usage and performance of the Azure Monitor OpenTelemetry Exporter.
77
"""
88

9-
from typing import List, Dict, Any, Iterable
9+
from typing import List, Dict, Any, Iterable, Optional
1010
import os
1111

1212
from opentelemetry.metrics import CallbackOptions, Observation
@@ -17,7 +17,8 @@
1717
_APPLICATIONINSIGHTS_STATSBEAT_ENABLED_PREVIEW,
1818
_DEFAULT_STATS_SHORT_EXPORT_INTERVAL,
1919
CustomerStatsbeatProperties,
20-
#DropCode,
20+
DropCode,
21+
DropCodeType,
2122
#RetryCode,
2223
CustomerStatsbeatMetricName,
2324
_CUSTOMER_STATSBEAT_LANGUAGE,
@@ -29,11 +30,15 @@
2930
get_compute_type,
3031
)
3132

33+
from azure.monitor.opentelemetry.exporter.statsbeat._utils import (
34+
categorize_status_code,
35+
)
3236
from azure.monitor.opentelemetry.exporter import VERSION
3337

3438
class _CustomerStatsbeatTelemetryCounters:
3539
def __init__(self):
3640
self.total_item_success_count: Dict[str, Any] = {}
41+
self.total_item_drop_count: Dict[str, Dict[DropCodeType, Dict[str, int]]] = {}
3742

3843
class CustomerStatsbeatMetrics(metaclass=Singleton):
3944
def __init__(self, options):
@@ -66,20 +71,51 @@ def __init__(self, options):
6671
description="Tracks successful telemetry items sent to Azure Monitor",
6772
callbacks=[self._item_success_callback]
6873
)
74+
self._dropped_gauge = self._customer_statsbeat_meter.create_observable_gauge(
75+
name=CustomerStatsbeatMetricName.ITEM_DROP_COUNT.value,
76+
description="Tracks dropped telemetry items sent to Azure Monitor",
77+
callbacks=[self._item_drop_callback]
78+
)
6979

7080
def count_successful_items(self, count: int, telemetry_type: str) -> None:
    """Add ``count`` to the running success total for ``telemetry_type``.

    Nothing is recorded when customer statsbeat is disabled or when
    ``count`` is zero or negative.

    :param count: Number of items to add to the total.
    :param telemetry_type: Key under which the total is accumulated.
    """
    if not self._is_enabled or count <= 0:
        return

    totals = self._counters.total_item_success_count
    # One lookup with a zero default covers both the first and later updates.
    totals[telemetry_type] = totals.get(telemetry_type, 0) + count
7788

89+
def count_dropped_items(
    self, count: int, telemetry_type: str, drop_code: DropCodeType,
    exception_message: Optional[str] = None
) -> None:
    """Add ``count`` dropped items for ``telemetry_type`` under ``drop_code``.

    Totals are bucketed as telemetry type -> drop code -> reason, where the
    reason string is whatever ``self._get_drop_reason`` returns for the
    given drop code and exception message. Nothing is recorded when customer
    statsbeat is disabled or when ``count`` is zero or negative.

    :param count: Number of items to add to the total.
    :param telemetry_type: First-level key of the drop counters.
    :param drop_code: Second-level key; also passed to ``_get_drop_reason``.
    :param exception_message: Optional text passed to ``_get_drop_reason``.
    """
    if not self._is_enabled or count <= 0:
        return

    # setdefault creates each missing nesting level in a single lookup.
    codes_for_type = self._counters.total_item_drop_count.setdefault(telemetry_type, {})
    counts_by_reason = codes_for_type.setdefault(drop_code, {})

    reason = self._get_drop_reason(drop_code, exception_message)
    counts_by_reason[reason] = counts_by_reason.get(reason, 0) + count
112+
78113
def _item_success_callback(self, options: CallbackOptions) -> Iterable[Observation]: # pylint: disable=unused-argument
79114
if not getattr(self, "_is_enabled", False):
80115
return []
81116

82117
observations: List[Observation] = []
118+
83119
for telemetry_type, count in self._counters.total_item_success_count.items():
84120
attributes = {
85121
"language": self._customer_properties.language,
@@ -90,3 +126,37 @@ def _item_success_callback(self, options: CallbackOptions) -> Iterable[Observati
90126
observations.append(Observation(count, dict(attributes)))
91127

92128
return observations
129+
130+
def _item_drop_callback(self, options: CallbackOptions) -> Iterable[Observation]: # pylint: disable=unused-argument
    """Gauge callback: emit one Observation per (telemetry type, drop code, reason) total.

    Returns an empty list when ``_is_enabled`` is missing or falsy on this instance.
    """
    enabled = getattr(self, "_is_enabled", False)
    if not enabled:
        return []

    results: List[Observation] = []
    drop_totals = self._counters.total_item_drop_count
    for item_type, by_code in drop_totals.items():
        for code, by_reason in by_code.items():
            for why, dropped in by_reason.items():
                # A fresh attribute dict is built for every observation.
                results.append(
                    Observation(
                        dropped,
                        {
                            "language": self._customer_properties.language,
                            "version": self._customer_properties.version,
                            "compute_type": self._customer_properties.compute_type,
                            "drop.code": code,
                            "drop.reason": why,
                            "telemetry_type": item_type,
                        },
                    )
                )

    return results
148+
149+
def _get_drop_reason(self, drop_code: DropCodeType, exception_message: Optional[str] = None) -> str:
    """Return the reason string under which a drop with ``drop_code`` is counted.

    Integer drop codes are delegated to ``categorize_status_code``. For
    ``DropCode.CLIENT_EXCEPTION`` the exception message itself is the reason,
    with ``"unknown_exception"`` used when the message is missing or empty.
    Other codes use a fixed table and fall back to ``"unknown_reason"``.
    """
    if isinstance(drop_code, int):
        return categorize_status_code(drop_code)

    if drop_code == DropCode.CLIENT_EXCEPTION:
        # NOTE(review): the raw message is returned unchanged, so the number of
        # distinct reasons depends on what callers pass in - confirm intended.
        return exception_message or "unknown_exception"

    fixed_reasons = {
        DropCode.CLIENT_READONLY: "readonly_mode",
        DropCode.CLIENT_STALE_DATA: "stale_data",
        DropCode.CLIENT_PERSISTENCE_CAPACITY: "persistence_full",
    }
    return fixed_reasons.get(drop_code, "unknown_reason")

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/statsbeat/_utils.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,26 @@ def _update_requests_map(type_name, value):
6767
else:
6868
_REQUESTS_MAP[type_name] = {}
6969
_REQUESTS_MAP[type_name][value] = prev + 1
70+
71+
# Reason strings for specific status codes. Built once at import time
# instead of on every call.
# NOTE(review): 402 is the only value that is not snake_case - confirm intended.
_STATUS_CODE_REASONS = {
    400: "bad_request",
    401: "unauthorized",
    402: "daily quota exceeded",
    403: "forbidden",
    404: "not_found",
    408: "request_timeout",
    413: "payload_too_large",
    429: "too_many_requests",
    500: "internal_server_error",
    502: "bad_gateway",
    503: "service_unavailable",
    504: "gateway_timeout",
}


def categorize_status_code(status_code: int) -> str:
    """Return a short reason string for ``status_code``.

    Codes listed in ``_STATUS_CODE_REASONS`` map to their specific reason.
    Any other code in 400-499 yields ``"client_error_4xx"``, any other code
    in 500-599 yields ``"server_error_5xx"``, and everything else yields
    ``"status_<code>"``.

    :param status_code: Numeric status code to categorize.
    :return: The reason string for the code.
    """
    known = _STATUS_CODE_REASONS.get(status_code)
    if known is not None:
        return known
    if 400 <= status_code < 500:
        return "client_error_4xx"
    if 500 <= status_code < 600:
        return "server_error_5xx"
    return f"status_{status_code}"

0 commit comments

Comments
 (0)