Skip to content

Commit a710405

Browse files
JacksonWeber and lzchen authored
[Monitor OpenTelemetry Exporter] Add Check for Synthetic Source (#41733)
* Add check for user-agent and synthetic source. * Update CHANGELOG.md * Update CHANGELOG.md * Fix typing issue. * Update _utils.py * Update CHANGELOG.md * Update CHANGELOG.md --------- Co-authored-by: Leighton Chen <lechen@microsoft.com>
1 parent 81ab258 commit a710405

File tree

7 files changed

+150
-4
lines changed

7 files changed

+150
-4
lines changed

sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
### Features Added
66

7+
- Detect synthetically created telemetry based on the user-agent header
8+
([#41733](https://github.com/Azure/azure-sdk-for-python/pull/41733))
9+
710
### Breaking Changes
811

912
### Bugs Fixed

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import threading
1010
import time
1111
import warnings
12-
from typing import Callable, Dict, Any
12+
from typing import Callable, Dict, Any, Optional
1313

1414
from opentelemetry.semconv.resource import ResourceAttributes
1515
from opentelemetry.sdk.resources import Resource
@@ -292,12 +292,50 @@ def _get_cloud_role_instance(resource: Resource) -> str:
292292
return platform.node() # hostname default
293293

294294

295-
def _is_synthetic_source(properties: Attributes) -> bool:
295+
def _is_synthetic_source(properties: Optional[Any]) -> bool:
296296
# TODO: Use semconv symbol when released in upstream
297+
if not properties:
298+
return False
297299
synthetic_type = properties.get("user_agent.synthetic.type") # type: ignore
298300
return synthetic_type in ("bot", "test")
299301

300302

303+
def _is_synthetic_load(properties: Optional[Any]) -> bool:
304+
"""
305+
Check if the request is from a synthetic load test by examining the HTTP user agent.
306+
307+
:param properties: The attributes/properties to check for user agent information
308+
:type properties: Optional[Any]
309+
:return: True if the user agent contains "AlwaysOn", False otherwise
310+
:rtype: bool
311+
"""
312+
if not properties:
313+
return False
314+
315+
# Check both old and new semantic convention attributes for HTTP user agent
316+
user_agent = (
317+
properties.get("user_agent.original") or # type: ignore # New semantic convention
318+
properties.get("http.user_agent") # type: ignore # Legacy semantic convention
319+
)
320+
321+
if user_agent and isinstance(user_agent, str):
322+
return "AlwaysOn" in user_agent
323+
324+
return False
325+
326+
327+
def _is_any_synthetic_source(properties: Optional[Any]) -> bool:
    """
    Decide whether telemetry should be flagged as synthetic by any known detector.

    Combines the ``_is_synthetic_source`` and ``_is_synthetic_load`` checks.

    :param properties: The attributes/properties to check
    :type properties: Optional[Any]
    :return: True if either check reports a synthetic origin, False otherwise
    :rtype: bool
    """
    if _is_synthetic_source(properties):
        return True
    return _is_synthetic_load(properties)
337+
338+
301339
# pylint: disable=W0622
302340
def _filter_custom_properties(properties: Attributes, filter=None) -> Dict[str, str]:
303341
truncated_properties: Dict[str, str] = {}

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/logs/_exporter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ def _convert_log_to_envelope(log_data: LogData) -> TelemetryItem:
126126
envelope.tags[ContextTagKeys.AI_OPERATION_PARENT_ID] = "{:016x}".format( # type: ignore
127127
log_record.span_id or _DEFAULT_SPAN_ID
128128
)
129+
if _utils._is_any_synthetic_source(log_record.attributes):
130+
envelope.tags[ContextTagKeys.AI_OPERATION_SYNTHETIC_SOURCE] = "True" # type: ignore
129131
# Special use case: Customers want to be able to set location ip on log records
130132
location_ip = trace_utils._get_location_ip(log_record.attributes)
131133
if location_ip:

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/metrics/_exporter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def _convert_point_to_envelope(
217217
envelope = _utils._create_telemetry_item(point.time_unix_nano)
218218
envelope.name = _METRIC_ENVELOPE_NAME
219219
envelope.tags.update(_utils._populate_part_a_fields(resource)) # type: ignore
220-
if _utils._is_synthetic_source(point.attributes):
220+
if _utils._is_any_synthetic_source(point.attributes):
221221
envelope.tags[ContextTagKeys.AI_OPERATION_SYNTHETIC_SOURCE] = "True" # type: ignore
222222
namespace = None
223223
if scope is not None and _is_metric_namespace_opted_in():

sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/export/trace/_exporter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def _convert_span_to_envelope(span: ReadableSpan) -> TelemetryItem:
221221
envelope.tags[ContextTagKeys.AI_OPERATION_ID] = "{:032x}".format(span.context.trace_id)
222222
if SpanAttributes.ENDUSER_ID in span.attributes:
223223
envelope.tags[ContextTagKeys.AI_USER_ID] = span.attributes[SpanAttributes.ENDUSER_ID]
224-
if _utils._is_synthetic_source(span.attributes):
224+
if _utils._is_any_synthetic_source(span.attributes):
225225
envelope.tags[ContextTagKeys.AI_OPERATION_SYNTHETIC_SOURCE] = "True"
226226
if span.parent and span.parent.span_id:
227227
envelope.tags[ContextTagKeys.AI_OPERATION_PARENT_ID] = "{:016x}".format(span.parent.span_id)

sdk/monitor/azure-monitor-opentelemetry-exporter/tests/logs/test_logs.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,70 @@ def test_log_to_envelope_timestamp(self):
552552
self.assertEqual(envelope.time, ns_to_iso_str(record.observed_timestamp))
553553
self._log_data.log_record = old_record
554554

555+
def test_log_to_envelope_synthetic_source(self):
    """A log record with ``user_agent.synthetic.type`` set to "bot" is tagged as synthetic."""
    log_exporter = self._exporter
    service_resource = Resource.create(
        {
            "service.name": "testServiceName",
            "service.namespace": "testServiceNamespace",
            "service.instance.id": "testServiceInstanceId",
        }
    )
    synthetic_record = _logs.LogRecord(
        timestamp=1646865018558419456,
        trace_id=125960616039069540489478540494783893221,
        span_id=2909973987304607650,
        severity_text="WARNING",
        trace_flags=None,
        severity_number=SeverityNumber.WARN,
        body="Test message",
        resource=service_resource,
        attributes={
            "test": "attribute",
            "user_agent.synthetic.type": "bot",
        },
    )
    log_data = _logs.LogData(synthetic_record, InstrumentationScope("test_name"))

    tags = log_exporter._log_to_envelope(log_data).tags

    # The synthetic tag is set and the resource-derived role tags are still populated.
    self.assertEqual(tags.get(ContextTagKeys.AI_OPERATION_SYNTHETIC_SOURCE), "True")
    self.assertEqual(tags.get(ContextTagKeys.AI_CLOUD_ROLE), "testServiceNamespace.testServiceName")
    self.assertEqual(tags.get(ContextTagKeys.AI_CLOUD_ROLE_INSTANCE), "testServiceInstanceId")
586+
587+
def test_log_to_envelope_synthetic_load_always_on(self):
    """A log record whose legacy user agent contains "AlwaysOn" is tagged as synthetic."""
    log_exporter = self._exporter
    service_resource = Resource.create(
        {
            "service.name": "testServiceName",
            "service.namespace": "testServiceNamespace",
            "service.instance.id": "testServiceInstanceId",
        }
    )
    always_on_record = _logs.LogRecord(
        timestamp=1646865018558419456,
        trace_id=125960616039069540489478540494783893221,
        span_id=2909973987304607650,
        severity_text="WARNING",
        trace_flags=None,
        severity_number=SeverityNumber.WARN,
        body="Test message",
        resource=service_resource,
        attributes={
            "test": "attribute",
            "http.user_agent": "Azure-Load-Testing/1.0 AlwaysOn",
        },
    )
    log_data = _logs.LogData(always_on_record, InstrumentationScope("test_name"))

    tags = log_exporter._log_to_envelope(log_data).tags

    # The synthetic tag is set and the resource-derived role tags are still populated.
    self.assertEqual(tags.get(ContextTagKeys.AI_OPERATION_SYNTHETIC_SOURCE), "True")
    self.assertEqual(tags.get(ContextTagKeys.AI_CLOUD_ROLE), "testServiceNamespace.testServiceName")
    self.assertEqual(tags.get(ContextTagKeys.AI_CLOUD_ROLE_INSTANCE), "testServiceInstanceId")
618+
555619

556620
class TestAzureLogExporterWithDisabledStorage(TestAzureLogExporter):
557621
_exporter_class = partial(AzureMonitorLogExporter, disable_offline_storage=True)

sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,3 +586,42 @@ def test_is_synthetic_source_none(self):
586586
def test_is_synthetic_source_other(self):
    """A synthetic type other than "bot" or "test" is not treated as synthetic."""
    non_synthetic_attributes = {"user_agent.synthetic.type": "user"}
    is_synthetic = _utils._is_synthetic_source(non_synthetic_attributes)
    self.assertFalse(is_synthetic)
589+
590+
def test_is_synthetic_load_always_on_legacy(self):
    """"AlwaysOn" in the legacy ``http.user_agent`` attribute is detected."""
    legacy_attributes = {"http.user_agent": "Mozilla/5.0 AlwaysOn"}
    detected = _utils._is_synthetic_load(legacy_attributes)
    self.assertTrue(detected)
593+
594+
def test_is_synthetic_load_always_on_new_convention(self):
    """"AlwaysOn" in the new ``user_agent.original`` attribute is detected."""
    new_convention_attributes = {"user_agent.original": "Azure-Load-Testing/1.0 AlwaysOn"}
    detected = _utils._is_synthetic_load(new_convention_attributes)
    self.assertTrue(detected)
597+
598+
def test_is_synthetic_load_always_on_case_sensitive(self):
    """The "AlwaysOn" match is case-sensitive, so "alwayson" is not detected."""
    lowercase_attributes = {"http.user_agent": "Mozilla/5.0 alwayson"}
    detected = _utils._is_synthetic_load(lowercase_attributes)
    self.assertFalse(detected)
601+
602+
def test_is_synthetic_load_no_user_agent(self):
    """Empty attributes carry no user agent and are not synthetic load."""
    empty_attributes = {}
    detected = _utils._is_synthetic_load(empty_attributes)
    self.assertFalse(detected)
605+
606+
def test_is_synthetic_load_normal_user_agent(self):
    """An ordinary browser user agent is not reported as synthetic load."""
    browser_attributes = {"http.user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    detected = _utils._is_synthetic_load(browser_attributes)
    self.assertFalse(detected)
609+
610+
def test_is_any_synthetic_source_bot(self):
    """A "bot" synthetic type alone is enough for the combined check."""
    bot_attributes = {"user_agent.synthetic.type": "bot"}
    detected = _utils._is_any_synthetic_source(bot_attributes)
    self.assertTrue(detected)
613+
614+
def test_is_any_synthetic_source_always_on(self):
    """An "AlwaysOn" user agent alone is enough for the combined check."""
    always_on_attributes = {"http.user_agent": "Azure-Load-Testing/1.0 AlwaysOn"}
    detected = _utils._is_any_synthetic_source(always_on_attributes)
    self.assertTrue(detected)
617+
618+
def test_is_any_synthetic_source_both(self):
    """The combined check passes when both synthetic indicators are present."""
    combined_attributes = dict(
        [
            ("user_agent.synthetic.type", "bot"),
            ("http.user_agent", "Azure-Load-Testing/1.0 AlwaysOn"),
        ]
    )
    detected = _utils._is_any_synthetic_source(combined_attributes)
    self.assertTrue(detected)
624+
625+
def test_is_any_synthetic_source_none(self):
    """An ordinary browser user agent with no synthetic type is not flagged."""
    browser_attributes = {"http.user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    detected = _utils._is_any_synthetic_source(browser_attributes)
    self.assertFalse(detected)

0 commit comments

Comments
 (0)