Skip to content

Commit 6477736

Browse files
authored
fix(eap): Handle user attributes colliding with sentry attributes (#95369)
Users can send custom attributes that happen to be named the same as the public aliases as sentry attributes. For example, they can name an attribute `timestamp`. When this happens, we should not return its key as `timestamp`. Instead, we should return it as `tags[{name},{type}]` to disambiguate it from the sentry defined attribute.
1 parent f91f02d commit 6477736

File tree

3 files changed

+176
-116
lines changed

3 files changed

+176
-116
lines changed

src/sentry/search/eap/utils.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import Function
1313

1414
from sentry.exceptions import InvalidSearchQuery
15+
from sentry.search.eap.columns import ResolvedAttribute
1516
from sentry.search.eap.constants import SAMPLING_MODE_MAP
1617
from sentry.search.eap.ourlogs.attributes import (
1718
LOGS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
@@ -20,8 +21,10 @@
2021
LOGS_PRIVATE_ATTRIBUTES,
2122
LOGS_REPLACEMENT_ATTRIBUTES,
2223
LOGS_REPLACEMENT_MAP,
24+
OURLOG_ATTRIBUTE_DEFINITIONS,
2325
)
2426
from sentry.search.eap.spans.attributes import (
27+
SPAN_ATTRIBUTE_DEFINITIONS,
2528
SPAN_INTERNAL_TO_SECONDARY_ALIASES_MAPPING,
2629
SPANS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
2730
SPANS_PRIVATE_ATTRIBUTE_PREFIXES,
@@ -124,6 +127,11 @@ def validate_sampling(sampling_mode: SAMPLING_MODES | None) -> DownsampledStorag
124127
SupportedTraceItemType.LOGS: LOGS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
125128
}
126129

130+
PUBLIC_ALIAS_TO_INTERNAL_MAPPING: dict[SupportedTraceItemType, dict[str, ResolvedAttribute]] = {
131+
SupportedTraceItemType.SPANS: SPAN_ATTRIBUTE_DEFINITIONS,
132+
SupportedTraceItemType.LOGS: OURLOG_ATTRIBUTE_DEFINITIONS,
133+
}
134+
127135

128136
PRIVATE_ATTRIBUTES: dict[SupportedTraceItemType, set[str]] = {
129137
SupportedTraceItemType.SPANS: SPANS_PRIVATE_ATTRIBUTES,
@@ -158,7 +166,17 @@ def translate_internal_to_public_alias(
158166
item_type: SupportedTraceItemType,
159167
) -> str | None:
160168
mapping = INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS.get(item_type, {}).get(type, {})
161-
return mapping.get(internal_alias)
169+
public_alias = mapping.get(internal_alias)
170+
if public_alias is not None:
171+
return public_alias
172+
173+
resolved_column = PUBLIC_ALIAS_TO_INTERNAL_MAPPING.get(item_type, {}).get(internal_alias)
174+
if resolved_column is not None:
175+
# if there is a known public alias with this exact name, it means we need to wrap
176+
# it in the explicitly typed tags syntax in order for it to reference the correct column
177+
return f"tags[{internal_alias},{type}]"
178+
179+
return None
162180

163181

164182
def get_secondary_aliases(

tests/snuba/api/endpoints/test_organization_trace_item_attributes.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def test_body_attribute(self):
159159

160160
assert response.status_code == 200, response.content
161161
keys = {item["key"] for item in response.data}
162-
assert keys == {"severity", "message", "project"}
162+
assert keys == {"severity", "message", "project", "tags[message,string]"}
163163

164164
def test_disallowed_attributes(self):
165165
logs = [
@@ -181,6 +181,29 @@ def test_disallowed_attributes(self):
181181
keys = {item["key"] for item in response.data}
182182
assert keys == {"severity", "message", "project", "sentry.item_type2"}
183183

184+
def test_attribute_collision(self):
185+
logs = [
186+
self.create_ourlog(
187+
organization=self.organization,
188+
project=self.project,
189+
attributes={"timestamp": "bar", "severity": "baz"},
190+
),
191+
]
192+
193+
self.store_ourlogs(logs)
194+
195+
response = self.do_request()
196+
197+
assert response.status_code == 200, response.content
198+
keys = {item["key"] for item in response.data}
199+
assert keys == {
200+
"message",
201+
"project",
202+
"severity",
203+
"tags[severity,string]",
204+
"tags[timestamp,string]",
205+
}
206+
184207

185208
class OrganizationTraceItemAttributesEndpointSpansTest(
186209
OrganizationTraceItemAttributesEndpointTestBase, BaseSpansTestCase
@@ -465,6 +488,39 @@ def test_tags_list_sentry_conventions(self):
465488
key=itemgetter("key"),
466489
)
467490

491+
def test_attribute_collision(self):
492+
self.store_segment(
493+
self.project.id,
494+
uuid4().hex,
495+
uuid4().hex,
496+
span_id=uuid4().hex[:16],
497+
organization_id=self.organization.id,
498+
parent_span_id=None,
499+
timestamp=before_now(days=0, minutes=10).replace(microsecond=0),
500+
transaction="foo",
501+
duration=100,
502+
exclusive_time=100,
503+
tags={
504+
"span.op": "foo",
505+
"span.duration": "bar",
506+
},
507+
is_eap=True,
508+
)
509+
510+
response = self.do_request()
511+
assert response.status_code == 200, response.data
512+
assert response.data == [
513+
{
514+
"key": "span.description",
515+
"name": "span.description",
516+
"secondaryAliases": ["description", "message"],
517+
},
518+
{"key": "project", "name": "project"},
519+
{"key": "transaction", "name": "transaction"},
520+
{"key": "tags[span.duration,string]", "name": "tags[span.duration,string]"},
521+
{"key": "tags[span.op,string]", "name": "tags[span.op,string]"},
522+
]
523+
468524

469525
class OrganizationTraceItemAttributeValuesEndpointBaseTest(APITestCase, SnubaTestCase):
470526
feature_flags: dict[str, bool]

tests/snuba/api/endpoints/test_project_trace_item_details.py

Lines changed: 100 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from sentry.testutils.helpers.datetime import before_now
88

99

10-
class ProjectEventDetailsTest(APITestCase, SnubaTestCase, OurLogTestCase, SpanTestCase):
10+
class ProjectTraceItemDetailsEndpointTest(APITestCase, SnubaTestCase, OurLogTestCase, SpanTestCase):
1111
def setUp(self):
1212
super().setUp()
1313
self.login_as(user=self.user)
@@ -38,47 +38,27 @@ def do_request(self, event_type: str, item_id: str, features=None):
3838
)
3939

4040
def test_simple(self):
41-
logs = [
42-
self.create_ourlog(
43-
{
44-
"body": "foo",
45-
"trace_id": self.trace_uuid,
41+
log = self.create_ourlog(
42+
{
43+
"body": "foo",
44+
"trace_id": self.trace_uuid,
45+
},
46+
attributes={
47+
"str_attr": {
48+
"string_value": "1",
4649
},
47-
attributes={
48-
"str_attr": {
49-
"string_value": "1",
50-
},
51-
"int_attr": {"int_value": 2},
52-
"float_attr": {
53-
"double_value": 3.0,
54-
},
55-
"bool_attr": {
56-
"bool_value": True,
57-
},
50+
"int_attr": {"int_value": 2},
51+
"float_attr": {
52+
"double_value": 3.0,
53+
},
54+
"bool_attr": {
55+
"bool_value": True,
5856
},
59-
timestamp=self.one_min_ago,
60-
),
61-
]
62-
self.store_ourlogs(logs)
63-
item_list_url = reverse(
64-
"sentry-api-0-organization-events",
65-
kwargs={
66-
"organization_id_or_slug": self.project.organization.slug,
6757
},
58+
timestamp=self.one_min_ago,
6859
)
69-
with self.feature(self.features):
70-
item_list_response = self.client.get(
71-
item_list_url,
72-
{
73-
"field": ["message", "sentry.item_id", "sentry.trace_id"],
74-
"query": "",
75-
"orderby": "sentry.item_id",
76-
"project": self.project.id,
77-
"dataset": "ourlogs",
78-
},
79-
)
80-
assert item_list_response.data is not None
81-
item_id = item_list_response.data["data"][0]["sentry.item_id"]
60+
self.store_ourlogs([log])
61+
item_id = uuid.UUID(bytes=bytes(reversed(log.item_id))).hex
8262

8363
trace_details_response = self.do_request("logs", item_id)
8464

@@ -113,47 +93,27 @@ def test_simple(self):
11393
)
11494

11595
def test_simple_using_logs_item_type(self):
116-
logs = [
117-
self.create_ourlog(
118-
{
119-
"body": "foo",
120-
"trace_id": self.trace_uuid,
96+
log = self.create_ourlog(
97+
{
98+
"body": "foo",
99+
"trace_id": self.trace_uuid,
100+
},
101+
attributes={
102+
"str_attr": {
103+
"string_value": "1",
121104
},
122-
attributes={
123-
"str_attr": {
124-
"string_value": "1",
125-
},
126-
"int_attr": {"int_value": 2},
127-
"float_attr": {
128-
"double_value": 3.0,
129-
},
130-
"bool_attr": {
131-
"bool_value": True,
132-
},
105+
"int_attr": {"int_value": 2},
106+
"float_attr": {
107+
"double_value": 3.0,
108+
},
109+
"bool_attr": {
110+
"bool_value": True,
133111
},
134-
timestamp=self.one_min_ago,
135-
),
136-
]
137-
self.store_ourlogs(logs)
138-
item_list_url = reverse(
139-
"sentry-api-0-organization-events",
140-
kwargs={
141-
"organization_id_or_slug": self.project.organization.slug,
142112
},
113+
timestamp=self.one_min_ago,
143114
)
144-
with self.feature(self.features):
145-
item_list_response = self.client.get(
146-
item_list_url,
147-
{
148-
"field": ["message", "sentry.item_id", "sentry.trace_id"],
149-
"query": "",
150-
"orderby": "sentry.item_id",
151-
"project": self.project.id,
152-
"dataset": "ourlogs",
153-
},
154-
)
155-
assert item_list_response.data is not None
156-
item_id = item_list_response.data["data"][0]["sentry.item_id"]
115+
self.store_ourlogs([log])
116+
item_id = uuid.UUID(bytes=bytes(reversed(log.item_id))).hex
157117

158118
trace_details_response = self.do_request("logs", item_id)
159119

@@ -325,49 +285,29 @@ def test_simple_using_spans_item_type_with_sentry_conventions(self):
325285
)
326286

327287
def test_logs_with_a_meta_key(self):
328-
logs = [
329-
self.create_ourlog(
330-
{
331-
"body": "[Filtered]",
332-
"trace_id": self.trace_uuid,
288+
log = self.create_ourlog(
289+
{
290+
"body": "[Filtered]",
291+
"trace_id": self.trace_uuid,
292+
},
293+
attributes={
294+
"str_attr": {
295+
"string_value": "1",
333296
},
334-
attributes={
335-
"str_attr": {
336-
"string_value": "1",
337-
},
338-
"sentry._meta.fields.attributes.sentry.body": '{"length": 300, "reason": "value too long"}',
339-
"sentry._meta.fields.attributes.float_attr": '{"unit": "float"}',
340-
"int_attr": {"int_value": 2},
341-
"float_attr": {
342-
"double_value": 3.0,
343-
},
344-
"bool_attr": {
345-
"bool_value": True,
346-
},
297+
"sentry._meta.fields.attributes.sentry.body": '{"length": 300, "reason": "value too long"}',
298+
"sentry._meta.fields.attributes.float_attr": '{"unit": "float"}',
299+
"int_attr": {"int_value": 2},
300+
"float_attr": {
301+
"double_value": 3.0,
302+
},
303+
"bool_attr": {
304+
"bool_value": True,
347305
},
348-
timestamp=self.one_min_ago,
349-
),
350-
]
351-
self.store_ourlogs(logs)
352-
item_list_url = reverse(
353-
"sentry-api-0-organization-events",
354-
kwargs={
355-
"organization_id_or_slug": self.project.organization.slug,
356306
},
307+
timestamp=self.one_min_ago,
357308
)
358-
with self.feature(self.features):
359-
item_list_response = self.client.get(
360-
item_list_url,
361-
{
362-
"field": ["message", "sentry.item_id", "sentry.trace_id"],
363-
"query": "",
364-
"orderby": "sentry.item_id",
365-
"project": self.project.id,
366-
"dataset": "ourlogs",
367-
},
368-
)
369-
assert item_list_response.data is not None
370-
item_id = item_list_response.data["data"][0]["sentry.item_id"]
309+
self.store_ourlogs([log])
310+
item_id = uuid.UUID(bytes=bytes(reversed(log.item_id))).hex
371311

372312
trace_details_response = self.do_request("logs", item_id)
373313

@@ -407,3 +347,49 @@ def test_logs_with_a_meta_key(self):
407347
).isoformat()
408348
+ "Z",
409349
}
350+
351+
def test_user_attributes_collide_with_sentry_attributes(self):
352+
log = self.create_ourlog(
353+
{
354+
"body": "foo",
355+
"trace_id": self.trace_uuid,
356+
},
357+
attributes={"timestamp": "bar", "severity": "baz"},
358+
timestamp=self.one_min_ago,
359+
)
360+
361+
self.store_ourlogs([log])
362+
item_id = uuid.UUID(bytes=bytes(reversed(log.item_id))).hex
363+
364+
trace_details_response = self.do_request("logs", item_id)
365+
assert trace_details_response.status_code == 200, trace_details_response.content
366+
367+
timestamp_nanos = int(self.one_min_ago.timestamp() * 1_000_000_000)
368+
assert trace_details_response.data == {
369+
"attributes": [
370+
{"name": "project_id", "type": "int", "value": str(self.project.id)},
371+
{"name": "severity_number", "type": "int", "value": "0"},
372+
{
373+
"name": "tags[sentry.timestamp_nanos,number]",
374+
"type": "int",
375+
"value": str(timestamp_nanos),
376+
},
377+
{
378+
"name": "tags[sentry.timestamp_precise,number]",
379+
"type": "int",
380+
"value": str(timestamp_nanos),
381+
},
382+
{"name": "message", "type": "str", "value": "foo"},
383+
{"name": "severity", "type": "str", "value": "INFO"},
384+
{"name": "tags[severity,string]", "type": "str", "value": "baz"},
385+
{"name": "tags[timestamp,string]", "type": "str", "value": "bar"},
386+
{"name": "trace", "type": "str", "value": self.trace_uuid},
387+
],
388+
"meta": {},
389+
"itemId": item_id,
390+
"timestamp": self.one_min_ago.replace(
391+
microsecond=0,
392+
tzinfo=None,
393+
).isoformat()
394+
+ "Z",
395+
}

0 commit comments

Comments
 (0)