Skip to content

Commit 9037647

Browse files
authored
feat(issues): Add parameterization for traceparent (#93721)
Traceparent values are currently parameterized as something like the following, which is unhelpful for grouping (redacted example): `<int>-<md5>-0a0000000000abcd<int>`. The regex here is intentionally strict and only supports the current version (`00`), which keeps the matching conservative. The option for this is handled outside of our normal path because the `traceparent` parameterization needs to happen before `int`/`md5` in the order of operations. Once this is rolled out, a follow-up PR will clean up quite a bit of our experiment logic here. See also: https://www.w3.org/TR/trace-context/#traceparent-header
1 parent 69d494d commit 9037647

File tree

4 files changed

+44
-22
lines changed

4 files changed

+44
-22
lines changed

src/sentry/grouping/parameterization.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ def compiled_pattern(self) -> re.Pattern[str]:
9494
)
9595
""",
9696
),
97+
ParameterizationRegex(
98+
name="traceparent", raw_pattern=r"""\b00-[0-9a-f]{32}-[0-9a-f]{16}-0[01]\b"""
99+
),
97100
ParameterizationRegex(
98101
name="uuid",
99102
raw_pattern=r"""\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b""",

src/sentry/grouping/strategies/message.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,24 @@
3737
"bool",
3838
)
3939

40+
REGEX_PATTERN_KEYS_WITH_TRACEPARENT = (
41+
"email",
42+
"url",
43+
"hostname",
44+
"ip",
45+
"traceparent",
46+
"uuid",
47+
"sha1",
48+
"md5",
49+
"date",
50+
"duration",
51+
"hex",
52+
"float",
53+
"int",
54+
"quoted_str",
55+
"bool",
56+
)
57+
4058
EXPERIMENT_PROJECTS = [ # Active internal Sentry projects
4159
1,
4260
11276,
@@ -71,11 +89,7 @@ def normalize_message_for_grouping(message: str, event: Event, share_analytics:
7189
if trimmed != message:
7290
trimmed += "..."
7391

74-
parameterizer = Parameterizer(
75-
regex_pattern_keys=REGEX_PATTERN_KEYS, experiments=(UniqueIdExperiment,)
76-
)
77-
78-
def _shoudl_run_experiment(experiment_name: str) -> bool:
92+
def _should_run_experiment(experiment_name: str) -> bool:
7993
return bool(
8094
not is_self_hosted()
8195
and event.project_id
@@ -87,7 +101,16 @@ def _shoudl_run_experiment(experiment_name: str) -> bool:
87101
)
88102
)
89103

90-
normalized = parameterizer.parameterize_all(trimmed, _shoudl_run_experiment)
104+
parameterizer = Parameterizer(
105+
regex_pattern_keys=REGEX_PATTERN_KEYS, experiments=(UniqueIdExperiment,)
106+
)
107+
if _should_run_experiment("traceparent"):
108+
parameterizer = Parameterizer(
109+
regex_pattern_keys=REGEX_PATTERN_KEYS_WITH_TRACEPARENT,
110+
experiments=(UniqueIdExperiment,),
111+
)
112+
113+
normalized = parameterizer.parameterize_all(trimmed, _should_run_experiment)
91114

92115
for experiment in parameterizer.get_successful_experiments():
93116
if share_analytics and experiment.counter < 100:

src/sentry/options/defaults.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2624,6 +2624,11 @@
26242624
default=0.0,
26252625
flags=FLAG_ADMIN_MODIFIABLE | FLAG_AUTOMATOR_MODIFIABLE | FLAG_RATE,
26262626
)
2627+
register(
2628+
"grouping.experiments.parameterization.traceparent",
2629+
default=0.0,
2630+
flags=FLAG_ADMIN_MODIFIABLE | FLAG_AUTOMATOR_MODIFIABLE | FLAG_RATE,
2631+
)
26272632

26282633
# TODO: For now, only a small number of projects are going through a grouping config transition at
26292634
# any given time, so we're sampling at 100% in order to be able to get good signal. Once we've fully

tests/sentry/grouping/test_parameterization.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,13 @@
77
Parameterizer,
88
UniqueIdExperiment,
99
)
10+
from sentry.grouping.strategies.message import REGEX_PATTERN_KEYS_WITH_TRACEPARENT
1011

1112

1213
@pytest.fixture
1314
def parameterizer():
1415
return Parameterizer(
15-
regex_pattern_keys=(
16-
"email",
17-
"url",
18-
"hostname",
19-
"ip",
20-
"uuid",
21-
"sha1",
22-
"md5",
23-
"date",
24-
"duration",
25-
"hex",
26-
"float",
27-
"int",
28-
"quoted_str",
29-
"bool",
30-
),
16+
regex_pattern_keys=REGEX_PATTERN_KEYS_WITH_TRACEPARENT,
3117
experiments=(UniqueIdExperiment,),
3218
)
3319

@@ -130,6 +116,11 @@ def parameterizer():
130116
("hex", """blah 0x9af8c3b had a problem""", """blah <hex> had a problem"""),
131117
("float", """blah 0.23 had a problem""", """blah <float> had a problem"""),
132118
("int", """blah 23 had a problem""", """blah <int> had a problem"""),
119+
(
120+
"traceparent",
121+
"""traceparent: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01""",
122+
"""traceparent: <traceparent>""",
123+
),
133124
("quoted str", """blah b="1" had a problem""", """blah b=<quoted_str> had a problem"""),
134125
("bool", """blah a=true had a problem""", """blah a=<bool> had a problem"""),
135126
(

0 commit comments

Comments
 (0)