Skip to content

Allow key value based transformers with custom replacement function #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions localstack_snapshot/snapshots/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,35 +184,36 @@ def replace_val(s):
return input_data


class KeyValueBasedTransformer:
class KeyValueBasedTransformerFunctionReplacement:
def __init__(
self,
match_fn: Callable[[str, Any], Optional[str]],
replacement: str,
replacement_function: [Callable[[str, Any], str]],
replace_reference: bool = True,
):
self.match_fn = match_fn
self.replacement = replacement
self.replacement_function = replacement_function
self.replace_reference = replace_reference

def transform(self, input_data: dict, *, ctx: TransformContext) -> dict:
for k, v in input_data.items():
if (match_result := self.match_fn(k, v)) is not None:
replacement = self.replacement_function(k, v)
if self.replace_reference:
_register_serialized_reference_replacement(
ctx, reference_value=match_result, replacement=self.replacement
ctx, reference_value=match_result, replacement=replacement
)
else:
if isinstance(v, str):
SNAPSHOT_LOGGER.debug(
f"Replacing value for key '{k}': Match result '{match_result:.200s}' with '{self.replacement}'. (Original value: {str(v)})"
f"Replacing value for key '{k}': Match result '{match_result:.200s}' with '{replacement}'. (Original value: {str(v)})"
)
input_data[k] = v.replace(match_result, self.replacement)
input_data[k] = v.replace(match_result, replacement)
else:
SNAPSHOT_LOGGER.debug(
f"Replacing value for key '{k}' with '{self.replacement}'. (Original value: {str(v)})"
f"Replacing value for key '{k}' with '{replacement}'. (Original value: {str(v)})"
)
input_data[k] = self.replacement
input_data[k] = replacement
elif isinstance(v, list) and len(v) > 0:
for i in range(0, len(v)):
if isinstance(v[i], dict):
Expand All @@ -223,6 +224,20 @@ def transform(self, input_data: dict, *, ctx: TransformContext) -> dict:
return input_data


class KeyValueBasedTransformer(KeyValueBasedTransformerFunctionReplacement):
def __init__(
self,
match_fn: Callable[[str, Any], Optional[str]],
replacement: str,
replace_reference: bool = True,
):
super().__init__(
match_fn=match_fn,
replacement_function=lambda k, v: replacement,
replace_reference=replace_reference,
)


class GenericTransformer:
def __init__(self, fn: Callable[[dict, TransformContext], dict]):
self.fn = fn
Expand Down
30 changes: 29 additions & 1 deletion localstack_snapshot/snapshots/transformer_utility.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from re import Pattern
from typing import Optional
from typing import Any, Callable, Optional

from localstack_snapshot.snapshots.transformer import (
JsonpathTransformer,
KeyValueBasedTransformer,
KeyValueBasedTransformerFunctionReplacement,
RegexTransformer,
TextTransformer,
)
Expand Down Expand Up @@ -38,6 +39,33 @@ def key_value(
replace_reference=reference_replacement,
)

@staticmethod
def key_value_replacement_function(
key: str,
replacement_function: Callable[[str, Any], str] = None,
reference_replacement: bool = True,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: so far I've seen reference_replacement as not too intuitive when defaulted to True, especially because it can change anything in the snapshot, including keys. Many times it goes like this: adding key_value transformer, see that comparison breaks, set reference_replacement to False.

Maybe we shouldn't set a default at all to this new enhanced function so that with each use the user thinks about whether they need a reference replacement. I see a drawback that the behavior would then differ from existing transformers, but also have an opinion that the default makes more harm than good.

WDYT?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, I made a different observation - usually we want to have reference replacements where possible. We just have to avoid reference replacing values in predefined keys - usually this happens if the transformer matches too much, as random data is not usually in our snapshot keys.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, thanks for sharing!. I do see value in reference replacements by default, just not in keys. This is a separate discussion though, happy to leave current behavior for this transformer. 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with the key replacement, this is more a side effect than anything else. Happy to work together to fix this in the future!

):
"""Creates a new KeyValueBasedTransformer. If the key matches, the value will be replaced.

:param key: the name of the key which should be replaced
:param replacement_function: The function calculating the replacement. Will be passed the key and value of the replaced pair.
By default it is the key-name in lowercase, separated with hyphen
:param reference_replacement: if False, only the original value for this key will be replaced.
If True all references of this value will be replaced (using a regex pattern), for the entire test case.
In this case, the replaced value will be nummerated as well.
Default: True

:return: KeyValueBasedTransformer
"""
replacement_function = replacement_function or (
lambda x, y: _replace_camel_string_with_hyphen(key)
)
return KeyValueBasedTransformerFunctionReplacement(
lambda k, v: v if k == key and (v is not None and v != "") else None,
replacement_function=replacement_function,
replace_reference=reference_replacement,
)

@staticmethod
def jsonpath(jsonpath: str, value_replacement: str, reference_replacement: bool = True):
"""Creates a new JsonpathTransformer. If the jsonpath matches, the value will be replaced.
Expand Down
47 changes: 47 additions & 0 deletions tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,53 @@ def test_key_value_replacement(self):

assert json.loads(tmp) == expected_key_value_reference

def test_key_value_replacement_custom_function(self):
input = {
"hello": "world",
"hello2": "again",
"path": {"to": {"anotherkey": "hi", "inside": {"hello": "inside"}}},
}

key_value = TransformerUtility.key_value_replacement_function(
"hello",
replacement_function=lambda k, v: f"placeholder({len(v)})",
reference_replacement=False,
)

expected_key_value = {
"hello": "placeholder(5)",
"hello2": "again",
"path": {"to": {"anotherkey": "hi", "inside": {"hello": "placeholder(6)"}}},
}

copied = copy.deepcopy(input)
ctx = TransformContext()
assert key_value.transform(copied, ctx=ctx) == expected_key_value
assert ctx.serialized_replacements == []

copied = copy.deepcopy(input)
key_value = TransformerUtility.key_value_replacement_function(
"hello",
replacement_function=lambda k, v: f"placeholder({len(v)})",
reference_replacement=True,
)
# replacement counters are per replacement key, so it will start from 1 again.
expected_key_value_reference = {
"hello": "<placeholder(5):1>",
"hello2": "again",
"path": {
"to": {"anotherkey": "hi", "<placeholder(6):1>": {"hello": "<placeholder(6):1>"}}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: test it with another same-length value to see if we get :2 in replacement correctly. Also for case without reference replacement would be great to test what happens when function gives the same result for two different strings.
I added a PR for that: #11

},
}
assert key_value.transform(copied, ctx=ctx) == copied
assert len(ctx.serialized_replacements) == 2

tmp = json.dumps(copied, default=str)
for sr in ctx.serialized_replacements:
tmp = sr(tmp)

assert json.loads(tmp) == expected_key_value_reference

def test_key_value_replacement_with_falsy_value(self):
input = {
"hello": "world",
Expand Down