class JsonStringTransformer:
    """
    Parses the JSON string found at the specified key.
    Additionally, attempts to parse any JSON strings inside the parsed JSON.

    This transformer complements the default parsing of JSON strings in
    localstack_snapshot.snapshots.prototype.SnapshotSession._transform_dict_to_parseable_values

    Shortcomings of the default parser that this transformer addresses:
    - parsing of nested JSON strings '{"a": "{\\"b\\":42}"}'
    - parsing of JSON arrays at the specified key, e.g. '["a", "b"]'

    Such parsing allows applying transformations further to the elements of the parsed JSON - timestamps, ARNs, etc.

    Such parsing is not done by default because it's not a common use case.
    Whether to parse a JSON string or not should be decided by the user on a case by case basis.
    Limited general parsing that we already have is preserved for backwards compatibility.
    """

    key: str

    def __init__(self, key: str):
        """
        :param key: dictionary key whose string value should be parsed as JSON
        """
        self.key = key

    def transform(self, input_data: dict, *, ctx: Optional["TransformContext"] = None) -> dict:
        """
        Replace every JSON string stored under ``self.key`` with its parsed value.

        Dictionaries are updated in place and returned. Lists (including a list passed
        as the top-level value) are rebuilt as new lists. Any other value is returned unchanged.

        :param input_data: data to walk, normally a dict
        :param ctx: transformation context; only passed along, never read here
        :return: the transformed data
        """
        # Dispatch on the actual type so that a top-level list is handled like a nested one
        # instead of failing on the missing ``.items()``.
        return self._transform(input_data, ctx=ctx)

    @staticmethod
    def _looks_like_json(value: str) -> bool:
        """Cheap pre-check: only text starting (after whitespace) with '{' or '[' is worth parsing."""
        return value.lstrip().startswith(("{", "["))

    def _transform(self, input_data: Any, ctx: Optional["TransformContext"] = None) -> Any:
        """Dispatch to the dict or list handler; any other value is returned untouched."""
        if isinstance(input_data, dict):
            return self._transform_dict(input_data, ctx=ctx)
        if isinstance(input_data, list):
            return self._transform_list(input_data, ctx=ctx)
        return input_data

    def _transform_dict(self, input_data: dict, ctx: Optional["TransformContext"] = None) -> dict:
        """
        Parse the value at ``self.key`` if it looks like JSON; recurse into every other value.

        Only values of already existing keys are reassigned, so the dict can be updated
        while it is being iterated.
        """
        for k, v in input_data.items():
            if k == self.key and isinstance(v, str) and self._looks_like_json(v):
                try:
                    json_value = json.loads(v)
                except JSONDecodeError:
                    SNAPSHOT_LOGGER.exception(
                        'Value mapped to "%s" key is not a valid JSON string and won\'t be transformed. Value: %s',
                        k,
                        v,
                    )
                    continue
                # Logged only once parsing has succeeded, so the message is never emitted
                # for a value that is subsequently left untouched.
                SNAPSHOT_LOGGER.debug("Replacing string value of %s with parsed JSON", k)
                input_data[k] = self._transform_nested(json_value)
            else:
                input_data[k] = self._transform(v, ctx=ctx)
        return input_data

    def _transform_list(self, input_data: list, ctx: Optional["TransformContext"] = None) -> list:
        """Return a new list with every item transformed."""
        return [self._transform(item, ctx=ctx) for item in input_data]

    def _transform_nested(self, input_data: Any) -> Any:
        """
        Separate method from the main `_transform_dict` one because
        it checks every string while the main one attempts to load at specified key only.
        This one is implicit, best-effort attempt,
        while the main one is explicit about at which key transform should happen
        """
        if isinstance(input_data, list):
            return [self._transform_nested(item) for item in input_data]
        if isinstance(input_data, dict):
            for k, v in input_data.items():
                input_data[k] = self._transform_nested(v)
            return input_data
        if isinstance(input_data, str) and self._looks_like_json(input_data):
            try:
                json_value = json.loads(input_data)
            except JSONDecodeError:
                # Best effort: text that merely resembles JSON stays as it is.
                SNAPSHOT_LOGGER.debug(
                    "The value is not a valid JSON string and won't be transformed. The value: %s",
                    input_data,
                )
                return input_data
            # The parsed value may itself contain JSON strings.
            return self._transform_nested(json_value)
        return input_data
# NOTE(review): in tests/test_transformer.py these two functions are methods of a test class
# whose header is outside the visible hunk - confirm the enclosing class when applying.
@pytest.mark.parametrize(
    "input_value,transformed_value",
    [
        pytest.param('{"a": "b"}', {"a": "b"}, id="simple_json_object"),
        pytest.param('{\n "a": "b"\n}', {"a": "b"}, id="formatted_json_object"),
        pytest.param('\n {"a": "b"}', {"a": "b"}, id="json_with_whitespaces"),
        pytest.param('{"a": 42}malformed', '{"a": 42}malformed', id="malformed_json"),
        pytest.param('["a", "b"]', ["a", "b"], id="simple_json_list"),
        pytest.param('{"a": "{\\"b\\":42}"}', {"a": {"b": 42}}, id="nested_json_object"),
        pytest.param(
            '{"a": "\\n {\\n \\"b\\":42}"}',
            {"a": {"b": 42}},
            id="nested_formatted_json_object_with_whitespaces",
        ),
        pytest.param(
            '{"a": "[{\\"b\\":\\"c\\"}]"}', {"a": [{"b": "c"}]}, id="nested_json_list"
        ),
        pytest.param(
            '{"a": "{\\"b\\":42malformed}"}',
            {"a": '{"b":42malformed}'},
            id="malformed_nested_json",
        ),
        pytest.param("[]", [], id="empty_list"),
        pytest.param("{}", {}, id="empty_object"),
        pytest.param("", "", id="empty_string"),
    ],
)
def test_json_string(self, input_value, transformed_value):
    """The string stored at the configured key becomes its parsed JSON value; invalid JSON is kept as is."""
    key = "key"
    json_transformer = JsonStringTransformer(key)

    result = json_transformer.transform({key: input_value}, ctx=TransformContext())

    assert result == {key: transformed_value}


def test_json_string_in_a_nested_key(self):
    """The configured key is also found inside a dict that sits in a list below the top level."""
    key = "nested-key-in-an-object-hidden-inside-a-list"
    json_transformer = JsonStringTransformer(key)

    result = json_transformer.transform(
        {"top-level-key": [{key: '{"a": "b"}'}]}, ctx=TransformContext()
    )

    assert result == {"top-level-key": [{key: {"a": "b"}}]}