Skip to content

Tech debt: let HistoryEncoding work with string-based type annotations in addition to the normal type-based ones #3068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/databricks/labs/ucx/progress/history.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations
import dataclasses
import datetime as dt
import typing
from enum import Enum, EnumMeta
import json
import logging
Expand Down Expand Up @@ -100,7 +101,13 @@ def _get_field_names_with_types(cls, klass: type[Record]) -> tuple[dict[str, typ
- A dictionary of fields to include in the object data, and their type.
- The type of the failures field, if present.
"""
field_names_with_types = {field.name: field.type for field in dataclasses.fields(klass)}
# Ignore the field types returned by dataclasses.fields(): it doesn't resolve string-based annotations (which
# are produced automatically in a __future__.__annotations__ context). Unfortunately the dataclass mechanism
# captures the type hints prior to resolution (which happens later in the class initialization process).
# As such, we rely on dataclasses.fields() for the set of field names, but not the types which we fetch directly.
klass_type_hints = typing.get_type_hints(klass)
field_names = [field.name for field in dataclasses.fields(klass)]
field_names_with_types = {field_name: klass_type_hints[field_name] for field_name in field_names}
if "failures" not in field_names_with_types:
failures_type = None
else:
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/progress/test_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_historical_encoder_object_id(ownership) -> None:
class _CompoundKey:
a_field: str = "field-a"
b_field: str = "field-b"
c_field: str = "field-c"
c_field: "str" = "field-c" # Annotations can be strings as well.

@property
def d_property(self) -> str:
Expand Down Expand Up @@ -270,7 +270,7 @@ def test_historical_encoder_object_data_values_strings_as_is(ownership) -> None:
@dataclass
class _AClass:
a_field: str = "value"
existing_json_field: str = "[1, 2, 3]"
existing_json_field: "str" = "[1, 2, 3]"
optional_string_field: str | None = "value"

__id_attributes__: ClassVar = ("a_field",)
Expand Down Expand Up @@ -481,7 +481,7 @@ class _BrokenFailures2:
__id_attributes__: ClassVar = ("a_field",)


@pytest.mark.parametrize("klass,broken_type", ((_BrokenFailures1, list[int]), (_BrokenFailures2, None)))
@pytest.mark.parametrize("klass,broken_type", ((_BrokenFailures1, list[int]), (_BrokenFailures2, type(None))))
def test_historical_encoder_failures_verification(
ownership,
klass: type[DataclassWithIdAttributes],
Expand Down