Skip to content

Commit 9aace9e

Browse files
authored
Deprecate Row.as_dict() (#309)
This PR deprecates the `Row.as_dict()` method: this class is intended to be interchangeable with Spark's `Row`, which doesn't support the `.as_dict()` method. Having it present leads to subtle bugs where code works when the execution-based backends are being used but fails when the Spark-based backend is used instead. Clients should instead use `.asDict()`. To signal the deprecation we use Python's warnings mechanism (including the new `@warnings.deprecated` annotation from PEP 702, available in Python 3.13, to help with static code analysis).
1 parent 94ed7f0 commit 9aace9e

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

src/databricks/labs/lsql/core.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
import json
44
import logging
55
import random
6+
import sys
67
import threading
78
import time
89
import types
10+
import warnings
911
from collections.abc import Callable, Iterator
1012
from datetime import timedelta
1113
from typing import TYPE_CHECKING, Any
@@ -70,28 +72,39 @@ def __new__(cls, *args, **kwargs):
7072
return tuple.__new__(cls, args)
7173

7274
@classmethod
73-
def factory(cls, col_names: list[str]) -> type:
75+
def factory(cls, col_names: list[str]) -> type["Row"]:
7476
"""Create a new Row class with the given column names."""
7577
return type("Row", (Row,), {"__columns__": col_names})
7678

77-
def as_dict(self) -> dict[str, Any]:
78-
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
79-
return dict(zip(self.__columns__, self, strict=True))
79+
# If we can mark the method as deprecated via PEP 702 annotation, prefer this because it helps mypy and
80+
# PyCharm/IntelliJ detect and flag deprecated use.
81+
if sys.version_info >= (3, 13):
82+
83+
@warnings.deprecated("Using as_dict() on rows is deprecated; use asDict() instead.") # pylint: disable=no-member
84+
def as_dict(self) -> dict[str, Any]:
85+
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
86+
return self.asDict()
87+
else:
88+
89+
def as_dict(self) -> dict[str, Any]:
90+
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
91+
warnings.warn("Using as_dict() on rows is deprecated; use asDict() instead.", DeprecationWarning)
92+
return self.asDict()
8093

8194
# PySpark's compatibility
8295
def asDict(self, recursive: bool = False) -> dict[str, Any]:
8396
_ = recursive
84-
return self.as_dict()
97+
return dict(zip(self.__columns__, self, strict=True))
8598

86-
def __eq__(self, other):
99+
def __eq__(self, other) -> bool:
87100
"""Check if the rows are equal."""
88101
if not isinstance(other, Row):
89102
return False
90103
# compare rows as dictionaries, because the order
91104
# of fields in constructor is not guaranteed
92-
return self.as_dict() == other.as_dict()
105+
return self.asDict() == other.asDict()
93106

94-
def __contains__(self, item):
107+
def __contains__(self, item) -> bool:
95108
"""Check if the column is in the row."""
96109
return item in self.__columns__
97110

@@ -114,7 +127,7 @@ def __getattr__(self, col):
114127
except ValueError:
115128
raise AttributeError(col) from None
116129

117-
def __repr__(self):
130+
def __repr__(self) -> str:
118131
"""Get the string representation of the row."""
119132
return f"Row({', '.join(f'{k}={v!r}' for (k, v) in zip(self.__columns__, self, strict=True))})"
120133

@@ -311,7 +324,7 @@ def fetch_all(
311324
>>> pickup_time, dropoff_time = row[0], row[1]
312325
>>> pickup_zip = row.pickup_zip
313326
>>> dropoff_zip = row["dropoff_zip"]
314-
>>> all_fields = row.as_dict()
327+
>>> all_fields = row.asDict()
315328
>>> logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}")
316329
317330
:param statement: str
@@ -366,7 +379,7 @@ def fetch_one(self, statement: str, disable_magic: bool = False, **kwargs) -> Ro
366379
>>> pickup_time, dropoff_time = row[0], row[1]
367380
>>> pickup_zip = row.pickup_zip
368381
>>> dropoff_zip = row['dropoff_zip']
369-
>>> all_fields = row.as_dict()
382+
>>> all_fields = row.asDict()
370383
>>> print(f'{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}')
371384
372385
:param statement: str

tests/integration/test_core.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_sql_execution_partial(ws, env_or_skip):
4545
pickup_time, dropoff_time = row[0], row[1]
4646
pickup_zip = row.pickup_zip
4747
dropoff_zip = row["dropoff_zip"]
48-
all_fields = row.as_dict()
48+
all_fields = row.asDict()
4949
logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}")
5050
results.append((pickup_zip, dropoff_zip))
5151
assert results == [
@@ -83,3 +83,11 @@ def test_fetch_value(ws):
8383
see = StatementExecutionExt(ws)
8484
count = see.fetch_value("SELECT COUNT(*) FROM samples.nyctaxi.trips")
8585
assert count == 21932
86+
87+
88+
def test_row_as_dict_deprecated(ws) -> None:
89+
see = StatementExecutionExt(ws)
90+
row = see.fetch_one("SELECT 1")
91+
assert row is not None
92+
with pytest.deprecated_call():
93+
_ = row.as_dict()

tests/unit/test_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_row_from_kwargs(row):
3838
assert "foo" in row
3939
assert len(row) == 2
4040
assert list(row) == ["bar", True]
41-
assert row.as_dict() == {"foo": "bar", "enabled": True}
41+
assert row.asDict() == {"foo": "bar", "enabled": True}
4242
foo, enabled = row
4343
assert foo == "bar"
4444
assert enabled is True

0 commit comments

Comments
 (0)