Skip to content

Commit 9aace9e

Browse files
authored
Deprecate Row.as_dict() (#309)
This PR deprecates the `Row.as_dict()` method: this class is intended to be interchangeable with Spark's `Row`, which doesn't support the `.as_dict()` method. Having it present leads to subtle bugs where code works when the execution-based backends are being used but fails when the Spark-based backend is used instead. Clients should instead use `.asDict()`. To signal the deprecation we use Python's warnings mechanism (including the new `@warnings.deprecated` annotation from PEP 702, available in Python 3.13, to help with static code analysis).
1 parent 94ed7f0 commit 9aace9e

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

src/databricks/labs/lsql/core.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
import json
44
import logging
55
import random
6+
import sys
67
import threading
78
import time
89
import types
10+
import warnings
911
from collections.abc import Callable, Iterator
1012
from datetime import timedelta
1113
from typing import TYPE_CHECKING, Any
@@ -70,28 +72,39 @@ def __new__(cls, *args, **kwargs):
7072
return tuple.__new__(cls, args)
7173

7274
@classmethod
73-
def factory(cls, col_names: list[str]) -> type:
75+
def factory(cls, col_names: list[str]) -> type["Row"]:
7476
"""Create a new Row class with the given column names."""
7577
return type("Row", (Row,), {"__columns__": col_names})
7678

77-
def as_dict(self) -> dict[str, Any]:
78-
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
79-
return dict(zip(self.__columns__, self, strict=True))
79+
# If we can mark the method as deprecated via PEP 702 annotation, prefer this because it helps mypy and
80+
# PyCharm/IntelliJ detect and flag deprecated use.
81+
if sys.version_info >= (3, 13):
82+
83+
@warnings.deprecated("Using as_dict() on rows is deprecated; use asDict() instead.") # pylint: disable=no-member
84+
def as_dict(self) -> dict[str, Any]:
85+
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
86+
return self.asDict()
87+
else:
88+
89+
def as_dict(self) -> dict[str, Any]:
90+
"""Convert the row to a dictionary with the same conventions as Databricks SDK."""
91+
warnings.warn("Using as_dict() on rows is deprecated; use asDict() instead.", DeprecationWarning)
92+
return self.asDict()
8093

8194
# PySpark's compatibility
8295
def asDict(self, recursive: bool = False) -> dict[str, Any]:
8396
_ = recursive
84-
return self.as_dict()
97+
return dict(zip(self.__columns__, self, strict=True))
8598

86-
def __eq__(self, other):
99+
def __eq__(self, other) -> bool:
87100
"""Check if the rows are equal."""
88101
if not isinstance(other, Row):
89102
return False
90103
# compare rows as dictionaries, because the order
91104
# of fields in constructor is not guaranteed
92-
return self.as_dict() == other.as_dict()
105+
return self.asDict() == other.asDict()
93106

94-
def __contains__(self, item):
107+
def __contains__(self, item) -> bool:
95108
"""Check if the column is in the row."""
96109
return item in self.__columns__
97110

@@ -114,7 +127,7 @@ def __getattr__(self, col):
114127
except ValueError:
115128
raise AttributeError(col) from None
116129

117-
def __repr__(self):
130+
def __repr__(self) -> str:
118131
"""Get the string representation of the row."""
119132
return f"Row({', '.join(f'{k}={v!r}' for (k, v) in zip(self.__columns__, self, strict=True))})"
120133

@@ -311,7 +324,7 @@ def fetch_all(
311324
>>> pickup_time, dropoff_time = row[0], row[1]
312325
>>> pickup_zip = row.pickup_zip
313326
>>> dropoff_zip = row["dropoff_zip"]
314-
>>> all_fields = row.as_dict()
327+
>>> all_fields = row.asDict()
315328
>>> logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}")
316329
317330
:param statement: str
@@ -366,7 +379,7 @@ def fetch_one(self, statement: str, disable_magic: bool = False, **kwargs) -> Ro
366379
>>> pickup_time, dropoff_time = row[0], row[1]
367380
>>> pickup_zip = row.pickup_zip
368381
>>> dropoff_zip = row['dropoff_zip']
369-
>>> all_fields = row.as_dict()
382+
>>> all_fields = row.asDict()
370383
>>> print(f'{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}')
371384
372385
:param statement: str

tests/integration/test_core.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_sql_execution_partial(ws, env_or_skip):
4545
pickup_time, dropoff_time = row[0], row[1]
4646
pickup_zip = row.pickup_zip
4747
dropoff_zip = row["dropoff_zip"]
48-
all_fields = row.as_dict()
48+
all_fields = row.asDict()
4949
logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}")
5050
results.append((pickup_zip, dropoff_zip))
5151
assert results == [
@@ -83,3 +83,11 @@ def test_fetch_value(ws):
8383
see = StatementExecutionExt(ws)
8484
count = see.fetch_value("SELECT COUNT(*) FROM samples.nyctaxi.trips")
8585
assert count == 21932
86+
87+
88+
def test_row_as_dict_deprecated(ws) -> None:
89+
see = StatementExecutionExt(ws)
90+
row = see.fetch_one("SELECT 1")
91+
assert row is not None
92+
with pytest.deprecated_call():
93+
_ = row.as_dict()

tests/unit/test_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_row_from_kwargs(row):
3838
assert "foo" in row
3939
assert len(row) == 2
4040
assert list(row) == ["bar", True]
41-
assert row.as_dict() == {"foo": "bar", "enabled": True}
41+
assert row.asDict() == {"foo": "bar", "enabled": True}
4242
foo, enabled = row
4343
assert foo == "bar"
4444
assert enabled is True

0 commit comments

Comments
 (0)