Commit f6347c0

[Data] Avoid unnecessary conversion to Numpy when creating Arrow/Pandas blocks (#51238)
Context
---

This change skips the unnecessary blanket conversion to Numpy (previously applied to every chunk of data) before converting to Pyarrow. That conversion created problems when batches contained Arrow-native `Scalar`s, which as a result were ultimately serialized via the `ArrowPythonObjectType` extension.

Changes
---

We revisit the conversion path and convert the passed-in column values to Numpy only in the following cases:

- The column name is `TENSOR_COLUMN_NAME` (for compatibility).
- The provided column values are already represented by a tensor (numpy, torch, etc.).
- The provided column values are a list of ndarrays (kept for compatibility with the previously existing behavior, in which all column values were blindly converted to Numpy and a list of ndarrays therefore became a tensor).

---------

Signed-off-by: Alexey Kudinkin <ak@anyscale.com>
1 parent 5fd8632 commit f6347c0
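
To make the change concrete, here is a minimal sketch of the new behavior, derived from the tests added in this commit. It assumes a Ray build that includes this change; `convert_to_pyarrow_array` and `DataContext` are the names used in the diffs below.

import pyarrow as pa

from ray.air.util.tensor_extensions.arrow import convert_to_pyarrow_array
from ray.data import DataContext

# Disable the fallback so a failed native conversion surfaces as an error
# instead of silently wrapping values in the ArrowPythonObjectType extension.
DataContext.get_current().enable_fallback_to_arrow_object_ext_type = False

# Nested Python lists are now serialized as native Arrow lists rather than
# being blanket-converted to Numpy first.
pa_arr = convert_to_pyarrow_array([[1, 2], [3, 4]], "a")
assert pa_arr.type == pa.list_(pa.int64())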

15 files changed (+392, −172 lines)


python/ray/air/BUILD

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ py_test(
 py_test(
     name = "test_arrow",
     size = "small",
-    srcs = ["tests/test_arrow.py"],
+    srcs = ["tests/test_arrow.py", "conftest"],
     tags = ["team:ml", "team:data", "ray_data", "exclusive"],
     deps = [":ml_lib"]
 )

python/ray/air/tests/conftest.py

Lines changed: 8 additions & 0 deletions
@@ -13,3 +13,11 @@ def restore_data_context(request):
     original = copy.deepcopy(ray.data.context.DataContext.get_current())
     yield
     ray.data.context.DataContext._set_current(original)
+
+
+@pytest.fixture
+def disable_fallback_to_object_extension(request, restore_data_context):
+    """Disables fallback to ArrowPythonObjectType"""
+    ray.data.context.DataContext.get_current().enable_fallback_to_arrow_object_ext_type = (
+        False
+    )
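
Because the fixture depends on `restore_data_context` (defined just above), the flag is automatically restored after each test. A test opts in simply by naming the fixture as a parameter, as the new test in test_arrow.py below does; a hypothetical minimal usage:

def test_without_object_fallback(disable_fallback_to_object_extension):
    # With the fallback disabled, values that cannot be represented natively
    # are expected to raise (e.g. ArrowConversionError) instead of being
    # wrapped in the ArrowPythonObjectType extension.
    ...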

python/ray/air/tests/test_arrow.py

Lines changed: 93 additions & 14 deletions
@@ -4,14 +4,18 @@
 import numpy as np
 import pyarrow as pa
 import pytest
+from packaging.version import parse as parse_version
 
+from ray._private.utils import _get_pyarrow_version
 from ray.air.util.tensor_extensions.arrow import (
     ArrowConversionError,
     _convert_to_pyarrow_native_array,
     _infer_pyarrow_type,
     convert_to_pyarrow_array,
+    ArrowTensorArray,
 )
 from ray.air.util.tensor_extensions.utils import create_ragged_ndarray
+from ray.data import DataContext
 from ray.tests.conftest import *  # noqa
 
 import psutil
@@ -23,16 +27,56 @@ class UserObj:
 
 
 @pytest.mark.parametrize(
-    "numpy_precision, expected_arrow_type",
+    "input",
+    [
+        # Python native lists
+        [
+            [1, 2],
+            [3, 4],
+        ],
+        # Python native tuples
+        [
+            (1, 2),
+            (3, 4),
+        ],
+        # Lists as PA scalars
+        [
+            pa.scalar([1, 2]),
+            pa.scalar([3, 4]),
+        ],
+    ],
+)
+def test_arrow_native_list_conversion(input, disable_fallback_to_object_extension):
+    """Test asserts that nested lists are represented as native Arrow lists
+    upon serialization into Arrow format (and are NOT converted to numpy
+    tensor using extension)"""
+
+    if isinstance(input[0], pa.Scalar) and parse_version(
+        _get_pyarrow_version()
+    ) <= parse_version("13.0.0"):
+        pytest.skip(
+            "Pyarrow < 13.0 not able to properly infer native types from its own Scalars"
+        )
+
+    pa_arr = convert_to_pyarrow_array(input, "a")
+
+    # Should be able to natively convert back to Pyarrow array,
+    # not using any extensions
+    assert pa_arr.type == pa.list_(pa.int64()), pa_arr.type
+    assert pa.array(input) == pa_arr, pa_arr
+
+
+@pytest.mark.parametrize("arg_type", ["list", "ndarray"])
+@pytest.mark.parametrize(
+    "numpy_precision, expected_arrow_timestamp_type",
     [
         ("ms", pa.timestamp("ms")),
         ("us", pa.timestamp("us")),
         ("ns", pa.timestamp("ns")),
-        # Arrow has a special date32 type for dates.
-        ("D", pa.date32()),
         # The coarsest resolution Arrow supports is seconds.
         ("Y", pa.timestamp("s")),
         ("M", pa.timestamp("s")),
+        ("D", pa.timestamp("s")),
         ("h", pa.timestamp("s")),
         ("m", pa.timestamp("s")),
         ("s", pa.timestamp("s")),
@@ -44,26 +88,61 @@
 )
 def test_convert_datetime_array(
     numpy_precision: str,
-    expected_arrow_type: pa.DataType,
+    expected_arrow_timestamp_type: pa.TimestampType,
+    arg_type: str,
+    restore_data_context,
 ):
-    numpy_array = np.zeros(1, dtype=f"datetime64[{numpy_precision}]")
-
-    pyarrow_array = _convert_to_pyarrow_native_array(numpy_array, "")
-
-    assert pyarrow_array.type == expected_arrow_type
-    assert len(numpy_array) == len(pyarrow_array)
-
-
+    DataContext.get_current().enable_fallback_to_arrow_object_ext_type = False
+
+    ndarray = np.ones(1, dtype=f"datetime64[{numpy_precision}]")
+
+    if arg_type == "ndarray":
+        column_values = ndarray
+    elif arg_type == "list":
+        column_values = [ndarray]
+    else:
+        pytest.fail(f"Unknown type: {arg_type}")
+
+    # Step 1: Convert to PA array
+    converted = convert_to_pyarrow_array(column_values, "")
+
+    if arg_type == "ndarray":
+        expected = pa.array(
+            column_values.astype(f"datetime64[{expected_arrow_timestamp_type.unit}]")
+        )
+    elif arg_type == "list":
+        expected = ArrowTensorArray.from_numpy(
+            [
+                column_values[0].astype(
+                    f"datetime64[{expected_arrow_timestamp_type.unit}]"
+                )
+            ]
+        )
+    else:
+        pytest.fail(f"Unknown type: {arg_type}")
+
+    assert expected.type == converted.type
+    assert expected == converted
+
+
+@pytest.mark.parametrize("arg_type", ["list", "ndarray"])
 @pytest.mark.parametrize("dtype", ["int64", "float64", "datetime64[ns]"])
-def test_infer_type_does_not_leak_memory(dtype):
+def test_infer_type_does_not_leak_memory(arg_type, dtype):
     # Test for https://github.com/apache/arrow/issues/45493.
-    column_values = np.zeros(923040, dtype=dtype)  # A ~7 MiB column
+    ndarray = np.zeros(923040, dtype=dtype)  # A ~7 MiB column
 
     process = psutil.Process()
     gc.collect()
     pa.default_memory_pool().release_unused()
     before = process.memory_info().rss
 
+    if arg_type == "ndarray":
+        column_values = ndarray
+    elif arg_type == "list":
+        column_values = [ndarray]
+    else:
+        pytest.fail(f"Unknown type: {arg_type}")
+
     _infer_pyarrow_type(column_values)
 
     gc.collect()
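
One behavioral change surfaced by the updated parametrization above: numpy day-precision datetimes (`datetime64[D]`) are now expected to convert to second-resolution timestamps rather than Arrow's `date32` type. A minimal sketch of that expectation, using only names that appear in the diff:

import numpy as np
import pyarrow as pa

from ray.air.util.tensor_extensions.arrow import convert_to_pyarrow_array

# Units coarser than seconds (Y, M, D, h, m) are widened to second-resolution
# timestamps, matching the ("D", pa.timestamp("s")) case in the test above.
converted = convert_to_pyarrow_array(np.ones(1, dtype="datetime64[D]"), "")
assert converted.type == pa.timestamp("s")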
