pandas-dev · WillAyd · May 7, 2024 · May 6, 2024 · May 6, 2024 · May 6, 2024
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -672,47 +672,47 @@ def _create_mi_with_dt64tz_level():
 
 
 indices_dict = {
-    "string": Index([f"pandas_{i}" for i in range(100)]),
-    "datetime": date_range("2020-01-01", periods=100),
-    "datetime-tz": date_range("2020-01-01", periods=100, tz="US/Pacific"),
-    "period": period_range("2020-01-01", periods=100, freq="D"),
-    "timedelta": timedelta_range(start="1 day", periods=100, freq="D"),
-    "range": RangeIndex(100),
-    "int8": Index(np.arange(100), dtype="int8"),
-    "int16": Index(np.arange(100), dtype="int16"),
-    "int32": Index(np.arange(100), dtype="int32"),
-    "int64": Index(np.arange(100), dtype="int64"),
-    "uint8": Index(np.arange(100), dtype="uint8"),
-    "uint16": Index(np.arange(100), dtype="uint16"),
-    "uint32": Index(np.arange(100), dtype="uint32"),
-    "uint64": Index(np.arange(100), dtype="uint64"),
-    "float32": Index(np.arange(100), dtype="float32"),
-    "float64": Index(np.arange(100), dtype="float64"),
+    "string": Index([f"pandas_{i}" for i in range(10)]),
+    "datetime": date_range("2020-01-01", periods=10),
+    "datetime-tz": date_range("2020-01-01", periods=10, tz="US/Pacific"),
+    "period": period_range("2020-01-01", periods=10, freq="D"),
+    "timedelta": timedelta_range(start="1 day", periods=10, freq="D"),
+    "range": RangeIndex(10),
+    "int8": Index(np.arange(10), dtype="int8"),
+    "int16": Index(np.arange(10), dtype="int16"),
+    "int32": Index(np.arange(10), dtype="int32"),
+    "int64": Index(np.arange(10), dtype="int64"),
+    "uint8": Index(np.arange(10), dtype="uint8"),
+    "uint16": Index(np.arange(10), dtype="uint16"),
+    "uint32": Index(np.arange(10), dtype="uint32"),
+    "uint64": Index(np.arange(10), dtype="uint64"),
+    "float32": Index(np.arange(10), dtype="float32"),
+    "float64": Index(np.arange(10), dtype="float64"),
     "bool-object": Index([True, False] * 5, dtype=object),
     "bool-dtype": Index([True, False] * 5, dtype=bool),
     "complex64": Index(
-        np.arange(100, dtype="complex64") + 1.0j * np.arange(100, dtype="complex64")
+        np.arange(10, dtype="complex64") + 1.0j * np.arange(10, dtype="complex64")
     ),
     "complex128": Index(
-        np.arange(100, dtype="complex128") + 1.0j * np.arange(100, dtype="complex128")
+        np.arange(10, dtype="complex128") + 1.0j * np.arange(10, dtype="complex128")
     ),
-    "categorical": CategoricalIndex(list("abcd") * 25),
-    "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)),
+    "categorical": CategoricalIndex(list("abcd") * 2),
+    "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=11)),
     "empty": Index([]),
     "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
     "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
     "multi": _create_multiindex(),
     "repeats": Index([0, 0, 1, 1, 2, 2]),
-    "nullable_int": Index(np.arange(100), dtype="Int64"),
-    "nullable_uint": Index(np.arange(100), dtype="UInt16"),
-    "nullable_float": Index(np.arange(100), dtype="Float32"),
-    "nullable_bool": Index(np.arange(100).astype(bool), dtype="boolean"),
+    "nullable_int": Index(np.arange(10), dtype="Int64"),
+    "nullable_uint": Index(np.arange(10), dtype="UInt16"),
+    "nullable_float": Index(np.arange(10), dtype="Float32"),
+    "nullable_bool": Index(np.arange(10).astype(bool), dtype="boolean"),
     "string-python": Index(
-        pd.array([f"pandas_{i}" for i in range(100)], dtype="string[python]")
+        pd.array([f"pandas_{i}" for i in range(10)], dtype="string[python]")
     ),
 }
 if has_pyarrow:
-    idx = Index(pd.array([f"pandas_{i}" for i in range(100)], dtype="string[pyarrow]"))
+    idx = Index(pd.array([f"pandas_{i}" for i in range(10)], dtype="string[pyarrow]"))
     indices_dict["string-pyarrow"] = idx
 
 

diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
@@ -17,9 +17,9 @@ def datetime_frame() -> DataFrame:
     Columns are ['A', 'B', 'C', 'D']
     """
     return DataFrame(
-        np.random.default_rng(2).standard_normal((100, 4)),
+        np.random.default_rng(2).standard_normal((10, 4)),
         columns=Index(list("ABCD"), dtype=object),
-        index=date_range("2000-01-01", periods=100, freq="B"),
+        index=date_range("2000-01-01", periods=10, freq="B"),
     )
 
 

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -118,7 +118,7 @@ def test_setitem_list2(self):
 
     def test_getitem_boolean(self, mixed_float_frame, mixed_int_frame, datetime_frame):
         # boolean indexing
-        d = datetime_frame.index[10]
+        d = datetime_frame.index[len(datetime_frame) // 2]
         indexer = datetime_frame.index > d
         indexer_obj = indexer.astype(object)
 

diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py
@@ -97,7 +97,7 @@ def test_at_time_raises(self, frame_or_series):
 
     def test_at_time_axis(self, axis):
         # issue 8839
-        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+        rng = date_range("1/1/2000", "1/2/2000", freq="5min")
         ts = DataFrame(np.random.default_rng(2).standard_normal((len(rng), len(rng))))
         ts.index, ts.columns = rng, rng
 

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -285,7 +285,7 @@ def test_corrwith(self, datetime_frame, dtype):
         b = datetime_frame.add(noise, axis=0)
 
         # make sure order does not matter
-        b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:])
+        b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][len(a) // 2 :])
         del b["B"]
 
         colcorr = a.corrwith(b, axis=0)
@@ -301,7 +301,7 @@ def test_corrwith(self, datetime_frame, dtype):
         dropped = a.corrwith(b, axis=1, drop=True)
         assert a.index[-1] not in dropped.index
 
-        # non time-series data
+    def test_corrwith_non_timeseries_data(self):
         index = ["a", "b", "c", "d", "e"]
         columns = ["one", "two", "three", "four"]
         df1 = DataFrame(

diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
@@ -60,9 +60,6 @@ def test_fillna_datetime(self, datetime_frame):
 
         padded = datetime_frame.ffill()
         assert np.isnan(padded.loc[padded.index[:5], "A"]).all()
-        assert (
-            padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"]
-        ).all()
 
         msg = r"missing 1 required positional argument: 'value'"
         with pytest.raises(TypeError, match=msg):

diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
@@ -33,7 +33,7 @@ def read_csv(self, path, **kwargs):
 
         return read_csv(path, **params)
 
-    def test_to_csv_from_csv1(self, temp_file, float_frame, datetime_frame):
+    def test_to_csv_from_csv1(self, temp_file, float_frame):
         path = str(temp_file)
         float_frame.iloc[:5, float_frame.columns.get_loc("A")] = np.nan
 
@@ -42,6 +42,8 @@ def test_to_csv_from_csv1(self, temp_file, float_frame, datetime_frame):
         float_frame.to_csv(path, header=False)
         float_frame.to_csv(path, index=False)
 
+    def test_to_csv_from_csv1_datetime(self, temp_file, datetime_frame):
+        path = str(temp_file)
         # test roundtrip
         # freq does not roundtrip
         datetime_frame.index = datetime_frame.index._with_freq(None)
@@ -59,7 +61,8 @@ def test_to_csv_from_csv1(self, temp_file, float_frame, datetime_frame):
         recons = self.read_csv(path, index_col=None, parse_dates=True)
         tm.assert_almost_equal(datetime_frame.values, recons.values)
 
-        # corner case
+    def test_to_csv_from_csv1_corner_case(self, temp_file):
+        path = str(temp_file)
         dm = DataFrame(
             {
                 "s1": Series(range(3), index=np.arange(3, dtype=np.int64)),
@@ -1167,9 +1170,16 @@ def test_to_csv_with_dst_transitions(self, td, temp_file):
         result.index = to_datetime(result.index, utc=True).tz_convert("Europe/London")
         tm.assert_frame_equal(result, df)
 
-    def test_to_csv_with_dst_transitions_with_pickle(self, temp_file):
+    @pytest.mark.parametrize(
+        "start,end",
+        [
+            ["2015-03-29", "2015-03-30"],
+            ["2015-10-25", "2015-10-26"],
+        ],
+    )
+    def test_to_csv_with_dst_transitions_with_pickle(self, start, end, temp_file):
         # GH11619
-        idx = date_range("2015-01-01", "2015-12-31", freq="h", tz="Europe/Paris")
+        idx = date_range(start, end, freq="h", tz="Europe/Paris")
         idx = idx._with_freq(None)  # freq does not round-trip
         idx._data._freq = None  # otherwise there is trouble on unpickle
         df = DataFrame({"values": 1, "idx": idx}, index=idx)

diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py
@@ -60,7 +60,7 @@ def test_truncate(self, datetime_frame, frame_or_series):
         truncated = ts.truncate(before=ts.index[-1] + ts.index.freq)
         assert len(truncated) == 0
 
-        msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-05-16 00:00:00"
+        msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-01-11 00:00:00"
         with pytest.raises(ValueError, match=msg):
             ts.truncate(
                 before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -249,20 +249,19 @@ def f(dtype):
         with pytest.raises(ValueError, match=msg):
             f("M8[ns]")
 
-    def test_pickle(self, float_string_frame, timezone_frame):
-        empty_frame = DataFrame()
-
+    def test_pickle_float_string_frame(self, float_string_frame):
         unpickled = tm.round_trip_pickle(float_string_frame)
         tm.assert_frame_equal(float_string_frame, unpickled)
 
         # buglet
         float_string_frame._mgr.ndim
 
-        # empty
+    def test_pickle_empty(self):
+        empty_frame = DataFrame()
         unpickled = tm.round_trip_pickle(empty_frame)
         repr(unpickled)
 
-        # tz frame
+    def test_pickle_tz_frame(self, timezone_frame):
         unpickled = tm.round_trip_pickle(timezone_frame)
         tm.assert_frame_equal(timezone_frame, unpickled)
 

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2280,7 +2280,7 @@ def test_check_dtype_empty_numeric_column(self, dtype):
     @pytest.mark.parametrize(
         "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES
     )
-    def test_check_dtype_empty_string_column(self, request, dtype):
+    def test_check_dtype_empty_string_column(self, dtype):
         # GH24386: Ensure dtypes are set correctly for an empty DataFrame.
         # Empty DataFrame is generated via dictionary data with non-overlapping columns.
         data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype)