From f64275e4c9bb7556bfdf3f32fa256b974c4cdfa5 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com>
Date: Mon, 10 Mar 2025 12:06:53 -0400
Subject: [PATCH 1/4] BUG(string dtype): Empty sum produces incorrect result
 (#60936)

---
 pandas/core/arrays/base.py                    | 10 +++++++-
 pandas/tests/frame/test_reductions.py         | 10 ++++++++
 pandas/tests/groupby/test_reductions.py       | 14 +++++++++++
 pandas/tests/resample/test_base.py            | 25 +++++++++++++++++++
 .../tests/resample/test_resampler_grouper.py  | 20 +++++++++++++++
 5 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 62ca2a45fb941..ffcd6ebc4ae11 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2386,7 +2386,15 @@ def _groupby_op(
             if op.how not in ["any", "all"]:
                 # Fail early to avoid conversion to object
                 op._get_cython_function(op.kind, op.how, np.dtype(object), False)
-            npvalues = self.to_numpy(object, na_value=np.nan)
+
+            arr = self
+            if op.how == "sum":
+                # https://github.com/pandas-dev/pandas/issues/60229
+                # All NA should result in the empty string.
+                assert "skipna" in kwargs
+                if kwargs["skipna"] and min_count == 0:
+                    arr = arr.fillna("")
+            npvalues = arr.to_numpy(object, na_value=np.nan)
         else:
             raise NotImplementedError(
                 f"function is not implemented for this dtype: {self.dtype}"
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 1b2e55c978071..8b450cecfca00 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -846,6 +846,16 @@ def test_axis_1_empty(self, all_reductions, index):
         expected = Series([], index=index, dtype=expected_dtype)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize("min_count", [0, 1])
+    def test_axis_1_sum_na(self, string_dtype_no_object, skipna, min_count):
+        # https://github.com/pandas-dev/pandas/issues/60229
+        dtype = string_dtype_no_object
+        df = DataFrame({"a": [pd.NA]}, dtype=dtype)
+        result = df.sum(axis=1, skipna=skipna, min_count=min_count)
+        value = "" if skipna and min_count == 0 else pd.NA
+        expected = Series([value], dtype=dtype)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)])
     @pytest.mark.parametrize("numeric_only", [None, True, False])
     def test_sum_prod_nanops(self, method, unit, numeric_only):
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 599b0aabf85d5..8701b8d86651e 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -710,6 +710,20 @@ def test_min_empty_string_dtype(func, string_dtype_no_object):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("min_count", [0, 1])
+def test_string_dtype_empty_sum(string_dtype_no_object, skipna, min_count):
+    # https://github.com/pandas-dev/pandas/issues/60229
+    dtype = string_dtype_no_object
+    df = DataFrame({"a": ["x"], "b": [pd.NA]}, dtype=dtype)
+    gb = df.groupby("a")
+    result = gb.sum(skipna=skipna, min_count=min_count)
+    value = "" if skipna and min_count == 0 else pd.NA
+    expected = DataFrame(
+        {"b": value}, index=pd.Index(["x"], name="a", dtype=dtype), dtype=dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 def test_max_nan_bug():
     df = DataFrame(
         {
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index dcf6c6099abab..27d87729e2075 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -150,6 +150,31 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
     assert result.index.freq == expected.index.freq
 
 
+@pytest.mark.parametrize("min_count", [0, 1])
+def test_resample_empty_sum_string(string_dtype_no_object, min_count):
+    # https://github.com/pandas-dev/pandas/issues/60229
+    dtype = string_dtype_no_object
+    ser = Series(
+        pd.NA,
+        index=DatetimeIndex(
+            [
+                "2000-01-01 00:00:00",
+                "2000-01-01 00:00:10",
+                "2000-01-01 00:00:20",
+                "2000-01-01 00:00:30",
+            ]
+        ),
+        dtype=dtype,
+    )
+    rs = ser.resample("20s")
+    result = rs.sum(min_count=min_count)
+
+    value = "" if min_count == 0 else pd.NA
+    index = date_range(start="2000-01-01", freq="20s", periods=2, unit="s")
+    expected = Series(value, index=index, dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
 @all_ts
 @pytest.mark.parametrize(
     "freq",
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index e2d456fea2b23..278a19bdc8348 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -526,6 +526,26 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("min_count", [0, 1])
+def test_groupby_resample_empty_sum_string(
+    string_dtype_no_object, test_frame, min_count
+):
+    # https://github.com/pandas-dev/pandas/issues/60229
+    dtype = string_dtype_no_object
+    test_frame = test_frame.assign(B=pd.array([pd.NA] * len(test_frame), dtype=dtype))
+    gbrs = test_frame.groupby("A").resample("40s")
+    result = gbrs.sum(min_count=min_count)
+
+    index = pd.MultiIndex(
+        levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns")]],
+        codes=[[0, 1, 2], [0, 0, 0]],
+        names=["A", None],
+    )
+    value = "" if min_count == 0 else pd.NA
+    expected = DataFrame({"B": value}, index=index, dtype=dtype)
+    tm.assert_frame_equal(result, expected)
+
+
 def test_groupby_resample_with_list_of_keys():
     # GH 47362
     df = DataFrame(

From e2f988dc1d895b8c0116ae757541c940f67e5d37 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 11 Jun 2025 12:01:39 +0200
Subject: [PATCH 2/4] skipna keyword not yet available in groupby for 2.3

---
 pandas/core/arrays/base.py              | 3 +--
 pandas/tests/groupby/test_reductions.py | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index ffcd6ebc4ae11..28a95ce1784a2 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2391,8 +2391,7 @@ def _groupby_op(
             if op.how == "sum":
                 # https://github.com/pandas-dev/pandas/issues/60229
                 # All NA should result in the empty string.
-                assert "skipna" in kwargs
-                if kwargs["skipna"] and min_count == 0:
+                if min_count == 0:
                     arr = arr.fillna("")
             npvalues = arr.to_numpy(object, na_value=np.nan)
         else:
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 8701b8d86651e..896cd223c3ff3 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -711,13 +711,13 @@ def test_min_empty_string_dtype(func, string_dtype_no_object):
 
 
 @pytest.mark.parametrize("min_count", [0, 1])
-def test_string_dtype_empty_sum(string_dtype_no_object, skipna, min_count):
+def test_string_dtype_empty_sum(string_dtype_no_object, min_count):
     # https://github.com/pandas-dev/pandas/issues/60229
     dtype = string_dtype_no_object
     df = DataFrame({"a": ["x"], "b": [pd.NA]}, dtype=dtype)
     gb = df.groupby("a")
-    result = gb.sum(skipna=skipna, min_count=min_count)
-    value = "" if skipna and min_count == 0 else pd.NA
+    result = gb.sum(min_count=min_count)
+    value = "" if min_count == 0 else pd.NA
     expected = DataFrame(
         {"b": value}, index=pd.Index(["x"], name="a", dtype=dtype), dtype=dtype
     )

From f3f54114e25dca12978307a06d56e8920056348e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 11 Jun 2025 15:37:41 +0200
Subject: [PATCH 3/4] avoid deprecation warning about include_groups

---
 pandas/tests/resample/test_resampler_grouper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 278a19bdc8348..3eba4b4f23bd4 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -533,7 +533,7 @@ def test_groupby_resample_empty_sum_string(
     # https://github.com/pandas-dev/pandas/issues/60229
     dtype = string_dtype_no_object
     test_frame = test_frame.assign(B=pd.array([pd.NA] * len(test_frame), dtype=dtype))
-    gbrs = test_frame.groupby("A").resample("40s")
+    gbrs = test_frame.groupby("A").resample("40s", include_groups=False)
     result = gbrs.sum(min_count=min_count)
 
     index = pd.MultiIndex(

From 914d062c9f906ed7c116df86140fd4d8062c2d80 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 11 Jun 2025 15:38:48 +0200
Subject: [PATCH 4/4] use default resolution for expected resample index

---
 pandas/tests/resample/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index 27d87729e2075..1d9e9124db2b0 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -170,7 +170,7 @@ def test_resample_empty_sum_string(string_dtype_no_object, min_count):
     result = rs.sum(min_count=min_count)
 
     value = "" if min_count == 0 else pd.NA
-    index = date_range(start="2000-01-01", freq="20s", periods=2, unit="s")
+    index = date_range(start="2000-01-01", freq="20s", periods=2)
     expected = Series(value, index=index, dtype=dtype)
     tm.assert_series_equal(result, expected)