Skip to content

Commit e305353

Browse files
authored
Enrich GroupedHashAggregateStream name to ease debugging Resources exhausted errors (apache#16152)
* Enrich GroupedHashAggregateStream name to ease debugging Resources exhausted errors
* Use human_display
* clippy
1 parent 17fe504 commit e305353

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

datafusion/core/tests/memory_limit/mod.rs

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -408,6 +408,19 @@ async fn oom_with_tracked_consumer_pool() {
408408
.await
409409
}
410410

411+
#[tokio::test]
412+
async fn oom_grouped_hash_aggregate() {
413+
TestCase::new()
414+
.with_query("SELECT COUNT(*), SUM(request_bytes) FROM t GROUP BY host")
415+
.with_expected_errors(vec![
416+
"Failed to allocate additional",
417+
"GroupedHashAggregateStream[0] (count(1), sum(t.request_bytes))",
418+
])
419+
.with_memory_limit(1_000)
420+
.run()
421+
.await
422+
}
423+
411424
/// For regression case: if spilled `StringViewArray`'s buffer will be referenced by
412425
/// other batches which are also need to be spilled, then the spill writer will
413426
/// repeatedly write out the same buffer, and after reading back, each batch's size

datafusion/physical-plan/src/aggregates/row_hash.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,12 @@ impl GroupedHashAggregateStream {
529529
})
530530
.collect();
531531

532-
let name = format!("GroupedHashAggregateStream[{partition}]");
532+
let agg_fn_names = aggregate_exprs
533+
.iter()
534+
.map(|expr| expr.human_display())
535+
.collect::<Vec<_>>()
536+
.join(", ");
537+
let name = format!("GroupedHashAggregateStream[{partition}] ({agg_fn_names})");
533538
let reservation = MemoryConsumer::new(name)
534539
.with_can_spill(true)
535540
.register(context.memory_pool());

0 commit comments

Comments (0)