Metrics reporting #1496

Draft · wants to merge 25 commits into base: main

Commits
eb8109c
Foundational work
DerGut Jun 20, 2025
20a0e80
refactor: TableScan::plan_files into parallel steps
DerGut Jun 26, 2025
e1dc699
Use serialization-based logger
DerGut Jun 27, 2025
16af416
Set metrics reporter on TableScan
DerGut Jun 27, 2025
8832027
Collect metrics for indexed deletes
DerGut Jun 27, 2025
05dc825
Collect manifest file metrics
DerGut Jun 29, 2025
ce52bf6
Drop unnecessary Box<>
DerGut Jun 29, 2025
3bec473
Collect metrics for data and delete files
DerGut Jun 29, 2025
4fcfbed
Inlcude metrics mod
DerGut Jun 29, 2025
7242774
Include TableIdent in TableScan
DerGut Jun 29, 2025
1b8e8c6
Send metrics report
DerGut Jun 29, 2025
ddc9627
Add missing brackets around import
DerGut Jun 29, 2025
f1e598d
Test metrics reporting
DerGut Jun 29, 2025
08f72bd
Move stream writing outside of processing functions
DerGut Jun 29, 2025
683ad4f
Replace Box<ScanMetrics> with Arc<ScanMetrics>
DerGut Jul 1, 2025
876c708
Rever vec comment
DerGut Jul 6, 2025
c74e855
Move JoinHandle for delete index metrics
DerGut Jul 6, 2025
90ce07c
Use JoinHandle for delete file metrics
DerGut Jul 6, 2025
630cc53
Use JoinHandle for data file metrics and refactor
DerGut Jul 6, 2025
ed4987d
Be explicit about JoinHandles and awaits
DerGut Jul 7, 2025
467c565
Simplify LoggingMetricsReporter
DerGut Jul 7, 2025
9e34b42
Feature-flag TableBuilder::metrics_reporter for tests only
DerGut Jul 7, 2025
166cf5d
Join all metrics handles
DerGut Jul 7, 2025
22c4612
Fix clippy warnings
DerGut Jul 7, 2025
ed6c139
Remove unclear comment
DerGut Jul 7, 2025
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions crates/iceberg/Cargo.toml
Expand Up @@ -90,6 +90,7 @@ typed-builder = { workspace = true }
url = { workspace = true }
uuid = { workspace = true }
zstd = { workspace = true }
tracing = { workspace = true }
Comment (Contributor Author):

So far, tracing was only used in tests. As far as I could find, the LoggingMetricsReporter is the first use of any logging in the iceberg crate.
I'm not entirely sure whether it's a good idea to include it and commit to a specific logging crate. tracing seems reasonably standard and compatible with other crates, though. I'd also like to include some default reporter; the Java implementation comes with its LoggingMetricsReporter.java, based on SLF4J.

I've also run into some issues using the tracing crate (as outlined in this comment), but they can probably be worked around and shouldn't be a deciding factor.
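The trade-off discussed above is smaller if the logging backend stays hidden behind the reporter trait. A minimal, hypothetical std-only sketch (not the PR's actual API — the real trait is async and the default impl uses tracing):

```rust
use std::fmt::Debug;

// Simplified, synchronous stand-in for the crate's MetricsReporter trait.
// Because callers only depend on the trait, the choice of logging crate
// (tracing, log, ...) stays confined to one default impl and can be swapped.
trait MetricsReporter: Debug {
    fn report(&self, report: &str) -> String;
}

#[derive(Debug)]
struct StdoutReporter;

impl MetricsReporter for StdoutReporter {
    fn report(&self, report: &str) -> String {
        // A real impl would emit a structured log record instead.
        let line = format!("metrics report: {report}");
        println!("{line}");
        line
    }
}

fn main() {
    let reporter: Box<dyn MetricsReporter> = Box::new(StdoutReporter);
    let line = reporter.report("scan finished");
    assert_eq!(line, "metrics report: scan finished");
}
```

The point of the sketch is the seam, not the backend: replacing StdoutReporter with a tracing-based reporter would not change any caller.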


[dev-dependencies]
ctor = { workspace = true }
Expand Down
51 changes: 44 additions & 7 deletions crates/iceberg/src/delete_file_index.rs
Expand Up @@ -21,9 +21,10 @@ use std::sync::{Arc, RwLock};

use futures::StreamExt;
use futures::channel::mpsc::{Sender, channel};
use itertools::Itertools;
use tokio::sync::Notify;

use crate::runtime::spawn;
use crate::runtime::{JoinHandle, spawn};
use crate::scan::{DeleteFileContext, FileScanTaskDeleteFile};
use crate::spec::{DataContentType, DataFile, Struct};

Expand Down Expand Up @@ -51,33 +52,52 @@ struct PopulatedDeleteFileIndex {
// TODO: Deletion Vector support
}

#[derive(Debug)]
pub(crate) struct DeleteIndexMetrics {
pub(crate) indexed_delete_files: u32,
pub(crate) equality_delete_files: u32,
pub(crate) positional_delete_files: u32,
}

impl DeleteFileIndex {
/// create a new `DeleteFileIndex` along with the sender that populates it with delete files
pub(crate) fn new() -> (DeleteFileIndex, Sender<DeleteFileContext>) {
/// Create a new `DeleteFileIndex` along with the sender that populates it
/// with delete files
///
/// It will asynchronously wait for all delete files to come in before it
/// starts indexing.
pub(crate) fn new() -> (
DeleteFileIndex,
Sender<DeleteFileContext>,
JoinHandle<DeleteIndexMetrics>,
) {
// TODO: what should the channel limit be?
let (tx, rx) = channel(10);
let (delete_file_tx, delete_file_rx) = channel(10);
let notify = Arc::new(Notify::new());
let state = Arc::new(RwLock::new(DeleteFileIndexState::Populating(
notify.clone(),
)));
let delete_file_stream = rx.boxed();
let delete_file_stream = delete_file_rx.boxed();

spawn({
let metrics_handle = spawn({
let state = state.clone();
async move {
let delete_files = delete_file_stream.collect::<Vec<_>>().await;

let populated_delete_file_index = PopulatedDeleteFileIndex::new(delete_files);

let metrics = populated_delete_file_index.metrics();

{
let mut guard = state.write().unwrap();
*guard = DeleteFileIndexState::Populated(populated_delete_file_index);
}
notify.notify_waiters();

metrics
}
});

(DeleteFileIndex { state }, tx)
(DeleteFileIndex { state }, delete_file_tx, metrics_handle)
}
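The constructor pattern above — hand producers a sender, and return a join handle that resolves to metrics once the channel is drained — can be sketched with std-library stand-ins (mpsc and threads in place of the crate's async channel and runtime, both hypothetical here):

```rust
use std::sync::mpsc;
use std::thread;

// Stand-in for DeleteFileIndex::new(): the spawned collector waits for all
// delete files to come in before "indexing", then yields its metrics
// (here just a count, standing in for DeleteIndexMetrics).
fn new_index() -> (mpsc::Sender<u32>, thread::JoinHandle<usize>) {
    let (tx, rx) = mpsc::channel();
    let handle = thread::spawn(move || {
        // Blocks until every sender has been dropped.
        let delete_files: Vec<u32> = rx.iter().collect();
        delete_files.len()
    });
    (tx, handle)
}

fn main() {
    let (tx, handle) = new_index();
    for file_id in 0..3 {
        tx.send(file_id).unwrap();
    }
    drop(tx); // closing the channel lets the collector finish
    assert_eq!(handle.join().unwrap(), 3);
}
```

As in the PR, the metrics are only available after the population phase completes, which is exactly what awaiting the handle expresses.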

/// Gets all the delete files that apply to the specified data file.
Expand Down Expand Up @@ -207,4 +227,21 @@ impl PopulatedDeleteFileIndex {

results
}

fn metrics(&self) -> DeleteIndexMetrics {
// We count both partitioned and globally applied equality deletes.
let equality_delete_files =
flattened_len(&self.eq_deletes_by_partition) + self.global_deletes.len() as u32;
let positional_delete_files = flattened_len(&self.pos_deletes_by_partition);

DeleteIndexMetrics {
indexed_delete_files: equality_delete_files + positional_delete_files,
equality_delete_files,
positional_delete_files,
}
}
}

fn flattened_len(map: &HashMap<Struct, Vec<Arc<DeleteFileContext>>>) -> u32 {
map.values().flatten().try_len().unwrap_or(0) as u32
}
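The counting done by `metrics()` above can be illustrated with plain types standing in for `Struct` and `DeleteFileContext` (a sketch, not the crate's code); summing per-partition lengths gives the same result as flattening and counting:

```rust
use std::collections::HashMap;

// Stand-in for the flattened_len helper: total entries across all partitions.
fn flattened_len(map: &HashMap<&str, Vec<u32>>) -> u32 {
    map.values().map(|files| files.len() as u32).sum()
}

fn main() {
    let mut eq_deletes_by_partition = HashMap::new();
    eq_deletes_by_partition.insert("p0", vec![10, 11]);
    eq_deletes_by_partition.insert("p1", vec![12]);
    let global_deletes = 1u32; // globally applied equality deletes

    // Mirrors metrics(): partitioned plus global equality deletes.
    let equality_delete_files = flattened_len(&eq_deletes_by_partition) + global_deletes;
    assert_eq!(equality_delete_files, 4);
}
```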
6 changes: 3 additions & 3 deletions crates/iceberg/src/lib.rs
Expand Up @@ -70,11 +70,11 @@ pub mod table;

mod avro;
pub mod cache;
pub mod io;
pub mod spec;

pub mod inspect;
pub mod io;
pub mod metrics;
pub mod scan;
pub mod spec;

pub mod expr;
pub mod transaction;
Expand Down
154 changes: 154 additions & 0 deletions crates/iceberg/src/metrics.rs
@@ -0,0 +1,154 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! This module contains the metrics reporting API for Iceberg.
//!
//! It is used to report table operations in a pluggable way. See the [docs]
//! for more details.
//!
//! [docs]: https://iceberg.apache.org/docs/latest/metrics-reporting

use std::collections::HashMap;
use std::fmt::Debug;
use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use tracing::info;

use crate::TableIdent;
use crate::expr::Predicate;
use crate::spec::SchemaId;

/// This trait defines the API for reporting metrics of table operations.
///
/// Refer to the [Iceberg docs] for details.
///
/// [Iceberg docs]: https://iceberg.apache.org/docs/latest/metrics-reporting/
#[async_trait]
pub(crate) trait MetricsReporter: Debug + Send + Sync {
/// Indicates that an operation is done by reporting a MetricsReport.
///
/// Any errors are expected to be handled internally.
async fn report(&self, report: MetricsReport);
}

/// An enum of all metrics reports.
#[derive(Debug)]
pub(crate) enum MetricsReport {
/// A Table Scan report that contains all relevant information from a Table Scan.
Scan {
table: TableIdent,
snapshot_id: i64,
schema_id: SchemaId,

/// If None, the scan is an unfiltered full table scan.
filter: Option<Arc<Predicate>>,

/// If None, the scan projects all fields.
// TODO: We could default to listing all field names in those cases: check what Java is doing.
projected_field_names: Option<Vec<String>>,
Comment (Contributor Author):

TODO: The list of field names would be more helpful in reporting than an empty value

projected_field_ids: Arc<Vec<i32>>,

metrics: Arc<ScanMetrics>,
metadata: HashMap<String, String>,
},
}

/// Carries all metrics for a particular scan.
#[derive(Debug)]
pub(crate) struct ScanMetrics {
Comment (Contributor Author):

Note that the Java implementation uses special types for the metrics (e.g. TimerResult.java and CounterResult.java). They include a value and a unit, but I felt that the ScanMetrics field names and their types together convey everything we need. The RestMetricReporter will need to emit reports that follow that format, but I omitted it from the general-purpose ScanMetrics.

Happy for any feedback!
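For comparison, the Java-style result types mentioned above might look roughly like this in Rust (a hypothetical sketch of the shape, with assumed field names — the PR instead lets ScanMetrics' field names and Rust types such as Duration, u32, and u64 carry the unit):

```rust
use std::time::Duration;

// Sketch of a CounterResult: value paired with an explicit unit.
struct CounterResult {
    unit: &'static str,
    value: u64,
}

// Sketch of a TimerResult: duration paired with a unit and invocation count.
struct TimerResult {
    time_unit: &'static str,
    total_duration: Duration,
    count: u64,
}

// A REST reporter could translate plain metrics into this wire shape
// at the serialization boundary.
fn to_counter(total_file_size_in_bytes: u64) -> CounterResult {
    CounterResult {
        unit: "bytes",
        value: total_file_size_in_bytes,
    }
}

fn main() {
    let c = to_counter(1024);
    assert_eq!((c.unit, c.value), ("bytes", 1024));

    let t = TimerResult {
        time_unit: "nanoseconds",
        total_duration: Duration::from_millis(5),
        count: 1,
    };
    assert_eq!(t.total_duration.as_nanos(), 5_000_000);
}
```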

pub(crate) total_planning_duration: Duration,

// Manifest-level metrics, computed by walking the snapshot's manifest list
// file entries and checking which manifests match the scan's predicates.
pub(crate) total_data_manifests: u32,
pub(crate) total_delete_manifests: u32,
pub(crate) skipped_data_manifests: u32,
pub(crate) skipped_delete_manifests: u32,
pub(crate) scanned_data_manifests: u32,
pub(crate) scanned_delete_manifests: u32,

// Data file-level metrics.
pub(crate) result_data_files: u32,
pub(crate) skipped_data_files: u32,
pub(crate) total_file_size_in_bytes: u64,

// Delete file-level metrics.
pub(crate) result_delete_files: u32,
pub(crate) skipped_delete_files: u32,
pub(crate) total_delete_file_size_in_bytes: u64,

pub(crate) indexed_delete_files: u32,
pub(crate) equality_delete_files: u32,
pub(crate) positional_delete_files: u32,
}

/// A reporter that logs the metrics to the console.
#[derive(Clone, Debug)]
pub(crate) struct LoggingMetricsReporter {}

impl LoggingMetricsReporter {
pub(crate) fn new() -> Self {
Self {}
}
}

#[async_trait]
impl MetricsReporter for LoggingMetricsReporter {
async fn report(&self, report: MetricsReport) {
match report {
MetricsReport::Scan {
table,
snapshot_id,
schema_id,
filter,
projected_field_names,
projected_field_ids,
metrics,
metadata,
} => {
info!(
Comment (Contributor Author):

I don't think it's a good idea to use debug-formatted values here. I was struggling a lot with the tracing API, and this is the best I could come up with so far.
I didn't really want to serialize the struct into JSON, nor did I know how to implement fmt::Display for values such that they make sense across tracing subscribers.
Any suggestions welcome!
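One way around the Debug-formatting concern is to give the logged value a Display impl and record it with `%` instead of `?` in the tracing macro. A sketch under assumptions — `Filter` here is a hypothetical stand-in for the crate's `Predicate`, and the rendered syntax is invented:

```rust
use std::fmt;

// Stand-in predicate type with a human-readable rendering.
enum Filter {
    Eq(&'static str, i64),
    And(Box<Filter>, Box<Filter>),
}

impl fmt::Display for Filter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Filter::Eq(field, value) => write!(f, "{field} = {value}"),
            Filter::And(lhs, rhs) => write!(f, "({lhs}) AND ({rhs})"),
        }
    }
}

fn main() {
    let filter = Filter::And(
        Box::new(Filter::Eq("id", 7)),
        Box::new(Filter::Eq("year", 2025)),
    );
    // With tracing this would be recorded as `filter = %filter`,
    // producing the same string for every subscriber.
    assert_eq!(filter.to_string(), "(id = 7) AND (year = 2025)");
}
```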

Comment (Contributor Author):

I could also use some feedback about the degree to which we want to mimic the Java implementation's log records.

table = %table,
snapshot_id = snapshot_id,
schema_id = schema_id,
filter = ?filter,
projected_field_names = ?projected_field_names,
projected_field_ids = ?projected_field_ids,
scan_metrics.total_planning_duration = ?metrics.total_planning_duration,
scan_metrics.total_data_manifests = metrics.total_data_manifests,
scan_metrics.total_delete_manifests = metrics.total_delete_manifests,
scan_metrics.scanned_data_manifests = metrics.scanned_data_manifests,
scan_metrics.scanned_delete_manifests = metrics.scanned_delete_manifests,
scan_metrics.skipped_data_manifests = metrics.skipped_data_manifests,
scan_metrics.skipped_delete_manifests = metrics.skipped_delete_manifests,
scan_metrics.result_data_files = metrics.result_data_files,
scan_metrics.result_delete_files = metrics.result_delete_files,
scan_metrics.skipped_data_files = metrics.skipped_data_files,
scan_metrics.skipped_delete_files = metrics.skipped_delete_files,
scan_metrics.total_file_size_in_bytes = metrics.total_file_size_in_bytes,
scan_metrics.total_delete_file_size_in_bytes = metrics.total_delete_file_size_in_bytes,
scan_metrics.indexed_delete_files = metrics.indexed_delete_files,
scan_metrics.equality_delete_files = metrics.equality_delete_files,
scan_metrics.positional_delete_files = metrics.positional_delete_files,
metadata = ?metadata,
"Received metrics report"
);
}
}
}
}
27 changes: 24 additions & 3 deletions crates/iceberg/src/scan/context.rs
Expand Up @@ -23,6 +23,7 @@ use futures::{SinkExt, TryFutureExt};
use crate::delete_file_index::DeleteFileIndex;
use crate::expr::{Bind, BoundPredicate, Predicate};
use crate::io::object_cache::ObjectCache;
use crate::scan::metrics::ManifestMetrics;
use crate::scan::{
BoundPredicates, ExpressionEvaluatorCache, FileScanTask, ManifestEvaluatorCache,
PartitionFilterCache,
Expand Down Expand Up @@ -186,16 +187,25 @@ impl PlanContext {
tx_data: Sender<ManifestEntryContext>,
delete_file_idx: DeleteFileIndex,
delete_file_tx: Sender<ManifestEntryContext>,
) -> Result<Box<impl Iterator<Item = Result<ManifestFileContext>> + 'static>> {
) -> Result<(Vec<Result<ManifestFileContext>>, ManifestMetrics)> {
Comment (Contributor Author):

Since we were returning a vector and this function was only called in a single place, I took the liberty of changing the return value. This somewhat simplified passing the result to a spawned thread, because Vec is Send + Sync when its items are.

I've also extended the TODO comment below for future reference, because I've added another obstacle to simply using an iterator here: the ManifestMetrics are now continuously mutated in the loop. If we used an iterator instead, we couldn't as easily (I think) pass around the mutable reference.

let manifest_files = manifest_list.entries().iter();

// TODO: Ideally we could ditch this intermediate Vec as we return an iterator.
// TODO: Ideally we could ditch this intermediate Vec as we can return
// an iterator over the results. Updates to the manifest metrics somewhat
// complicate this because they need to be serialized somewhere, and an
// iterator can't easily take ownership of the metrics.
// A vec allows us to apply the mutations within this function.
// A vec also implicitly implements Send and Sync, meaning we can pass
// it around more easily in the concurrent planning step.
let mut filtered_mfcs = vec![];

let mut metrics = ManifestMetrics::default();
for manifest_file in manifest_files {
let tx = if manifest_file.content == ManifestContentType::Deletes {
metrics.total_delete_manifests += 1;
delete_file_tx.clone()
} else {
metrics.total_data_manifests += 1;
tx_data.clone()
};

Expand All @@ -212,6 +222,10 @@ impl PlanContext {
)
.eval(manifest_file)?
{
match manifest_file.content {
ManifestContentType::Data => metrics.skipped_data_manifests += 1,
ManifestContentType::Deletes => metrics.skipped_delete_manifests += 1,
}
continue;
}

Expand All @@ -230,7 +244,14 @@ impl PlanContext {
filtered_mfcs.push(Ok(mfc));
}

Ok(Box::new(filtered_mfcs.into_iter()))
// They're not yet scanned, but will be scanned concurrently in the
// next processing step.
metrics.scanned_data_manifests =
metrics.total_data_manifests - metrics.skipped_data_manifests;
metrics.scanned_delete_manifests =
metrics.total_delete_manifests - metrics.skipped_delete_manifests;

Ok((filtered_mfcs, metrics))
}
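The manifest bookkeeping above follows a simple invariant — each manifest is either skipped by the evaluator or forwarded to the next (concurrent) step — so the "scanned" counts can be derived once the loop finishes. A minimal sketch with a reduced struct (real ManifestMetrics has more fields):

```rust
// Reduced stand-in for the ManifestMetrics accumulated in plan_files.
#[derive(Debug, Default, PartialEq)]
struct ManifestMetrics {
    total_data_manifests: u32,
    skipped_data_manifests: u32,
    scanned_data_manifests: u32,
}

// Each bool stands for "this manifest matched the scan's predicates".
fn plan(matches_predicate: &[bool]) -> ManifestMetrics {
    let mut metrics = ManifestMetrics::default();
    for &keep in matches_predicate {
        metrics.total_data_manifests += 1;
        if !keep {
            metrics.skipped_data_manifests += 1;
        }
    }
    // Not yet scanned, but will be in the next processing step.
    metrics.scanned_data_manifests =
        metrics.total_data_manifests - metrics.skipped_data_manifests;
    metrics
}

fn main() {
    let metrics = plan(&[true, false, true, true]);
    assert_eq!(metrics.total_data_manifests, 4);
    assert_eq!(metrics.skipped_data_manifests, 1);
    assert_eq!(metrics.scanned_data_manifests, 3);
}
```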

fn create_manifest_file_context(
Expand Down