diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index f82658a1d2..2eebd0e42a 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -847,7 +847,7 @@ def add_files( import pyarrow.compute as pc expr = pc.field("file_path").isin(file_paths) - referenced_files = [file["file_path"] for file in self._table.inspect.files().filter(expr).to_pylist()] + referenced_files = [file["file_path"] for file in self._table.inspect.data_files().filter(expr).to_pylist()] if referenced_files: raise ValueError(f"Cannot add files that are already referenced by table, files: {', '.join(referenced_files)}") diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py index cce5250ad5..3bb0268a05 100644 --- a/pyiceberg/table/inspect.py +++ b/pyiceberg/table/inspect.py @@ -650,11 +650,14 @@ def _files(self, snapshot_id: Optional[int] = None, data_file_filter: Optional[S snapshot = self._get_snapshot(snapshot_id) io = self.tbl.io - files_table: list[pa.Table] = [] - for manifest_list in snapshot.manifests(io): - files_table.append(self._get_files_from_manifest(manifest_list, data_file_filter)) - return pa.concat_tables(files_table) + executor = ExecutorFactory.get_or_create() + results = list( + executor.map( + lambda manifest_list: self._get_files_from_manifest(manifest_list, data_file_filter), snapshot.manifests(io) + ) + ) + return pa.concat_tables(results) def files(self, snapshot_id: Optional[int] = None) -> "pa.Table": return self._files(snapshot_id)