From c2a8aaede7b7d59a763f85b5271bbe14d5389be2 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Mon, 13 Oct 2025 19:43:43 -0700
Subject: [PATCH] [SPARK-53896][CORE] Enable `spark.io.compression.lzf.parallel.enabled` by default

---
 .../main/scala/org/apache/spark/internal/config/package.scala | 2 +-
 docs/configuration.md                                         | 2 +-
 docs/core-migration-guide.md                                  | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index d413d06ffc94f..94fe31e1cd8cb 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -2137,7 +2137,7 @@ package object config {
       .doc("When true, LZF compression will use multiple threads to compress data in parallel.")
       .version("4.0.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   private[spark] val IO_WARNING_LARGEFILETHRESHOLD =
     ConfigBuilder("spark.io.warning.largeFileThreshold")
diff --git a/docs/configuration.md b/docs/configuration.md
index 573b485f7e2df..b999a6ee25778 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1918,7 +1918,7 @@ Apart from these, the following properties are also available, and may be useful
 <tr>
   <td><code>spark.io.compression.lzf.parallel.enabled</code></td>
-  <td>false</td>
+  <td>true</td>
   <td>
     When true, LZF compression will use multiple threads to compress data in parallel.
   </td>
   <td>4.0.0</td>
diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index a738363ace1d9..19b77624d6260 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -29,6 +29,7 @@ license: |
 - Since Spark 4.1, Spark uses Apache Hadoop Magic Committer for all S3 buckets by default. To restore the behavior before Spark 4.0, you can set `spark.hadoop.fs.s3a.committer.magic.enabled=false`.
 - Since Spark 4.1, `java.lang.InternalError` encountered during file reading will no longer fail the task if the configuration `spark.sql.files.ignoreCorruptFiles` or the data source option `ignoreCorruptFiles` is set to `true`.
 - Since Spark 4.1, Spark ignores `*.blacklist.*` alternative configuration names. To restore the behavior before Spark 4.1, you can use the corresponding configuration names instead, which exist since Spark 3.1.0.
+- Since Spark 4.1, LZF compression will use multiple threads to compress data in parallel by default. To restore the behavior before Spark 4.1, you can set `spark.io.compression.lzf.parallel.enabled` to `false`.
 
 ## Upgrading from Core 3.5 to 4.0
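
For context, here is a minimal, self-contained sketch of how a user could opt out of the new default once this change lands. It assumes a plain `SparkConf`/`SparkContext` application; the object name, app name, master, and job body are illustrative only and not part of this patch — the one setting this change affects is `spark.io.compression.lzf.parallel.enabled`:

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Minimal sketch: restore the pre-4.1 single-threaded LZF behavior.
// Only `spark.io.compression.lzf.parallel.enabled` comes from this patch;
// everything else here is illustrative boilerplate.
object LzfParallelOptOut {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("lzf-parallel-opt-out")
      .setMaster("local[*]") // local master so the sketch runs standalone
      // This setting only matters when LZF is the IO compression codec.
      .set("spark.io.compression.codec", "lzf")
      // Defaults to true since this change; false restores the old behavior.
      .set("spark.io.compression.lzf.parallel.enabled", "false")

    val sc = new SparkContext(conf)
    try {
      // A shuffle exercises the IO compression codec on shuffle output.
      val counts = sc.parallelize(1 to 1000)
        .map(i => (i % 10, 1))
        .reduceByKey(_ + _)
      counts.collect().sorted.foreach(println)
    } finally {
      sc.stop()
    }
  }
}
```

The same opt-out works without any code change via `spark-submit --conf spark.io.compression.lzf.parallel.enabled=false`, which is likely the more common path for existing jobs.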