From 3d0bd4b789bc76935f8bed9e5b62ac40b99e9d5f Mon Sep 17 00:00:00 2001 From: Emilie Faracci Date: Thu, 22 May 2025 12:04:30 -0700 Subject: [PATCH 1/4] [SPARK-52265][SQL][TEST] Fix regex leading to empty PROCESS_TABLES.testingVersions in HiveExternalCatalogVersionsSuite --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 684e4c53e4ef5..0600b7bcef2cb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -262,13 +262,13 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { val testingVersions: Seq[String] = if (isPythonVersionAvailable && SystemUtils.isJavaVersionAtMost(JavaVersion.JAVA_17)) { import scala.io.Source + val SparkVersionPattern = """<a href="spark-(\d.\d.\d)/">""".r try Utils.tryWithResource( Source.fromURL(s"$releaseMirror/spark")) { source => source.mkString .split("\n") - .filter(_.contains("""<a href="spark-""")) - .filterNot(_.contains("preview")) - .map("""<a href="spark-(\d.\d.\d)/">""".r.findFirstMatchIn(_).get.group(1)) + .filter(SparkVersionPattern.unanchored.matches(_)) + .map(SparkVersionPattern.findFirstMatchIn(_).get.group(1)) .filter(_ < org.apache.spark.SPARK_VERSION) .filterNot(skipReleaseVersions.contains).toImmutableArraySeq } catch { From 0d088c8683693cda666e543d2fc9ddab40a78ec4 Mon Sep 17 00:00:00 2001 From: Emilie Faracci Date: Wed, 4 Jun 2025 10:43:24 -0700 Subject: [PATCH 2/4] Ensures fix is compatible with Spark 4.0 release --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index
0600b7bcef2cb..1cb69d78582c8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -96,7 +96,10 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { mirrors.distinct :+ "https://archive.apache.org/dist" :+ PROCESS_TABLES.releaseMirror logInfo(s"Trying to download Spark $version from $sites") for (site <- sites) { - val filename = s"spark-$version-bin-hadoop3-scala2.13.tgz" + val filename = version match { + case v if v.startsWith("3") => s"spark-$version-bin-hadoop3-scala2.13.tgz" + case _ => s"spark-$version-bin-hadoop3.tgz" + } val url = s"$site/spark/spark-$version/$filename" logInfo(s"Downloading Spark $version from $url") try { From ecb98352f283fd50d16bd0df5745f2cfc2addde8 Mon Sep 17 00:00:00 2001 From: Emilie Faracci Date: Wed, 4 Jun 2025 12:53:17 -0700 Subject: [PATCH 3/4] Change match logic to scala_version instead of filename --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 1cb69d78582c8..5105551a3f517 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -96,10 +96,12 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { mirrors.distinct :+ "https://archive.apache.org/dist" :+ PROCESS_TABLES.releaseMirror logInfo(s"Trying to download Spark $version from $sites") for (site <- sites) { - val filename = version match { - case v if v.startsWith("3") => s"spark-$version-bin-hadoop3-scala2.13.tgz" - case _ => s"spark-$version-bin-hadoop3.tgz" + val scala_version = 
version match { + case v if v.startsWith("3.") => "-scala2.13" + case v if v.startsWith("4.") => "" + case _ => fail(s"Spark version $version is unexpected") } + val filename = s"spark-$version-bin-hadoop3$scala_version.tgz" val url = s"$site/spark/spark-$version/$filename" logInfo(s"Downloading Spark $version from $url") try { From 7fa00e10ec9e006f02824a8e14e6eb69ffba5c27 Mon Sep 17 00:00:00 2001 From: Emilie Faracci Date: Thu, 5 Jun 2025 09:59:24 -0700 Subject: [PATCH 4/4] Adjusting formatting to camelCase --- .../sql/hive/HiveExternalCatalogVersionsSuite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 5105551a3f517..32f2c6d508d33 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -96,12 +96,12 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { mirrors.distinct :+ "https://archive.apache.org/dist" :+ PROCESS_TABLES.releaseMirror logInfo(s"Trying to download Spark $version from $sites") for (site <- sites) { - val scala_version = version match { + val scalaVersion = version match { case v if v.startsWith("3.") => "-scala2.13" case v if v.startsWith("4.") => "" case _ => fail(s"Spark version $version is unexpected") } - val filename = s"spark-$version-bin-hadoop3$scala_version.tgz" + val filename = s"spark-$version-bin-hadoop3$scalaVersion.tgz" val url = s"$site/spark/spark-$version/$filename" logInfo(s"Downloading Spark $version from $url") try { @@ -267,13 +267,13 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { val testingVersions: Seq[String] = if (isPythonVersionAvailable && SystemUtils.isJavaVersionAtMost(JavaVersion.JAVA_17)) { import scala.io.Source - val 
SparkVersionPattern = """<a href="spark-(\d.\d.\d)/">""".r + val sparkVersionPattern = """<a href="spark-(\d.\d.\d)/">""".r try Utils.tryWithResource( Source.fromURL(s"$releaseMirror/spark")) { source => source.mkString .split("\n") - .filter(SparkVersionPattern.unanchored.matches(_)) - .map(SparkVersionPattern.findFirstMatchIn(_).get.group(1)) + .filter(sparkVersionPattern.unanchored.matches(_)) + .map(sparkVersionPattern.findFirstMatchIn(_).get.group(1)) .filter(_ < org.apache.spark.SPARK_VERSION) .filterNot(skipReleaseVersions.contains).toImmutableArraySeq } catch {