Skip to content

Commit 6a6a081

Browse files
PetarVasiljevic-DB and cloud-fan
authored and committed
[SPARK-52405] Extend V2JDBCTest to test multiple partition reading
### What changes were proposed in this pull request? This PR is just extending the existing V2JDBCTest that is used for testing different pushdowns for JDBC connectors. JDBC Options support `numPartitions`, `lowerBound`, `upperBound`, and `partitionColumn` options. The idea is to test reading data from JDBC data sources when these options are used. Using these options will disable some of the pushdowns, for example Offset with Limit or Sort with Limit. Other pushdowns shouldn't be regressed (like Limit or Aggregation) and these are all tested. ### Why are the changes needed? Testing if there is a correctness issue when using multiple partitions for reading the data from JDBC data sources. ### Does this PR introduce _any_ user-facing change? No, since this is a test-only PR. ### How was this patch tested? ### Was this patch authored or co-authored using generative AI tooling? Closes #51098 from PetarVasiljevic-DB/test_jdbc_parallel_read. Authored-by: Petar Vasiljevic <petar.vasiljevic@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent a9bfacb commit 6a6a081

File tree

8 files changed

+433
-124
lines changed

8 files changed

+433
-124
lines changed

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,24 @@ import org.apache.spark.tags.DockerTest
3737
@DockerTest
3838
class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
3939

40+
// Following tests are disabled for both single and multiple partition read
4041
override def excluded: Seq[String] = Seq(
41-
"scan with aggregate push-down: COVAR_POP with DISTINCT",
42-
"scan with aggregate push-down: COVAR_SAMP with DISTINCT",
43-
"scan with aggregate push-down: CORR with DISTINCT",
44-
"scan with aggregate push-down: CORR without DISTINCT",
45-
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
46-
"scan with aggregate push-down: REGR_SLOPE with DISTINCT",
47-
"scan with aggregate push-down: REGR_R2 with DISTINCT",
48-
"scan with aggregate push-down: REGR_SXY with DISTINCT")
42+
"scan with aggregate push-down: COVAR_POP with DISTINCT (false)",
43+
"scan with aggregate push-down: COVAR_POP with DISTINCT (true)",
44+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (false)",
45+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (true)",
46+
"scan with aggregate push-down: CORR with DISTINCT (false)",
47+
"scan with aggregate push-down: CORR with DISTINCT (true)",
48+
"scan with aggregate push-down: CORR without DISTINCT (false)",
49+
"scan with aggregate push-down: CORR without DISTINCT (true)",
50+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (false)",
51+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (true)",
52+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (false)",
53+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (true)",
54+
"scan with aggregate push-down: REGR_R2 with DISTINCT (false)",
55+
"scan with aggregate push-down: REGR_R2 with DISTINCT (true)",
56+
"scan with aggregate push-down: REGR_SXY with DISTINCT (false)",
57+
"scan with aggregate push-down: REGR_SXY with DISTINCT (true)")
4958

5059
override val catalogName: String = "db2"
5160
override val namespaceOpt: Option[String] = Some("DB2INST1")

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -57,25 +57,44 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD
5757
queryNode.rdd
5858
}
5959

60+
// Following tests are disabled for both single and multiple partition read
6061
override def excluded: Seq[String] = Seq(
61-
"simple scan with OFFSET",
62-
"simple scan with LIMIT and OFFSET",
63-
"simple scan with paging: top N and OFFSET",
64-
"scan with aggregate push-down: VAR_POP with DISTINCT",
65-
"scan with aggregate push-down: COVAR_POP with DISTINCT",
66-
"scan with aggregate push-down: COVAR_POP without DISTINCT",
67-
"scan with aggregate push-down: COVAR_SAMP with DISTINCT",
68-
"scan with aggregate push-down: COVAR_SAMP without DISTINCT",
69-
"scan with aggregate push-down: CORR with DISTINCT",
70-
"scan with aggregate push-down: CORR without DISTINCT",
71-
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
72-
"scan with aggregate push-down: REGR_INTERCEPT without DISTINCT",
73-
"scan with aggregate push-down: REGR_SLOPE with DISTINCT",
74-
"scan with aggregate push-down: REGR_SLOPE without DISTINCT",
75-
"scan with aggregate push-down: REGR_R2 with DISTINCT",
76-
"scan with aggregate push-down: REGR_R2 without DISTINCT",
77-
"scan with aggregate push-down: REGR_SXY with DISTINCT",
78-
"scan with aggregate push-down: REGR_SXY without DISTINCT")
62+
"simple scan with OFFSET (false)",
63+
"simple scan with OFFSET (true)",
64+
"simple scan with LIMIT and OFFSET (false)",
65+
"simple scan with LIMIT and OFFSET (true)",
66+
"simple scan with paging: top N and OFFSET (false)",
67+
"simple scan with paging: top N and OFFSET (true)",
68+
"scan with aggregate push-down: VAR_POP with DISTINCT (false)",
69+
"scan with aggregate push-down: VAR_POP with DISTINCT (true)",
70+
"scan with aggregate push-down: COVAR_POP with DISTINCT (false)",
71+
"scan with aggregate push-down: COVAR_POP with DISTINCT (true)",
72+
"scan with aggregate push-down: COVAR_POP without DISTINCT (false)",
73+
"scan with aggregate push-down: COVAR_POP without DISTINCT (true)",
74+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (false)",
75+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (true)",
76+
"scan with aggregate push-down: COVAR_SAMP without DISTINCT (false)",
77+
"scan with aggregate push-down: COVAR_SAMP without DISTINCT (true)",
78+
"scan with aggregate push-down: CORR with DISTINCT (false)",
79+
"scan with aggregate push-down: CORR with DISTINCT (true)",
80+
"scan with aggregate push-down: CORR without DISTINCT (false)",
81+
"scan with aggregate push-down: CORR without DISTINCT (true)",
82+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (false)",
83+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (true)",
84+
"scan with aggregate push-down: REGR_INTERCEPT without DISTINCT (false)",
85+
"scan with aggregate push-down: REGR_INTERCEPT without DISTINCT (true)",
86+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (false)",
87+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (true)",
88+
"scan with aggregate push-down: REGR_SLOPE without DISTINCT (false)",
89+
"scan with aggregate push-down: REGR_SLOPE without DISTINCT (true)",
90+
"scan with aggregate push-down: REGR_R2 with DISTINCT (false)",
91+
"scan with aggregate push-down: REGR_R2 with DISTINCT (true)",
92+
"scan with aggregate push-down: REGR_R2 without DISTINCT (false)",
93+
"scan with aggregate push-down: REGR_R2 without DISTINCT (true)",
94+
"scan with aggregate push-down: REGR_SXY with DISTINCT (false)",
95+
"scan with aggregate push-down: REGR_SXY with DISTINCT (true)",
96+
"scan with aggregate push-down: REGR_SXY without DISTINCT (false)",
97+
"scan with aggregate push-down: REGR_SXY without DISTINCT (true)")
7998

8099
override val catalogName: String = "mssql"
81100
override val db = new MsSQLServerDatabaseOnDocker

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,25 +36,44 @@ import org.apache.spark.tags.DockerTest
3636
@DockerTest
3737
class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
3838

39+
// Following tests are disabled for both single and multiple partition read
3940
override def excluded: Seq[String] = Seq(
40-
"scan with aggregate push-down: VAR_POP with DISTINCT",
41-
"scan with aggregate push-down: VAR_SAMP with DISTINCT",
42-
"scan with aggregate push-down: STDDEV_POP with DISTINCT",
43-
"scan with aggregate push-down: STDDEV_SAMP with DISTINCT",
44-
"scan with aggregate push-down: COVAR_POP with DISTINCT",
45-
"scan with aggregate push-down: COVAR_POP without DISTINCT",
46-
"scan with aggregate push-down: COVAR_SAMP with DISTINCT",
47-
"scan with aggregate push-down: COVAR_SAMP without DISTINCT",
48-
"scan with aggregate push-down: CORR with DISTINCT",
49-
"scan with aggregate push-down: CORR without DISTINCT",
50-
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
51-
"scan with aggregate push-down: REGR_INTERCEPT without DISTINCT",
52-
"scan with aggregate push-down: REGR_SLOPE with DISTINCT",
53-
"scan with aggregate push-down: REGR_SLOPE without DISTINCT",
54-
"scan with aggregate push-down: REGR_R2 with DISTINCT",
55-
"scan with aggregate push-down: REGR_R2 without DISTINCT",
56-
"scan with aggregate push-down: REGR_SXY with DISTINCT",
57-
"scan with aggregate push-down: REGR_SXY without DISTINCT")
41+
"scan with aggregate push-down: VAR_POP with DISTINCT (false)",
42+
"scan with aggregate push-down: VAR_POP with DISTINCT (true)",
43+
"scan with aggregate push-down: VAR_SAMP with DISTINCT (false)",
44+
"scan with aggregate push-down: VAR_SAMP with DISTINCT (true)",
45+
"scan with aggregate push-down: STDDEV_POP with DISTINCT (false)",
46+
"scan with aggregate push-down: STDDEV_POP with DISTINCT (true)",
47+
"scan with aggregate push-down: STDDEV_SAMP with DISTINCT (false)",
48+
"scan with aggregate push-down: STDDEV_SAMP with DISTINCT (true)",
49+
"scan with aggregate push-down: COVAR_POP with DISTINCT (false)",
50+
"scan with aggregate push-down: COVAR_POP with DISTINCT (true)",
51+
"scan with aggregate push-down: COVAR_POP without DISTINCT (false)",
52+
"scan with aggregate push-down: COVAR_POP without DISTINCT (true)",
53+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (false)",
54+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (true)",
55+
"scan with aggregate push-down: COVAR_SAMP without DISTINCT (false)",
56+
"scan with aggregate push-down: COVAR_SAMP without DISTINCT (true)",
57+
"scan with aggregate push-down: CORR with DISTINCT (false)",
58+
"scan with aggregate push-down: CORR with DISTINCT (true)",
59+
"scan with aggregate push-down: CORR without DISTINCT (false)",
60+
"scan with aggregate push-down: CORR without DISTINCT (true)",
61+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (false)",
62+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (true)",
63+
"scan with aggregate push-down: REGR_INTERCEPT without DISTINCT (false)",
64+
"scan with aggregate push-down: REGR_INTERCEPT without DISTINCT (true)",
65+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (false)",
66+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (true)",
67+
"scan with aggregate push-down: REGR_SLOPE without DISTINCT (false)",
68+
"scan with aggregate push-down: REGR_SLOPE without DISTINCT (true)",
69+
"scan with aggregate push-down: REGR_R2 with DISTINCT (false)",
70+
"scan with aggregate push-down: REGR_R2 with DISTINCT (true)",
71+
"scan with aggregate push-down: REGR_R2 without DISTINCT (false)",
72+
"scan with aggregate push-down: REGR_R2 without DISTINCT (true)",
73+
"scan with aggregate push-down: REGR_SXY with DISTINCT (false)",
74+
"scan with aggregate push-down: REGR_SXY with DISTINCT (true)",
75+
"scan with aggregate push-down: REGR_SXY without DISTINCT (false)",
76+
"scan with aggregate push-down: REGR_SXY without DISTINCT (true)")
5877

5978
override val catalogName: String = "mysql"
6079
override val db = new MySQLDatabaseOnDocker

connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,18 +58,30 @@ import org.apache.spark.tags.DockerTest
5858
@DockerTest
5959
class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
6060

61+
// Following tests are disabled for both single and multiple partition read
6162
override def excluded: Seq[String] = Seq(
62-
"scan with aggregate push-down: VAR_POP with DISTINCT",
63-
"scan with aggregate push-down: VAR_SAMP with DISTINCT",
64-
"scan with aggregate push-down: STDDEV_POP with DISTINCT",
65-
"scan with aggregate push-down: STDDEV_SAMP with DISTINCT",
66-
"scan with aggregate push-down: COVAR_POP with DISTINCT",
67-
"scan with aggregate push-down: COVAR_SAMP with DISTINCT",
68-
"scan with aggregate push-down: CORR with DISTINCT",
69-
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
70-
"scan with aggregate push-down: REGR_SLOPE with DISTINCT",
71-
"scan with aggregate push-down: REGR_R2 with DISTINCT",
72-
"scan with aggregate push-down: REGR_SXY with DISTINCT")
63+
"scan with aggregate push-down: VAR_POP with DISTINCT (false)",
64+
"scan with aggregate push-down: VAR_POP with DISTINCT (true)",
65+
"scan with aggregate push-down: VAR_SAMP with DISTINCT (false)",
66+
"scan with aggregate push-down: VAR_SAMP with DISTINCT (true)",
67+
"scan with aggregate push-down: STDDEV_POP with DISTINCT (false)",
68+
"scan with aggregate push-down: STDDEV_POP with DISTINCT (true)",
69+
"scan with aggregate push-down: STDDEV_SAMP with DISTINCT (false)",
70+
"scan with aggregate push-down: STDDEV_SAMP with DISTINCT (true)",
71+
"scan with aggregate push-down: COVAR_POP with DISTINCT (false)",
72+
"scan with aggregate push-down: COVAR_POP with DISTINCT (true)",
73+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (false)",
74+
"scan with aggregate push-down: COVAR_SAMP with DISTINCT (true)",
75+
"scan with aggregate push-down: CORR with DISTINCT (false)",
76+
"scan with aggregate push-down: CORR with DISTINCT (true)",
77+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (false)",
78+
"scan with aggregate push-down: REGR_INTERCEPT with DISTINCT (true)",
79+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (false)",
80+
"scan with aggregate push-down: REGR_SLOPE with DISTINCT (true)",
81+
"scan with aggregate push-down: REGR_R2 with DISTINCT (false)",
82+
"scan with aggregate push-down: REGR_R2 with DISTINCT (true)",
83+
"scan with aggregate push-down: REGR_SXY with DISTINCT (false)",
84+
"scan with aggregate push-down: REGR_SXY with DISTINCT (true)")
7385

7486
override val catalogName: String = "oracle"
7587
override val namespaceOpt: Option[String] = Some("SYSTEM")

0 commit comments

Comments
 (0)