Skip to content

Commit cda84dd

Browse files
committed
[SPARK-52695][SQL] User Defined Type write support for xml file format
### What changes were proposed in this pull request?

This PR adds UDT (User Defined Type) write support for the XML file format.

### Why are the changes needed?

An IllegalArgumentException is being thrown while writing UDT values.

### Does this PR introduce _any_ user-facing change?

Yes. If the UDT's sqlType is compatible with the XML file format, it becomes writable.

### How was this patch tested?

New test.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #51388 from yaooqinn/SPARK-52695.

Authored-by: Kent Yao <yao@apache.org>
Signed-off-by: Kent Yao <yao@apache.org>
1 parent 00cf5da commit cda84dd

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,8 @@ class StaxXmlGenerator(
228228
writeChild(field.name, field.dataType, value)
229229
}
230230

231+
case (u: UserDefinedType[_], v) => writeElement(u.sqlType, v, options)
232+
231233
case (_, _) =>
232234
throw new SparkIllegalArgumentException(
233235
errorClass = "_LEGACY_ERROR_TEMP_3238",

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ import java.io.{EOFException, File, FileOutputStream, StringWriter}
2020
import java.nio.charset.{StandardCharsets, UnsupportedCharsetException}
2121
import java.nio.file.{Files, Path, Paths}
2222
import java.sql.{Date, Timestamp}
23-
import java.time.{Instant, LocalDateTime}
23+
import java.time.{Instant, LocalDateTime, Year}
2424
import java.util.TimeZone
2525
import java.util.concurrent.ConcurrentHashMap
2626
import javax.xml.stream.{XMLOutputFactory, XMLStreamException}
@@ -38,7 +38,8 @@ import org.apache.hadoop.io.compress.{CompressionCodecFactory, GzipCodec}
3838

3939
import org.apache.spark.{DebugFilesystem, SparkException}
4040
import org.apache.spark.io.ZStdCompressionCodec
41-
import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Encoders, QueryTest, Row, SaveMode}
41+
import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Encoders, QueryTest, Row, SaveMode, YearUDT}
42+
import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.UDTEncoder
4243
import org.apache.spark.sql.catalyst.util._
4344
import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber
4445
import org.apache.spark.sql.catalyst.xml.{IndentingXMLStreamWriter, XmlOptions}
@@ -3490,6 +3491,29 @@ class XmlSuite
34903491
}
34913492
}
34923493
}
3494+
3495+
test("SPARK-52695: UDT write support for xml file format") {
3496+
val udt = new YearUDT()
3497+
val encoder = UDTEncoder(udt, classOf[YearUDT])
3498+
withTempDir { dir =>
3499+
val path = dir.getCanonicalPath
3500+
// Write a dataset of Year objects
3501+
val df1 = spark.range(2018, 2025).map(y => Year.of(y.toInt))(encoder)
3502+
3503+
df1
3504+
.write
3505+
.mode(SaveMode.Overwrite)
3506+
.option("rowTag", "ROW")
3507+
.xml(path)
3508+
3509+
val df = spark.read
3510+
.option("rowTag", "ROW")
3511+
.xml(path)
3512+
3513+
assert(df.schema === StructType(Seq(StructField("value", LongType))))
3514+
checkAnswer(df, spark.range(2018, 2025).toDF("value"))
3515+
}
3516+
}
34933517
}
34943518

34953519
// Mock file system that checks the number of open files

0 commit comments

Comments (0)