
Commit 919be7b

Defaulting to including null fields.
This matches MLCP's behavior for delimited files. Null fields are also easy to query on and thus useful, and a user can easily drop them if desired (sketched below).
1 parent: bf13ef7
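For context, a minimal sketch of how a user could opt back out of this default and drop null fields again. The format name, connection URI, and option keys shown here are illustrative assumptions; in the connector's own code the serialization key is built from Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields".

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class DropNullFieldsSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
        // Hypothetical input; any DataFrame with nullable columns behaves the same way.
        Dataset<Row> df = spark.read().option("header", "true").csv("data/example.csv");
        df.write()
            .format("marklogic")
            // Connection and permission keys are illustrative placeholders.
            .option("spark.marklogic.client.uri", "user:password@localhost:8000")
            .option("spark.marklogic.write.permissions", "rest-reader,read,rest-writer,update")
            // Overrides the connector's new default of ignoreNullFields=false.
            .option("spark.marklogic.write.jsonSerializationOption.ignoreNullFields", "true")
            .mode(SaveMode.Append)
            .save();
    }
}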

4 files changed: +18 −16 lines

src/main/java/com/marklogic/spark/JsonRowSerializer.java

Lines changed: 2 additions & 0 deletions
@@ -61,6 +61,8 @@ public String serializeRowToJson(InternalRow row) {
      */
     private Map<String, String> buildOptionsForJsonOptions(Map<String, String> connectorProperties) {
         Map<String, String> options = new HashMap<>();
+        // Default to include null fields, as they are easily queried in MarkLogic.
+        options.put("ignoreNullFields", "false");
         connectorProperties.forEach((key, value) -> {
             if (key.startsWith(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX)) {
                 String optionName = key.substring(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX.length());
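A standalone sketch (plain Java, not the connector code) of why this ordering still lets callers override the default: the default is put into the map first, so a later entry copied in from the connector properties replaces it. The map of user options here is a hypothetical stand-in for connectorProperties.

import java.util.HashMap;
import java.util.Map;

public class DefaultThenOverrideSketch {
    public static void main(String[] args) {
        Map<String, String> options = new HashMap<>();
        // Connector default, added before any user-provided options.
        options.put("ignoreNullFields", "false");

        // Simulated user-provided serialization option.
        Map<String, String> userOptions = Map.of("ignoreNullFields", "true");
        userOptions.forEach(options::put);

        // Prints "true": the later put wins, so the default only applies when unset.
        System.out.println(options.get("ignoreNullFields"));
    }
}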

src/test/java/com/marklogic/spark/writer/WriteNullValuesTest.java renamed to src/test/java/com/marklogic/spark/writer/IgnoreNullValuesTest.java

Lines changed: 10 additions & 15 deletions
@@ -4,7 +4,6 @@
 package com.marklogic.spark.writer;

 import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.node.JsonNodeType;
 import com.marklogic.junit5.XmlNode;
 import com.marklogic.spark.AbstractIntegrationTest;
 import com.marklogic.spark.Options;
@@ -15,7 +14,7 @@
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;

-class WriteNullValuesTest extends AbstractIntegrationTest {
+class IgnoreNullValuesTest extends AbstractIntegrationTest {

     @Test
     void jsonWithEmptyValues() {
@@ -29,21 +28,20 @@ void jsonWithEmptyValues() {
             .option(Options.CLIENT_URI, makeClientUri())
             .option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
             .option(Options.WRITE_URI_TEMPLATE, "/a/{number}.json")
-            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "false")
+            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
             .mode(SaveMode.Append)
             .save();

         JsonNode doc = readJsonDocument("/a/1.json");
         assertEquals(1, doc.get("number").asInt());
         assertEquals("blue", doc.get("color").asText());
-        assertEquals(JsonNodeType.NULL, doc.get("flag").getNodeType());
-        assertEquals(3, doc.size(), "The file path column should not be included in the serialization.");
+        assertEquals(2, doc.size(), "The flag column should not be included in the serialization.");

         doc = readJsonDocument("/a/2.json");
         assertEquals(2, doc.get("number").asInt());
         assertEquals(" ", doc.get("color").asText(), "Verifies that whitespace is retained by default.");
         assertFalse(doc.get("flag").asBoolean());
-        assertEquals(3, doc.size(), "The file path column should not be included in the serialization.");
+        assertEquals(3, doc.size());
     }

     @Test
@@ -58,14 +56,12 @@ void xmlWithEmptyValues() {
             .option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
             .option(Options.WRITE_XML_ROOT_NAME, "test")
             .option(Options.WRITE_URI_TEMPLATE, "/a/{number}.xml")
-            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "false")
+            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
             .mode(SaveMode.Append)
             .save();

         XmlNode doc = readXmlDocument("/a/1.xml");
-        doc.assertElementValue(
-            "The empty flag column should be retained due to ignoreNullFields=true",
-            "/test/flag", "");
+        doc.assertElementMissing("The empty flag column should be ignored", "/test/flag");
         doc.assertElementValue("/test/number", "1");
         doc.assertElementValue("/test/color", "blue");

@@ -87,7 +83,7 @@ void jsonLinesWithNestedFieldsConvertedToXml() {
             .option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
             .option(Options.WRITE_XML_ROOT_NAME, "parent")
             .option(Options.WRITE_URI_TEMPLATE, "/a/{id}.xml")
-            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "false")
+            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
             .mode(SaveMode.Append)
             .save();

@@ -99,9 +95,8 @@ void jsonLinesWithNestedFieldsConvertedToXml() {
         doc.assertElementValue("/parent/id", "1");

         doc = readXmlDocument("/a/2.xml");
-        doc.assertElementValue(
-            "'hello' is added even though it doesn't exist on the line. This is due to ignoreNullFields being false " +
-                "and Spark adding 'hello' to the schema since it appears on the first line.",
-            "/parent/hello", "");
+        doc.assertElementMissing("'hello' should not appear. Spark JSON will actually include it in the schema and " +
+            "give it a value of null. But with ignoreNullFields set to true, it should be discarded.",
+            "/parent/hello");
     }
 }
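A short sketch (with a hypothetical file path) of the Spark behavior the new assertion message describes: Spark's JSON reader unions fields across all lines, so a field that appears only on the first line still exists on every row, with a null value on the others; whether those nulls reach the written document then depends on ignoreNullFields.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class JsonLinesSchemaSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
        // Hypothetical JSON Lines file where only the first line has a 'hello' field.
        Dataset<Row> rows = spark.read().json("data/nested-objects.txt");
        rows.printSchema(); // 'hello' appears in the inferred schema for every row
        rows.show();        // rows without it show 'hello' as null
    }
}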

src/test/java/com/marklogic/spark/writer/WriteRowsWithFilePathTest.java

Lines changed: 3 additions & 1 deletion
@@ -45,7 +45,9 @@ void test() {

             JsonNode doc = readJsonDocument(uri);
             assertEquals(2, doc.size(), "The marklogic_spark_file_path column should not have been used when " +
-                "constructing the JSON document.");
+                "constructing the JSON document. This includes when ignoreNullFields is set to false. We still want " +
+                "the column removed as the column is an implementation detail that should not be exposed to the user. " +
+                "If we ever want the file path to be included in the document, we'll add an explicit feature for that.");
             assertTrue(doc.has("docNum"));
             assertTrue(doc.has("docName"));
         });

src/test/java/com/marklogic/spark/writer/WriteSparkJsonTest.java

Lines changed: 3 additions & 0 deletions
@@ -32,6 +32,7 @@ void eachLineInJsonLinesFileBecomesADocument() {
             .option(Options.CLIENT_URI, makeClientUri())
             .option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
             .option(Options.WRITE_URI_TEMPLATE, "/spark-json/{number}.json")
+            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
             .mode(SaveMode.Append)
             .save();

@@ -58,6 +59,7 @@ void eachObjectInArrayBecomesADocument() {
             .option(Options.CLIENT_URI, makeClientUri())
             .option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
             .option(Options.WRITE_URI_TEMPLATE, "/spark-json/{number}.json")
+            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
             .mode(SaveMode.Append)
             .save();

@@ -80,6 +82,7 @@ void singleObjectFileAndArrayOfObjectsFile() {
             .option(Options.CLIENT_URI, makeClientUri())
             .option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
             .option(Options.WRITE_URI_TEMPLATE, "/spark-json/{number}.json")
+            .option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
             .mode(SaveMode.Append)
             .save();
