Skip to content

Commit c61f337

Browse files
authored
Merge pull request #262 from marklogic/feature/ignore-null-2
Defaulting to including null fields.
2 parents bf13ef7 + 919be7b commit c61f337

File tree

4 files changed

+18
-16
lines changed

src/main/java/com/marklogic/spark/JsonRowSerializer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ public String serializeRowToJson(InternalRow row) {
6161
*/
6262
private Map<String, String> buildOptionsForJsonOptions(Map<String, String> connectorProperties) {
6363
Map<String, String> options = new HashMap<>();
64+
// Default to include null fields, as they are easily queried in MarkLogic.
65+
options.put("ignoreNullFields", "false");
6466
connectorProperties.forEach((key, value) -> {
6567
if (key.startsWith(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX)) {
6668
String optionName = key.substring(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX.length());

src/test/java/com/marklogic/spark/writer/WriteNullValuesTest.java renamed to src/test/java/com/marklogic/spark/writer/IgnoreNullValuesTest.java

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
package com.marklogic.spark.writer;
55

66
import com.fasterxml.jackson.databind.JsonNode;
7-
import com.fasterxml.jackson.databind.node.JsonNodeType;
87
import com.marklogic.junit5.XmlNode;
98
import com.marklogic.spark.AbstractIntegrationTest;
109
import com.marklogic.spark.Options;
@@ -15,7 +14,7 @@
1514
import static org.junit.jupiter.api.Assertions.assertEquals;
1615
import static org.junit.jupiter.api.Assertions.assertFalse;
1716

18-
class WriteNullValuesTest extends AbstractIntegrationTest {
17+
class IgnoreNullValuesTest extends AbstractIntegrationTest {
1918

2019
@Test
2120
void jsonWithEmptyValues() {
@@ -29,21 +28,20 @@ void jsonWithEmptyValues() {
2928
.option(Options.CLIENT_URI, makeClientUri())
3029
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
3130
.option(Options.WRITE_URI_TEMPLATE, "/a/{number}.json")
32-
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "false")
31+
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
3332
.mode(SaveMode.Append)
3433
.save();
3534

3635
JsonNode doc = readJsonDocument("/a/1.json");
3736
assertEquals(1, doc.get("number").asInt());
3837
assertEquals("blue", doc.get("color").asText());
39-
assertEquals(JsonNodeType.NULL, doc.get("flag").getNodeType());
40-
assertEquals(3, doc.size(), "The file path column should not be included in the serialization.");
38+
assertEquals(2, doc.size(), "The flag column should not be included in the serialization.");
4139

4240
doc = readJsonDocument("/a/2.json");
4341
assertEquals(2, doc.get("number").asInt());
4442
assertEquals(" ", doc.get("color").asText(), "Verifies that whitespace is retained by default.");
4543
assertFalse(doc.get("flag").asBoolean());
46-
assertEquals(3, doc.size(), "The file path column should not be included in the serialization.");
44+
assertEquals(3, doc.size());
4745
}
4846

4947
@Test
@@ -58,14 +56,12 @@ void xmlWithEmptyValues() {
5856
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
5957
.option(Options.WRITE_XML_ROOT_NAME, "test")
6058
.option(Options.WRITE_URI_TEMPLATE, "/a/{number}.xml")
61-
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "false")
59+
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
6260
.mode(SaveMode.Append)
6361
.save();
6462

6563
XmlNode doc = readXmlDocument("/a/1.xml");
66-
doc.assertElementValue(
67-
"The empty flag column should be retained due to ignoreNullFields=true",
68-
"/test/flag", "");
64+
doc.assertElementMissing("The empty flag column should be ignored", "/test/flag");
6965
doc.assertElementValue("/test/number", "1");
7066
doc.assertElementValue("/test/color", "blue");
7167

@@ -87,7 +83,7 @@ void jsonLinesWithNestedFieldsConvertedToXml() {
8783
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
8884
.option(Options.WRITE_XML_ROOT_NAME, "parent")
8985
.option(Options.WRITE_URI_TEMPLATE, "/a/{id}.xml")
90-
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "false")
86+
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
9187
.mode(SaveMode.Append)
9288
.save();
9389

@@ -99,9 +95,8 @@ void jsonLinesWithNestedFieldsConvertedToXml() {
9995
doc.assertElementValue("/parent/id", "1");
10096

10197
doc = readXmlDocument("/a/2.xml");
102-
doc.assertElementValue(
103-
"'hello' is added even though it doesn't exist on the line. This is due to ignoreNullFields being false " +
104-
"and Spark adding 'hello' to the schema since it appears on the first line.",
105-
"/parent/hello", "");
98+
doc.assertElementMissing("'hello' should not appear. Spark JSON will actually include it in the schema and " +
99+
"give it a value of null. But with ignoreNullFields set to true, it should be discarded.",
100+
"/parent/hello");
106101
}
107102
}

src/test/java/com/marklogic/spark/writer/WriteRowsWithFilePathTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ void test() {
4545

4646
JsonNode doc = readJsonDocument(uri);
4747
assertEquals(2, doc.size(), "The marklogic_spark_file_path column should not have been used when " +
48-
"constructing the JSON document.");
48+
"constructing the JSON document. This includes when ignoreNullFields is set to false. We still want " +
49+
"the column removed as the column is an implementation detail that should not be exposed to the user. " +
50+
"If we ever want the file path to be included in the document, we'll add an explicit feature for that.");
4951
assertTrue(doc.has("docNum"));
5052
assertTrue(doc.has("docName"));
5153
});

src/test/java/com/marklogic/spark/writer/WriteSparkJsonTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ void eachLineInJsonLinesFileBecomesADocument() {
3232
.option(Options.CLIENT_URI, makeClientUri())
3333
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
3434
.option(Options.WRITE_URI_TEMPLATE, "/spark-json/{number}.json")
35+
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
3536
.mode(SaveMode.Append)
3637
.save();
3738

@@ -58,6 +59,7 @@ void eachObjectInArrayBecomesADocument() {
5859
.option(Options.CLIENT_URI, makeClientUri())
5960
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
6061
.option(Options.WRITE_URI_TEMPLATE, "/spark-json/{number}.json")
62+
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
6163
.mode(SaveMode.Append)
6264
.save();
6365

@@ -80,6 +82,7 @@ void singleObjectFileAndArrayOfObjectsFile() {
8082
.option(Options.CLIENT_URI, makeClientUri())
8183
.option(Options.WRITE_PERMISSIONS, DEFAULT_PERMISSIONS)
8284
.option(Options.WRITE_URI_TEMPLATE, "/spark-json/{number}.json")
85+
.option(Options.WRITE_JSON_SERIALIZATION_OPTION_PREFIX + "ignoreNullFields", "true")
8386
.mode(SaveMode.Append)
8487
.save();
8588

0 commit comments

Comments
 (0)