1
1
package com .marklogic .spark .writer ;
2
2
3
3
import com .fasterxml .jackson .core .JsonProcessingException ;
4
+ import com .fasterxml .jackson .databind .JsonNode ;
4
5
import com .fasterxml .jackson .databind .ObjectMapper ;
5
6
import com .fasterxml .jackson .databind .node .ObjectNode ;
7
+ import com .fasterxml .jackson .dataformat .xml .XmlMapper ;
6
8
import com .marklogic .client .io .Format ;
7
9
import com .marklogic .client .io .JacksonHandle ;
8
10
import com .marklogic .client .io .StringHandle ;
9
11
import com .marklogic .client .io .marker .AbstractWriteHandle ;
10
12
import com .marklogic .spark .ConnectorException ;
13
+ import com .marklogic .spark .JsonRowSerializer ;
11
14
import com .marklogic .spark .Options ;
12
- import com .marklogic .spark .Util ;
13
15
import org .apache .spark .sql .catalyst .InternalRow ;
14
- import org .apache .spark .sql .catalyst .json .JacksonGenerator ;
15
16
import org .apache .spark .sql .types .StructField ;
16
17
import org .apache .spark .sql .types .StructType ;
17
- import org .json .JSONObject ;
18
- import org .json .XML ;
19
18
20
- import java .io .StringWriter ;
21
19
import java .util .ArrayList ;
22
20
import java .util .List ;
23
21
import java .util .Optional ;
@@ -32,24 +30,23 @@ class ArbitraryRowConverter implements RowConverter {
32
30
private static final String MARKLOGIC_SPARK_FILE_PATH_COLUMN_NAME = "marklogic_spark_file_path" ;
33
31
34
32
private final ObjectMapper objectMapper ;
35
-
36
- private final StructType schema ;
33
+ private final XmlMapper xmlMapper ;
34
+ private final JsonRowSerializer jsonRowSerializer ;
37
35
private final String uriTemplate ;
38
36
private final String jsonRootName ;
39
37
private final String xmlRootName ;
40
38
private final String xmlNamespace ;
41
-
42
39
private final int filePathIndex ;
43
40
44
41
ArbitraryRowConverter (WriteContext writeContext ) {
45
- this .schema = writeContext .getSchema ();
46
- this .filePathIndex = determineFilePathIndex ();
47
-
42
+ this .filePathIndex = determineFilePathIndex (writeContext .getSchema ());
48
43
this .uriTemplate = writeContext .getStringOption (Options .WRITE_URI_TEMPLATE );
49
44
this .jsonRootName = writeContext .getStringOption (Options .WRITE_JSON_ROOT_NAME );
50
45
this .xmlRootName = writeContext .getStringOption (Options .WRITE_XML_ROOT_NAME );
51
46
this .xmlNamespace = writeContext .getStringOption (Options .WRITE_XML_NAMESPACE );
52
47
this .objectMapper = new ObjectMapper ();
48
+ this .xmlMapper = this .xmlRootName != null ? new XmlMapper () : null ;
49
+ this .jsonRowSerializer = new JsonRowSerializer (writeContext .getSchema (), writeContext .getProperties ());
53
50
}
54
51
55
52
@ Override
@@ -60,26 +57,49 @@ public Optional<DocBuilder.DocumentInputs> convertRow(InternalRow row) {
60
57
row .setNullAt (this .filePathIndex );
61
58
}
62
59
63
- final String json = convertRowToJSONString (row );
64
- AbstractWriteHandle contentHandle = this .xmlRootName != null ?
65
- new StringHandle (convertJsonToXml (json )).withFormat (Format .XML ) :
66
- new StringHandle (json ).withFormat (Format .JSON );
60
+ final String json = this .jsonRowSerializer .serializeRowToJson (row );
67
61
62
+ AbstractWriteHandle contentHandle = null ;
63
+ ObjectNode deserializedJson = null ;
68
64
ObjectNode uriTemplateValues = null ;
69
- if (this .uriTemplate != null || this .jsonRootName != null ) {
70
- ObjectNode jsonObject = readTree (json );
65
+ final boolean mustRemoveFilePathField = this .filePathIndex > 1 && jsonRowSerializer .isIncludeNullFields ();
66
+
67
+ if (this .jsonRootName != null || this .xmlRootName != null || this .uriTemplate != null || mustRemoveFilePathField ) {
68
+ deserializedJson = readTree (json );
69
+ if (mustRemoveFilePathField ) {
70
+ deserializedJson .remove (MARKLOGIC_SPARK_FILE_PATH_COLUMN_NAME );
71
+ }
72
+ }
73
+
74
+ if (this .uriTemplate != null ) {
75
+ uriTemplateValues = deserializedJson ;
76
+ }
77
+
78
+ if (this .jsonRootName != null ) {
79
+ ObjectNode jsonObjectWithRootName = objectMapper .createObjectNode ();
80
+ jsonObjectWithRootName .set (jsonRootName , deserializedJson );
81
+ contentHandle = new JacksonHandle (jsonObjectWithRootName );
71
82
if (this .uriTemplate != null ) {
72
- uriTemplateValues = jsonObject ;
83
+ uriTemplateValues = jsonObjectWithRootName ;
73
84
}
74
- if (this .jsonRootName != null ) {
75
- ObjectNode root = objectMapper .createObjectNode ();
76
- root .set (jsonRootName , jsonObject );
77
- contentHandle = new JacksonHandle (root );
78
- if (this .uriTemplate != null ) {
79
- uriTemplateValues = root ;
80
- }
85
+ }
86
+
87
+ if (contentHandle == null ) {
88
+ // If the user wants XML, then we've definitely deserialized the JSON and removed the file path if
89
+ // needed. So use that JsonNode to produce an XML string.
90
+ if (xmlRootName != null ) {
91
+ contentHandle = new StringHandle (convertJsonToXml (deserializedJson )).withFormat (Format .XML );
92
+ }
93
+ // If we've already gone to the effort of creating deserializedJson, use it for the content.
94
+ else if (deserializedJson != null ) {
95
+ contentHandle = new JacksonHandle (deserializedJson );
96
+ } else {
97
+ // Simplest scenario where we never have a reason to incur the expense of deserializing the JSON string,
98
+ // so we can just use StringHandle.
99
+ contentHandle = new StringHandle (json ).withFormat (Format .JSON );
81
100
}
82
101
}
102
+
83
103
return Optional .of (new DocBuilder .DocumentInputs (initialUri , contentHandle , uriTemplateValues , null ));
84
104
}
85
105
@@ -98,7 +118,7 @@ public List<DocBuilder.DocumentInputs> getRemainingDocumentInputs() {
98
118
*
99
119
* @return
100
120
*/
101
- private int determineFilePathIndex () {
121
+ private int determineFilePathIndex (StructType schema ) {
102
122
StructField [] fields = schema .fields ();
103
123
for (int i = 0 ; i < fields .length ; i ++) {
104
124
if (MARKLOGIC_SPARK_FILE_PATH_COLUMN_NAME .equals (fields [i ].name ())) {
@@ -118,33 +138,29 @@ private ObjectNode readTree(String json) {
118
138
}
119
139
}
120
140
121
- private String convertRowToJSONString (InternalRow row ) {
122
- StringWriter writer = new StringWriter ();
123
- JacksonGenerator jacksonGenerator = new JacksonGenerator (this .schema , writer , Util .DEFAULT_JSON_OPTIONS );
124
- jacksonGenerator .write (row );
125
- jacksonGenerator .flush ();
126
- return writer .toString ();
127
- }
128
-
129
141
/**
130
142
* jackson-xml-mapper unfortunately does not yet support a root namespace. Nor does it allow for the root element
131
143
* to be omitted. So we always end up with "ObjectNode" as a root element. See
132
- * https://github.com/FasterXML/jackson-dataformat-xml/issues/541 for more information.
133
- * <p>
134
- * While JSON-Java does not support a root namespace, it does allow for the root element to be omitted. That is
135
- * sufficient for us, as we can then generate our own root element - albeit via string concatentation - that
136
- * includes a user-defined namespace.
144
+ * https://github.com/FasterXML/jackson-dataformat-xml/issues/541 for more information. So this method does some
145
+ * work to replace that root element with one based on user inputs.
137
146
*
138
- * @param json
147
+ * @param doc
139
148
* @return
140
149
*/
141
- private String convertJsonToXml (String json ) {
142
- JSONObject jsonObject = new JSONObject (json );
143
- if (this .xmlNamespace != null ) {
144
- StringBuilder xml = new StringBuilder (String .format ("<%s xmlns='%s'>" , this .xmlRootName , this .xmlNamespace ));
145
- xml .append (XML .toString (jsonObject , null ));
146
- return xml .append (String .format ("</%s>" , this .xmlRootName )).toString ();
150
+ private String convertJsonToXml (JsonNode doc ) {
151
+ try {
152
+ String xml = xmlMapper .writer ().writeValueAsString (doc );
153
+ String startTag = this .xmlNamespace != null ?
154
+ String .format ("<%s xmlns='%s'>" , this .xmlRootName , this .xmlNamespace ) :
155
+ String .format ("<%s>" , this .xmlRootName );
156
+ return new StringBuilder (startTag )
157
+ .append (xml .substring ("<ObjectNode>" .length (), xml .length () - "</ObjectNode>" .length ()))
158
+ .append (String .format ("</%s>" , this .xmlRootName ))
159
+ .toString ();
160
+ } catch (JsonProcessingException e ) {
161
+ // We don't expect this to occur; Jackson should be able to convert any JSON object that it created into
162
+ // a valid XML document.
163
+ throw new ConnectorException (String .format ("Unable to convert JSON to XML for doc: %s" , doc ), e );
147
164
}
148
- return XML .toString (jsonObject , this .xmlRootName );
149
165
}
150
166
}
0 commit comments