Skip to content

Commit 59d3514

Browse files
authored
Merge pull request #291 from marklogic/feature/export-fix
Fix for streaming documents
2 parents 091fa7a + eb59349 commit 59d3514

File tree

1 file changed

+20
-18
lines changed

1 file changed

+20
-18
lines changed

src/main/java/com/marklogic/spark/writer/file/ContentWriter.java

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
import com.fasterxml.jackson.databind.JsonNode;
77
import com.fasterxml.jackson.databind.ObjectMapper;
88
import com.marklogic.client.document.GenericDocumentManager;
9-
import com.marklogic.client.io.BytesHandle;
9+
import com.marklogic.client.io.InputStreamHandle;
1010
import com.marklogic.spark.ConnectorException;
1111
import com.marklogic.spark.ContextSupport;
1212
import com.marklogic.spark.Options;
1313
import com.marklogic.spark.reader.document.DocumentRowSchema;
14+
import org.apache.commons.io.IOUtils;
1415
import org.apache.spark.sql.catalyst.InternalRow;
1516

1617
import javax.xml.XMLConstants;
@@ -19,6 +20,7 @@
1920
import javax.xml.transform.stream.StreamSource;
2021
import java.io.ByteArrayInputStream;
2122
import java.io.IOException;
23+
import java.io.InputStream;
2224
import java.io.OutputStream;
2325
import java.nio.charset.Charset;
2426
import java.util.Map;
@@ -35,6 +37,7 @@ class ContentWriter {
3537
private final boolean prettyPrint;
3638
private final Charset encoding;
3739

40+
private final boolean isStreamingFiles;
3841
// Only set when streaming.
3942
private final GenericDocumentManager documentManager;
4043

@@ -49,22 +52,22 @@ class ContentWriter {
4952
this.objectMapper = null;
5053
}
5154

52-
this.documentManager = "true".equalsIgnoreCase(properties.get(Options.STREAM_FILES)) ?
55+
this.isStreamingFiles = "true".equalsIgnoreCase(properties.get(Options.STREAM_FILES));
56+
this.documentManager = this.isStreamingFiles ?
5357
new ContextSupport(properties).connectToMarkLogic().newDocumentManager() : null;
5458
}
5559

5660
void writeContent(InternalRow row, OutputStream outputStream) throws IOException {
57-
if (this.prettyPrint) {
61+
if (this.isStreamingFiles) {
62+
streamDocumentToFile(row, outputStream);
63+
} else if (this.prettyPrint) {
5864
prettyPrintContent(row, outputStream);
65+
} else if (this.encoding != null) {
66+
// We know the string from MarkLogic is UTF-8, so we use getBytes to convert it to the user's
67+
// specified encoding (as opposed to new String(bytes, encoding)).
68+
outputStream.write(new String(row.getBinary(1)).getBytes(this.encoding));
5969
} else {
60-
byte[] bytes = getContentBytes(row);
61-
if (this.encoding != null) {
62-
// We know the string from MarkLogic is UTF-8, so we use getBytes to convert it to the user's
63-
// specified encoding (as opposed to new String(bytes, encoding)).
64-
outputStream.write(new String(bytes).getBytes(this.encoding));
65-
} else {
66-
outputStream.write(bytes);
67-
}
70+
outputStream.write(row.getBinary(1));
6871
}
6972
}
7073

@@ -116,7 +119,7 @@ private Transformer newTransformer() {
116119
}
117120

118121
private void prettyPrintContent(InternalRow row, OutputStream outputStream) throws IOException {
119-
final byte[] content = getContentBytes(row);
122+
final byte[] content = row.getBinary(1);
120123
final String format = row.isNullAt(2) ? null : row.getString(2);
121124
if ("JSON".equalsIgnoreCase(format)) {
122125
prettyPrintJson(content, outputStream);
@@ -151,11 +154,10 @@ private void prettyPrintXml(byte[] content, OutputStream outputStream) {
151154
}
152155
}
153156

154-
private byte[] getContentBytes(InternalRow row) {
155-
if (this.documentManager != null) {
156-
String uri = row.getString(0);
157-
return documentManager.read(uri, new BytesHandle()).get();
158-
}
159-
return row.getBinary(1);
157+
private void streamDocumentToFile(InternalRow row, OutputStream outputStream) throws IOException {
158+
String uri = row.getString(0);
159+
InputStream inputStream = documentManager.read(uri, new InputStreamHandle()).get();
160+
// commons-io is a dependency of Spark and a common utility for copying between two steams.
161+
IOUtils.copy(inputStream, outputStream);
160162
}
161163
}

0 commit comments

Comments
 (0)