7
7
import com .marklogic .client .datamovement .DataMovementManager ;
8
8
import com .marklogic .client .datamovement .WriteBatcher ;
9
9
import com .marklogic .client .document .DocumentWriteOperation ;
10
+ import com .marklogic .client .document .GenericDocumentManager ;
10
11
import com .marklogic .client .impl .HandleAccessor ;
11
12
import com .marklogic .client .io .DocumentMetadataHandle ;
12
13
import com .marklogic .client .io .marker .AbstractWriteHandle ;
14
+ import com .marklogic .client .io .marker .GenericWriteHandle ;
13
15
import com .marklogic .spark .ConnectorException ;
14
16
import com .marklogic .spark .Options ;
15
17
import com .marklogic .spark .Util ;
@@ -55,6 +57,10 @@ class WriteBatcherDataWriter implements DataWriter<InternalRow> {
55
57
56
58
private final RowConverter rowConverter ;
57
59
60
+ private final boolean isStreamingFiles ;
61
+ // Only initialized if streaming files; null otherwise.
62
+ private final GenericDocumentManager documentManager ;
63
+
58
64
// Updated as batches are processed.
59
65
private final AtomicInteger successItemCount = new AtomicInteger (0 );
60
66
private final AtomicInteger failedItemCount = new AtomicInteger (0 );
@@ -65,6 +71,8 @@ class WriteBatcherDataWriter implements DataWriter<InternalRow> {
65
71
this .docBuilder = this .writeContext .newDocBuilder ();
66
72
this .databaseClient = writeContext .connectToMarkLogic ();
67
73
this .rowConverter = determineRowConverter ();
74
+ this .isStreamingFiles = "true" .equals (writeContext .getStringOption (Options .STREAM_FILES ));
75
+ this .documentManager = this .isStreamingFiles ? databaseClient .newDocumentManager () : null ;
68
76
69
77
if (writeContext .isAbortOnFailure ()) {
70
78
this .batchRetrier = null ;
@@ -86,7 +94,12 @@ public void write(InternalRow row) {
86
94
throwWriteFailureIfExists ();
87
95
Optional <DocBuilder .DocumentInputs > document = rowConverter .convertRow (row );
88
96
if (document .isPresent ()) {
89
- this .writeBatcher .add (this .docBuilder .build (document .get ()));
97
+ DocumentWriteOperation writeOp = this .docBuilder .build (document .get ());
98
+ if (this .isStreamingFiles ) {
99
+ writeDocumentViaPutOperation (writeOp );
100
+ } else {
101
+ this .writeBatcher .add (writeOp );
102
+ }
90
103
}
91
104
}
92
105
@@ -183,8 +196,7 @@ private BatchRetrier makeBatchRetrier() {
183
196
writeContext .getStringOption (Options .WRITE_TEMPORAL_COLLECTION ),
184
197
successfulBatch -> successItemCount .getAndAdd (successfulBatch .size ()),
185
198
(failedDoc , failure ) -> {
186
- Util .MAIN_LOGGER .error ("Unable to write document with URI: {}; cause: {}" , failedDoc .getUri (), failure .getMessage ());
187
- failedItemCount .incrementAndGet ();
199
+ captureFailure (failure .getMessage (), failedDoc .getUri ());
188
200
if (this .archiveWriter != null ) {
189
201
writeFailedDocumentToArchive (failedDoc );
190
202
}
@@ -234,4 +246,28 @@ private void closeArchiveWriter() {
234
246
archiveWriter .close ();
235
247
}
236
248
}
249
+
250
+ /**
251
+ * Writes one document with a single document-manager call (a PUT to v1/documents) instead of queuing it on the
252
+ * WriteBatcher. A user typically chooses to stream a document due to its size, and a PUT call can handle a document
253
+ * of any size; a POST call seems to be limited by the multipart nature of the request - the body part appears to be
254
+ * read into memory, which can cause the server to run out of memory. Successes and failures are counted here.
255
+ *
256
+ * @param writeOp supplies the URI, metadata, and content to write; the content is cast to GenericWriteHandle
257
+ */
258
+ private void writeDocumentViaPutOperation (DocumentWriteOperation writeOp ) {
259
+ final String uri = writeOp .getUri ();
260
+ try {
261
+ this .documentManager .write (uri , writeOp .getMetadata (), (GenericWriteHandle ) writeOp .getContent ()); // a failed cast is a RuntimeException too, so it is handled by the catch below
262
+ this .successItemCount .incrementAndGet (); // reached only when write() returned without throwing
263
+ } catch (RuntimeException ex ) {
264
+ captureFailure (ex .getMessage (), uri ); // logs the failure and increments failedItemCount
265
+ this .writeFailure .compareAndSet (null , ex ); // NOTE(review): looks like only the first failure is kept, presumably for throwWriteFailureIfExists() to surface on a later write() - confirm
266
+ }
267
+ }
268
+
269
+ /** Logs an error naming the document URI and the failure message, then increments failedItemCount; called from both the batch-retrier failure callback and the PUT-based write path. */
+ private void captureFailure (String message , String documentUri ) {
270
+ Util .MAIN_LOGGER .error ("Unable to write document with URI: {}; cause: {}" , documentUri , message );
271
+ failedItemCount .incrementAndGet ();
272
+ }
237
273
}
0 commit comments