9
9
import com .marklogic .client .io .BytesHandle ;
10
10
import com .marklogic .client .io .DocumentMetadataHandle ;
11
11
import com .marklogic .client .io .Format ;
12
+ import com .marklogic .client .io .InputStreamHandle ;
13
+ import com .marklogic .client .io .marker .AbstractWriteHandle ;
14
+ import com .marklogic .spark .ConnectorException ;
12
15
import com .marklogic .spark .Options ;
13
16
import com .marklogic .spark .reader .document .DocumentRowSchema ;
17
+ import com .marklogic .spark .reader .file .FileContext ;
14
18
import org .apache .spark .sql .catalyst .InternalRow ;
15
19
20
+ import java .io .ByteArrayInputStream ;
16
21
import java .io .IOException ;
22
+ import java .io .ObjectInputStream ;
17
23
import java .util .ArrayList ;
18
24
import java .util .List ;
19
25
import java .util .Optional ;
@@ -26,11 +32,13 @@ class DocumentRowConverter implements RowConverter {
26
32
private final ObjectMapper objectMapper ;
27
33
private final String uriTemplate ;
28
34
private final Format documentFormat ;
35
+ private final boolean isStreamingFromFiles ;
29
36
30
37
DocumentRowConverter (WriteContext writeContext ) {
31
38
this .uriTemplate = writeContext .getStringOption (Options .WRITE_URI_TEMPLATE );
32
39
this .documentFormat = writeContext .getDocumentFormat ();
33
40
this .objectMapper = new ObjectMapper ();
41
+ this .isStreamingFromFiles = writeContext .hasOption (Options .STREAM_FILES );
34
42
}
35
43
36
44
@ Override
@@ -43,25 +51,34 @@ public Optional<DocBuilder.DocumentInputs> convertRow(InternalRow row) {
43
51
return Optional .of (new DocBuilder .DocumentInputs (uri , null , null , metadata ));
44
52
}
45
53
46
- final BytesHandle content = new BytesHandle (row .getBinary (1 ));
47
- if (this .documentFormat != null ) {
48
- content .withFormat (this .documentFormat );
49
- }
54
+ Content content = this .isStreamingFromFiles ?
55
+ readContentFromFile (uri , row ) :
56
+ readContentFromRow (uri , row );
50
57
51
- JsonNode uriTemplateValues = null ;
52
- if (this .uriTemplate != null && this .uriTemplate .trim ().length () > 0 ) {
53
- String format = row .isNullAt (2 ) ? null : row .getString (2 );
54
- uriTemplateValues = deserializeContentToJson (uri , content , format );
55
- }
56
58
DocumentMetadataHandle metadata = DocumentRowSchema .makeDocumentMetadata (row );
57
- return Optional .of (new DocBuilder .DocumentInputs (uri , content , uriTemplateValues , metadata ));
59
+ return Optional .of (new DocBuilder .DocumentInputs (
60
+ uri , content .contentHandle , content .uriTemplateValues , metadata )
61
+ );
58
62
}
59
63
60
64
@ Override
61
65
public List <DocBuilder .DocumentInputs > getRemainingDocumentInputs () {
62
66
return new ArrayList <>();
63
67
}
64
68
69
+ private Content readContentFromRow (String uri , InternalRow row ) {
70
+ BytesHandle bytesHandle = new BytesHandle (row .getBinary (1 ));
71
+ if (this .documentFormat != null ) {
72
+ bytesHandle .withFormat (this .documentFormat );
73
+ }
74
+ JsonNode uriTemplateValues = null ;
75
+ if (this .uriTemplate != null && this .uriTemplate .trim ().length () > 0 ) {
76
+ String format = row .isNullAt (2 ) ? null : row .getString (2 );
77
+ uriTemplateValues = deserializeContentToJson (uri , bytesHandle , format );
78
+ }
79
+ return new Content (bytesHandle , uriTemplateValues );
80
+ }
81
+
65
82
private JsonNode deserializeContentToJson (String initialUri , BytesHandle contentHandle , String format ) {
66
83
try {
67
84
return objectMapper .readTree (contentHandle .get ());
@@ -75,4 +92,41 @@ private JsonNode deserializeContentToJson(String initialUri, BytesHandle content
75
92
return values ;
76
93
}
77
94
}
95
+
96
+ /**
97
+ * In a scenario where the user wants to stream a file into MarkLogic, the content column will contain a serialized
98
+ * instance of {@code FileContext}, which is used to stream the file into a {@code InputStreamHandle}.
99
+ */
100
+ private Content readContentFromFile (String uri , InternalRow row ) {
101
+ byte [] bytes = row .getBinary (1 );
102
+ try {
103
+ ObjectInputStream ois = new ObjectInputStream (new ByteArrayInputStream (bytes ));
104
+ FileContext fileContext = (FileContext ) ois .readObject ();
105
+ InputStreamHandle streamHandle = new InputStreamHandle (fileContext .openFile (uri ));
106
+ if (this .documentFormat != null ) {
107
+ streamHandle .withFormat (this .documentFormat );
108
+ }
109
+ return new Content (streamHandle , null );
110
+ } catch (Exception e ) {
111
+ throw new ConnectorException (String .format ("Unable to read from file %s; cause: %s" , uri , e .getMessage ()));
112
+ }
113
+ }
114
+
115
+ private static class Content {
116
+ private final AbstractWriteHandle contentHandle ;
117
+ private final JsonNode uriTemplateValues ;
118
+
119
+ public Content (AbstractWriteHandle contentHandle , JsonNode uriTemplateValues ) {
120
+ this .contentHandle = contentHandle ;
121
+ this .uriTemplateValues = uriTemplateValues ;
122
+ }
123
+
124
+ AbstractWriteHandle getContentHandle () {
125
+ return contentHandle ;
126
+ }
127
+
128
+ JsonNode getUriTemplateValues () {
129
+ return uriTemplateValues ;
130
+ }
131
+ }
78
132
}
0 commit comments