4
4
import com .marklogic .client .eval .EvalResultIterator ;
5
5
import com .marklogic .client .eval .ServerEvaluationCall ;
6
6
import com .marklogic .spark .Options ;
7
+ import com .marklogic .spark .ReadProgressLogger ;
7
8
import com .marklogic .spark .reader .JsonRowDeserializer ;
8
9
import org .apache .spark .sql .catalyst .InternalRow ;
9
10
import org .apache .spark .sql .catalyst .expressions .GenericInternalRow ;
@@ -19,6 +20,10 @@ class CustomCodePartitionReader implements PartitionReader<InternalRow> {
19
20
private final JsonRowDeserializer jsonRowDeserializer ;
20
21
private final DatabaseClient databaseClient ;
21
22
23
+ // Only needed for logging progress.
24
+ private final long batchSize ;
25
+ private long progressCounter ;
26
+
22
27
public CustomCodePartitionReader (CustomCodeContext customCodeContext , String partition ) {
23
28
this .databaseClient = customCodeContext .connectToMarkLogic ();
24
29
this .serverEvaluationCall = customCodeContext .buildCall (
@@ -31,6 +36,8 @@ public CustomCodePartitionReader(CustomCodeContext customCodeContext, String par
31
36
this .serverEvaluationCall .addVariable ("PARTITION" , partition );
32
37
}
33
38
39
+ this .batchSize = customCodeContext .getNumericOption (Options .READ_BATCH_SIZE , 1 , 1 );
40
+
34
41
this .isCustomSchema = customCodeContext .isCustomSchema ();
35
42
this .jsonRowDeserializer = new JsonRowDeserializer (customCodeContext .getSchema ());
36
43
}
@@ -49,6 +56,11 @@ public InternalRow get() {
49
56
if (this .isCustomSchema ) {
50
57
return this .jsonRowDeserializer .deserializeJson (val );
51
58
}
59
+ progressCounter ++;
60
+ if (progressCounter >= batchSize ) {
61
+ ReadProgressLogger .logProgressIfNecessary (progressCounter );
62
+ progressCounter = 0 ;
63
+ }
52
64
return new GenericInternalRow (new Object []{UTF8String .fromString (val )});
53
65
}
54
66
0 commit comments