Skip to content

Commit 33facf5

Browse files
authored
fix: Add paging to hbase client (#4166)
* Added paging to hbase client * minor fixes * minor fixes * Fix tests * Fix tests * Fix lint * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * test * Brought back test and fixed page size handling * fixed test * add tests * minor refactor * minor refactor * add error handling tests * fix format * Add protection against OOM exceptions * Add protection against OOM exceptions * Remove useless assertion * handle setCaching properly * handle setCaching properly * handle setCaching properly * handle setCaching properly * handle setCaching properly * handle setCaching properly * remove useless code * Get page size directly from the paginator * fix lint * cancel serverStream when reaching memory limit * add test for low memory * add test for low memory * fix lint * update java-bigtable dependency * Fixed several PR comments * Fixed several PR comments * Fixed several PR comments * Moved to async API * Fixed several PR comments * Fixed several PR comments * Fixed several PR comments * Fixed several PR comments * Fixed several PR comments * Fixed several PR comments * Fixed according to PR comments * Fix wrong advance * Fixed according to PR * Fixed according to PR * Fixed according to PR * remove test * adjust tests according to PR * adjust tests according to PR * fix bug found on beam
1 parent a7240cf commit 33facf5

File tree

6 files changed

+413
-9
lines changed

6 files changed

+413
-9
lines changed

bigtable-client-core-parent/bigtable-hbase-integration-tests-common/src/test/java/com/google/cloud/bigtable/hbase/TestScan.java

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,33 @@ public void testGetScannerNoQualifiers() throws IOException {
177177
}
178178

179179
@Test
180-
public void test100ResultsInScanner() throws IOException {
180+
public void testManyResultsInScanner_lessThanPageSize() throws IOException {
181+
testManyResultsInScanner(95, true);
182+
}
183+
184+
/**
 * Scans exactly 100 rows with pagination enabled; the shared helper sets the scan caching (page
 * size) to 100, so the result set is exactly one page long.
 */
@Test
185+
public void testManyResultsInScanner_equalToPageSize() throws IOException {
186+
testManyResultsInScanner(100, true);
187+
}
188+
189+
/**
 * Scans 105 rows with pagination enabled; the shared helper sets the scan caching (page size) to
 * 100, so the result set spills over into a second page.
 */
@Test
190+
public void testManyResultsInScanner_greaterThanPageSize() throws IOException {
191+
testManyResultsInScanner(105, true);
192+
}
193+
194+
/**
 * Scans 205 rows with pagination enabled; the shared helper sets the scan caching (page size) to
 * 100, so the result set needs more than two full pages.
 */
@Test
195+
public void testManyResultsInScanner_greaterThanTwoPageSizes() throws IOException {
196+
testManyResultsInScanner(205, true);
197+
}
198+
199+
/**
 * Scans 100 rows with pagination disabled: the shared helper leaves the scan caching untouched
 * when {@code withPagination} is false.
 */
@Test
200+
public void testManyResultsInScanner_onePageSizeNoPagination() throws IOException {
201+
testManyResultsInScanner(100, false);
202+
}
203+
204+
private void testManyResultsInScanner(int rowsToWrite, boolean withPagination)
205+
throws IOException {
181206
String prefix = "scan_row_";
182-
int rowsToWrite = 100;
183207

184208
// Initialize variables
185209
Table table = getDefaultTable();
@@ -208,9 +232,13 @@ public void test100ResultsInScanner() throws IOException {
208232

209233
Scan scan = new Scan();
210234
scan.withStartRow(rowKeys[0])
211-
.withStopRow(rowFollowing(rowKeys[rowsToWrite - 1]))
235+
.withStopRow(rowFollowingSameLength(rowKeys[rowsToWrite - 1]))
212236
.addFamily(COLUMN_FAMILY);
213237

238+
if (withPagination) {
239+
scan = scan.setCaching(100);
240+
}
241+
214242
try (ResultScanner resultScanner = table.getScanner(scan)) {
215243
for (int rowIndex = 0; rowIndex < rowsToWrite; rowIndex++) {
216244
Result result = resultScanner.next();
@@ -275,7 +303,7 @@ public void testScanDelete() throws IOException {
275303

276304
Scan scan = new Scan();
277305
scan.withStartRow(rowKeys[0])
278-
.withStopRow(rowFollowing(rowKeys[rowsToWrite - 1]))
306+
.withStopRow(rowFollowingSameLength(rowKeys[rowsToWrite - 1]))
279307
.addFamily(COLUMN_FAMILY);
280308
int deleteCount = 0;
281309
try (ResultScanner resultScanner = table.getScanner(scan)) {

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/AbstractBigtableTable.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.google.api.core.InternalApi;
1919
import com.google.cloud.bigtable.data.v2.models.ConditionalRowMutation;
2020
import com.google.cloud.bigtable.data.v2.models.Filters;
21+
import com.google.cloud.bigtable.data.v2.models.Query;
2122
import com.google.cloud.bigtable.data.v2.models.ReadModifyWriteRow;
2223
import com.google.cloud.bigtable.data.v2.models.RowMutation;
2324
import com.google.cloud.bigtable.hbase.adapters.Adapters;
@@ -92,6 +93,14 @@ public abstract class AbstractBigtableTable implements Table {
9293

9394
private static final Tracer TRACER = Tracing.getTracer();
9495

96+
// Lower bound (100 MiB) for the byte limit handed to paginated scans.
private static final int MIN_BYTE_BUFFER_SIZE = 100 * 1024 * 1024;
97+
// Fraction (10%) of the JVM's reported total memory used to size the byte limit.
private static final double DEFAULT_BYTE_LIMIT_PERCENTAGE = .1;
98+
// Byte limit passed to clientWrapper.readRows(paginator, ...) by getScanner: the larger of
// MIN_BYTE_BUFFER_SIZE and 10% of Runtime.totalMemory(). The value is computed once, when this
// class is initialized, and is not re-evaluated afterwards.
// NOTE(review): totalMemory() reports the heap currently reserved by the JVM, which may be well
// below the configured maximum at class-initialization time; confirm whether maxMemory() was
// intended.
private static final long DEFAULT_MAX_SEGMENT_SIZE =
99+
(long)
100+
Math.max(
101+
MIN_BYTE_BUFFER_SIZE,
102+
(Runtime.getRuntime().totalMemory() * DEFAULT_BYTE_LIMIT_PERCENTAGE));
103+
95104
private static class TableMetrics {
96105
Timer putTimer = BigtableClientMetrics.timer(MetricLevel.Info, "table.put.latency");
97106
Timer getTimer = BigtableClientMetrics.timer(MetricLevel.Info, "table.get.latency");
@@ -295,8 +304,14 @@ public ResultScanner getScanner(final Scan scan) throws IOException {
295304
LOG.trace("getScanner(Scan)");
296305
Span span = TRACER.spanBuilder("BigtableTable.scan").startSpan();
297306
try (Scope scope = TRACER.withSpan(span)) {
298-
299-
final ResultScanner scanner = clientWrapper.readRows(hbaseAdapter.adapt(scan));
307+
ResultScanner scanner;
308+
if (scan.getCaching() == -1) {
309+
scanner = clientWrapper.readRows(hbaseAdapter.adapt(scan));
310+
} else {
311+
Query.QueryPaginator paginator =
312+
hbaseAdapter.adapt(scan).createPaginator(scan.getCaching());
313+
scanner = clientWrapper.readRows(paginator, DEFAULT_MAX_SEGMENT_SIZE);
314+
}
300315
if (hasWhileMatchFilter(scan.getFilter())) {
301316
return Adapters.BIGTABLE_WHILE_MATCH_RESULT_RESULT_SCAN_ADAPTER.adapt(scanner, span);
302317
}

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/wrappers/DataClientWrapper.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,10 @@ ApiFuture<Result> readRowAsync(
9191

9292
@Override
9393
void close() throws IOException;
94+
95+
/**
96+
* Perform a scan over {@link Result}s, in key order, one page at a time using the given
97+
* paginator. {@code maxSegmentByteSize} is a byte limit for the results buffered by the scanner;
* production callers pass a default value and tests may pass a smaller one.
*
* @param paginator supplies the query for each page and is advanced as pages are consumed
* @param maxSegmentByteSize byte limit for buffered results
* @return a scanner that yields the rows of all pages
98+
*/
99+
ResultScanner readRows(Query.QueryPaginator paginator, long maxSegmentByteSize);
94100
}

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/wrappers/veneer/DataClientVeneerApi.java

Lines changed: 159 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import com.google.api.core.InternalApi;
2222
import com.google.api.gax.grpc.GrpcCallContext;
2323
import com.google.api.gax.rpc.ApiCallContext;
24+
import com.google.api.gax.rpc.ResponseObserver;
2425
import com.google.api.gax.rpc.ServerStream;
2526
import com.google.api.gax.rpc.StateCheckingResponseObserver;
2627
import com.google.api.gax.rpc.StreamController;
@@ -43,12 +44,18 @@
4344
import com.google.cloud.bigtable.metrics.Timer;
4445
import com.google.cloud.bigtable.metrics.Timer.Context;
4546
import com.google.common.util.concurrent.MoreExecutors;
47+
import com.google.common.util.concurrent.SettableFuture;
4648
import com.google.protobuf.ByteString;
4749
import io.grpc.CallOptions;
4850
import io.grpc.Deadline;
4951
import io.grpc.stub.StreamObserver;
52+
import java.util.ArrayDeque;
53+
import java.util.ArrayList;
5054
import java.util.Iterator;
5155
import java.util.List;
56+
import java.util.Queue;
57+
import java.util.concurrent.ExecutionException;
58+
import java.util.concurrent.Future;
5259
import java.util.concurrent.TimeUnit;
5360
import javax.annotation.Nullable;
5461
import org.apache.hadoop.hbase.client.AbstractClientScanner;
@@ -134,6 +141,12 @@ public Result apply(Row row) {
134141
MoreExecutors.directExecutor());
135142
}
136143

144+
/**
 * Creates a {@link PaginatedRowResultScanner} over the wrapped data client. The scanner is given
 * a call context obtained from {@code createScanCallContext()} and starts fetching the first
 * page as soon as it is constructed.
 *
 * @param paginator supplies the query for each page
 * @param maxSegmentByteSize byte limit after which the scanner cancels the page in flight
 * @return the paginated scanner
 */
@Override
145+
public ResultScanner readRows(Query.QueryPaginator paginator, long maxSegmentByteSize) {
146+
return new PaginatedRowResultScanner(
147+
paginator, delegate, maxSegmentByteSize, this.createScanCallContext());
148+
}
149+
137150
@Override
138151
public ResultScanner readRows(Query request) {
139152
return new RowResultScanner(
@@ -228,6 +241,151 @@ protected void onCompleteImpl() {
228241
}
229242
}
230243

244+
/**
245+
* wraps {@link ServerStream} onto HBase {@link ResultScanner}. {@link PaginatedRowResultScanner}
246+
* gets a paginator and a {@link Query.QueryPaginator} used to get a {@link ServerStream}<{@link
247+
* Result}> using said paginator to iterate over pages of rows. The {@link Query.QueryPaginator}
248+
* pageSize property indicates the size of each page in every API call. A cache of a maximum size
249+
* of 1.1*pageSize and a minimum of 0.1*pageSize is held at all times. In order to avoid OOM
250+
* exceptions, there is a limit for the total byte size held in cache.
251+
*/
252+
static class PaginatedRowResultScanner extends AbstractClientScanner {
253+
// Percentage of max number of rows allowed in the buffer
254+
private static final double WATERMARK_PERCENTAGE = .1;
255+
private static final RowResultAdapter RESULT_ADAPTER = new RowResultAdapter();
256+
257+
private final Meter scannerResultMeter =
258+
BigtableClientMetrics.meter(BigtableClientMetrics.MetricLevel.Info, "scanner.results");
259+
private final Timer scannerResultTimer =
260+
BigtableClientMetrics.timer(
261+
BigtableClientMetrics.MetricLevel.Debug, "scanner.results.latency");
262+
263+
private ByteString lastSeenRowKey = ByteString.EMPTY;
264+
private Boolean hasMore = true;
265+
private final Queue<Result> buffer;
266+
private final Query.QueryPaginator paginator;
267+
private final int refillSegmentWaterMark;
268+
269+
private final BigtableDataClient dataClient;
270+
271+
private final long maxSegmentByteSize;
272+
273+
private long currentByteSize = 0;
274+
275+
private @Nullable Future<List<Result>> future;
276+
private GrpcCallContext scanCallContext;
277+
278+
PaginatedRowResultScanner(
279+
Query.QueryPaginator paginator,
280+
BigtableDataClient dataClient,
281+
long maxSegmentByteSize,
282+
GrpcCallContext scanCallContext) {
283+
this.maxSegmentByteSize = maxSegmentByteSize;
284+
285+
this.paginator = paginator;
286+
this.dataClient = dataClient;
287+
this.buffer = new ArrayDeque<>();
288+
this.refillSegmentWaterMark =
289+
(int) Math.max(1, paginator.getPageSize() * WATERMARK_PERCENTAGE);
290+
this.scanCallContext = scanCallContext;
291+
this.future = fetchNextSegment();
292+
}
293+
294+
@Override
295+
public Result next() {
296+
try (Context ignored = scannerResultTimer.time()) {
297+
if (this.future != null && this.future.isDone()) {
298+
this.consumeReadRowsFuture();
299+
}
300+
if (this.buffer.size() < this.refillSegmentWaterMark && this.future == null && hasMore) {
301+
future = fetchNextSegment();
302+
}
303+
if (this.buffer.isEmpty() && this.future != null) {
304+
this.consumeReadRowsFuture();
305+
}
306+
Result result = this.buffer.poll();
307+
if (result != null) {
308+
scannerResultMeter.mark();
309+
currentByteSize -= Result.getTotalSizeOfCells(result);
310+
}
311+
return result;
312+
}
313+
}
314+
315+
@Override
316+
public void close() {
317+
if (this.future != null) {
318+
this.future.cancel(true);
319+
}
320+
}
321+
322+
public boolean renewLease() {
323+
return true;
324+
}
325+
326+
private Future<List<Result>> fetchNextSegment() {
327+
SettableFuture<List<Result>> resultsFuture = SettableFuture.create();
328+
329+
dataClient
330+
.readRowsCallable(RESULT_ADAPTER)
331+
.call(
332+
paginator.getNextQuery(),
333+
new ResponseObserver<Result>() {
334+
private StreamController controller;
335+
List<Result> results = new ArrayList();
336+
337+
@Override
338+
public void onStart(StreamController controller) {
339+
this.controller = controller;
340+
}
341+
342+
@Override
343+
public void onResponse(Result result) {
344+
// calculate size of the response
345+
currentByteSize += Result.getTotalSizeOfCells(result);
346+
results.add(result);
347+
if (result != null && result.rawCells() != null) {
348+
lastSeenRowKey = RESULT_ADAPTER.getKey(result);
349+
}
350+
351+
if (currentByteSize > maxSegmentByteSize) {
352+
controller.cancel();
353+
return;
354+
}
355+
}
356+
357+
@Override
358+
public void onError(Throwable t) {
359+
if (currentByteSize > maxSegmentByteSize) {
360+
onComplete();
361+
} else {
362+
resultsFuture.setException(t);
363+
}
364+
}
365+
366+
@Override
367+
public void onComplete() {
368+
resultsFuture.set(results);
369+
}
370+
},
371+
this.scanCallContext);
372+
return resultsFuture;
373+
}
374+
375+
private void consumeReadRowsFuture() {
376+
try {
377+
List<Result> results = this.future.get();
378+
this.buffer.addAll(results);
379+
this.hasMore = this.paginator.advance(this.lastSeenRowKey);
380+
this.future = null;
381+
} catch (InterruptedException e) {
382+
Thread.currentThread().interrupt();
383+
} catch (ExecutionException e) {
384+
// Do nothing.
385+
}
386+
}
387+
}
388+
231389
/** wraps {@link ServerStream} onto HBase {@link ResultScanner}. */
232390
private static class RowResultScanner extends AbstractClientScanner {
233391

@@ -264,7 +422,7 @@ public void close() {
264422
}
265423

266424
public boolean renewLease() {
267-
throw new UnsupportedOperationException("renewLease");
425+
return true;
268426
}
269427
}
270428
}

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/wrappers/veneer/SharedDataClientWrapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,9 @@ public void close() throws IOException {
108108
delegate.close();
109109
owner.release(key);
110110
}
111+
112+
/**
 * Forwards the paginated scan to the wrapped delegate unchanged.
 *
 * @param paginator passed through to the delegate
 * @param maxSegmentByteSize passed through to the delegate
 * @return the scanner created by the delegate
 */
@Override
113+
public ResultScanner readRows(Query.QueryPaginator paginator, long maxSegmentByteSize) {
114+
return delegate.readRows(paginator, maxSegmentByteSize);
115+
}
111116
}

0 commit comments

Comments
 (0)