Add query.eachBatch() to Parse.Query API (#1114)

noahsilas · acinader · web-flow · commit a3e99837db96 · 2020-03-12T08:53:51.000-07:00
When a query has a large enough result set that we don't want to
materialize it at once, the SDK provides `query.each()`, which yields
each matching object to a processor, one at a time. This is a handy
tool, but when the number of records to process is that large, we can
also benefit from processing them in batches. Compare the
following operations:

```
// Processing N items involves N calls to Parse Server
new Parse.Query('Item').each((item) => {
  item.set('foo', 'bar');
  return item.save();
})

// Processing N items involves ceil(N / batchSize) calls to Parse Server
const batchSize = 200;
new Parse.Query('Item').eachBatch((items) => {
  items.forEach(item => item.set('foo', 'bar'));
  return Parse.Object.saveAll(items, { batchSize });
}, { batchSize });
```

The `.each()` method is already written to fetch the objects in
batches; we are effectively splitting it into two methods:
- `.eachBatch()` does the work to fetch objects in batches and yield
  each batch
- `.each()` calls `.eachBatch()` and handles invoking the callback for
  every item in the batch

Aside: I considered adding the undocumented `batchSize` attribute
already accepted by `.each()`, `.filter()`, `.map()` and `.reduce()` to
the public API, but I suspect that by the time you are
performance-sensitive enough to tune that parameter, you are better
served by switching to `eachBatch()`; the current implementation of
`.each()` constructs a promise chain with a node for every value in the
batch, and my experience with very long promise chains has been a bit
frustrating.

Co-authored-by: Arthur Cinader <700572+acinader@users.noreply.github.com>
diff --git a/src/ParseQuery.js b/src/ParseQuery.js
@@ -856,15 +856,16 @@ class ParseQuery {
   }
 
   /**
-   * Iterates over each result of a query, calling a callback for each one. If
-   * the callback returns a promise, the iteration will not continue until
+   * Iterates over objects matching a query, calling a callback for each batch.
+   * If the callback returns a promise, the iteration will not continue until
    * that promise has been fulfilled. If the callback returns a rejected
-   * promise, then iteration will stop with that error. The items are
-   * processed in an unspecified order. The query may not have any sort order,
-   * and may not use limit or skip.
+   * promise, then iteration will stop with that error. The items are processed
+   * in an unspecified order. The query may not have any sort order, and may
+   * not use limit or skip.
    * @param {Function} callback Callback that will be called with each result
    *     of the query.
    * @param {Object} options Valid options are:<ul>
+   *   <li>batchSize: How many objects to yield in each batch (default: 100)
    *   <li>useMasterKey: In Cloud Code and Node only, causes the Master Key to
    *     be used for this request.
    *   <li>sessionToken: A valid session token, used for making a request on
@@ -873,7 +874,7 @@ class ParseQuery {
    * @return {Promise} A promise that will be fulfilled once the
    *     iteration has completed.
    */
-  each(callback: (obj: ParseObject) => any, options?: BatchOptions): Promise<Array<ParseObject>> {
+  eachBatch(callback: (objs: Array<ParseObject>) => Promise<*>, options?: BatchOptions): Promise<void> {
     options = options || {};
 
     if (this._order || this._skip || (this._limit >= 0)) {
@@ -882,8 +883,6 @@ class ParseQuery {
     }
 
     const query = new ParseQuery(this.className);
-    // We can override the batch size from the options.
-    // This is undocumented, but useful for testing.
     query._limit = options.batchSize || 100;
     query._include = this._include.map((i) => {
       return i;
@@ -927,14 +926,7 @@ class ParseQuery {
       return !finished;
     }, () => {
       return query.find(findOptions).then((results) => {
-        let callbacksDone = Promise.resolve();
-        results.forEach((result) => {
-          callbacksDone = callbacksDone.then(() => {
-            return callback(result);
-          });
-        });
-
-        return callbacksDone.then(() => {
+        return Promise.resolve(callback(results)).then(() => {
           if (results.length >= query._limit) {
             query.greaterThan('objectId', results[results.length - 1].id);
           } else {
@@ -945,6 +937,36 @@ class ParseQuery {
     });
   }
 
+  /**
+   * Iterates over each result of a query, calling a callback for each one. If
+   * the callback returns a promise, the iteration will not continue until
+   * that promise has been fulfilled. If the callback returns a rejected
+   * promise, then iteration will stop with that error. The items are
+   * processed in an unspecified order. The query may not have any sort order,
+   * and may not use limit or skip.
+   * @param {Function} callback Callback that will be called with each result
+   *     of the query.
+   * @param {Object} options Valid options are:<ul>
+   *   <li>useMasterKey: In Cloud Code and Node only, causes the Master Key to
+   *     be used for this request.
+   *   <li>sessionToken: A valid session token, used for making a request on
+   *       behalf of a specific user.
+   * </ul>
+   * @return {Promise} A promise that will be fulfilled once the
+   *     iteration has completed.
+   */
+  each(callback: (obj: ParseObject) => any, options?: BatchOptions): Promise<void> {
+    return this.eachBatch((results) => {
+      let callbacksDone = Promise.resolve();
+      results.forEach((result) => {
+        callbacksDone = callbacksDone.then(() => {
+          return callback(result);
+        });
+      });
+      return callbacksDone;
+    }, options);
+  }
+
   /**
    * Adds a hint to force index selection. (https://docs.mongodb.com/manual/reference/operator/meta/hint/)
    *
diff --git a/src/__tests__/ParseQuery-test.js b/src/__tests__/ParseQuery-test.js
@@ -1503,6 +1503,134 @@ describe('ParseQuery', () => {
       });
   });
 
+  describe('iterating over batches with .eachBatch()', () => {
+    let findMock;
+    beforeEach(() => {
+      findMock = jest.fn();
+      findMock.mockReturnValueOnce(Promise.resolve({
+        results: [
+          { objectId: 'I55', size: 'medium', name: 'Product 55' },
+          { objectId: 'I89', size: 'small', name: 'Product 89' },
+        ]
+      }));
+      findMock.mockReturnValueOnce(Promise.resolve({
+        results: [
+          { objectId: 'I91', size: 'small', name: 'Product 91' },
+        ]
+      }));
+      CoreManager.setQueryController({
+        aggregate() {},
+        find: findMock,
+      });
+    });
+
+    it('passes query attributes through to the REST API', async () => {
+      const q = new ParseQuery('Item');
+      q.containedIn('size', ['small', 'medium']);
+      q.matchesKeyInQuery(
+        'name',
+        'productName',
+        new ParseQuery('Review').equalTo('stars', 5)
+      );
+      q.equalTo('valid', true);
+      q.select('size', 'name');
+      q.includeAll();
+      q.hint('_id_');
+
+      await q.eachBatch(() => {});
+
+      expect(findMock).toHaveBeenCalledTimes(1);
+      const [className, params, options] = findMock.mock.calls[0];
+      expect(className).toBe('Item')
+      expect(params).toEqual({
+        limit: 100,
+        order: 'objectId',
+        keys: 'size,name',
+        include: '*',
+        hint: '_id_',
+        where: {
+          size: {
+            $in: ['small', 'medium']
+          },
+          name: {
+            $select: {
+              key: 'productName',
+              query: {
+                className: 'Review',
+                where: {
+                  stars: 5
+                }
+              }
+            }
+          },
+          valid: true
+        }
+      });
+      expect(options.requestTask).toBeDefined();
+    });
+
+    it('passes options through to the REST API', async () => {
+      const batchOptions = {
+        useMasterKey: true,
+        sessionToken: '1234',
+        batchSize: 50,
+      };
+      const q = new ParseQuery('Item');
+      await q.eachBatch(() => {}, batchOptions);
+      expect(findMock).toHaveBeenCalledTimes(1);
+      const [className, params, options] = findMock.mock.calls[0];
+      expect(className).toBe('Item');
+      expect(params).toEqual({
+        limit: 50,
+        order: 'objectId',
+        where: {},
+      });
+      expect(options.useMasterKey).toBe(true);
+      expect(options.sessionToken).toEqual('1234');
+    });
+
+    it('only makes one request when the results fit in one page', async () => {
+      const q = new ParseQuery('Item');
+      await q.eachBatch(() => {});
+      expect(findMock).toHaveBeenCalledTimes(1);
+    });
+
+    it('makes more requests when the results do not fit in one page', async () => {
+      const q = new ParseQuery('Item');
+      await q.eachBatch(() => {}, { batchSize: 2 });
+      expect(findMock).toHaveBeenCalledTimes(2);
+    })
+
+    it('stops iteration when the callback returns a promise that rejects', async () => {
+      let callCount = 0;
+      const callback = () => {
+        callCount++;
+        return Promise.reject(new Error('Callback rejecting'));
+      };
+      const q = new ParseQuery('Item');
+      await q.eachBatch(callback, { batchSize: 2 }).catch(() => {});
+      expect(callCount).toBe(1);
+    });
+
+    it('handles a synchronous callback', async () => {
+      const results = [];
+      const q = new ParseQuery('Item');
+      await q.eachBatch((items) => {
+        items.map(item => results.push(item.attributes.size))
+      });
+      expect(results).toEqual(['medium', 'small']);
+    });
+
+    it('handles an asynchronous callback', async () => {
+      const results = [];
+      const q = new ParseQuery('Item');
+      await q.eachBatch((items) => {
+        items.map(item => results.push(item.attributes.size))
+        return new Promise(resolve => setImmediate(resolve));
+      });
+      expect(results).toEqual(['medium', 'small']);
+    });
+  });
 
   it('can iterate over results with each()', (done) => {
     CoreManager.setQueryController({