From e43a843d23c2affde6b1382ecdf2a44d897ff409 Mon Sep 17 00:00:00 2001 From: Noah Silas <noah@hustle.com> Date: Fri, 13 Mar 2020 12:47:22 -0700 Subject: [PATCH 1/3] Inflate included pointers in-place I'd like to parallelize the fetching of included documents, but it would be tricky to do in the world where these methods are cloning the response. Instead of trying to do tricky processing on the responses to merge all the pointers, we can mutate the response in-place. This kind of in-place mutation can be viewed as more dangerous, particularly with multiple promises trying to run this kind of replacement in a row. However, we have some nice guarantees here: - Each path passed is unique; we don't need to worry about multiple calls each trying to replace the same documents. - When we start doing a replacement, all the path prefixes have already been replaced. In the current implementation this is known because we sort the include paths by path length, so if a path `a.b.c` is being processed, then the prefix paths `a` and `a.b` must have already been processed. This in-place mutation might also have a nice benefit in terms of memory usage; queries with lots of includes would have been rebuilding the result tree multiple times, causing extra allocations. The in-place method avoids this, and so might reduce memory usage for those queries. --- src/RestQuery.js | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/src/RestQuery.js b/src/RestQuery.js index 468446561c..56c8b6f373 100644 --- a/src/RestQuery.js +++ b/src/RestQuery.js @@ -760,8 +760,7 @@ RestQuery.prototype.handleInclude = function() { this.restOptions ); if (pathResponse.then) { - return pathResponse.then(newResponse => { - this.response = newResponse; + return pathResponse.then(() => { this.include = this.include.slice(1); return this.handleInclude(); }); @@ -769,8 +768,6 @@ RestQuery.prototype.handleInclude = function() { this.include = this.include.slice(1); return this.handleInclude(); } - - return pathResponse; }; //Returns a promise of a processed set of results @@ -821,7 +818,10 @@ RestQuery.prototype.runAfterFindTrigger = function() { // Adds included values to the response. // Path is a list of field names. -// Returns a promise for an augmented response. +// +// Modifies the response in place by replacing pointers with fetched objects +// Returns a promise that resolves when all includes have been fetched and +// substituted into the response. function includePath(config, auth, response, path, restOptions = {}) { var pointers = findPointers(response.results, path); if (pointers.length == 0) { @@ -905,19 +905,12 @@ function includePath(config, auth, response, path, restOptions = {}) { return replace; }, {}); - var resp = { - results: replacePointers(response.results, path, replace), - }; - if (response.count) { - resp.count = response.count; - } - return resp; + replacePointers(response.results, path, replace); }); } // Object may be a list of REST-format object to find pointers in, or // it may be a single object. -// If the path yields things that aren't pointers, this throws an error. // Path is a list of fields to search into. // Returns a list of pointers in REST format. function findPointers(object, path) { @@ -951,8 +944,8 @@ function findPointers(object, path) { // in, or it may be a single object. // Path is a list of fields to search into. // replace is a map from object id -> object. -// Returns something analogous to object, but with the appropriate -// pointers inflated. +// +// Performs in-place replacements on object function replacePointers(object, path, replace) { if (object instanceof Array) { return object @@ -964,6 +957,7 @@ function replacePointers(object, path, replace) { return object; } + // base case: we're returning a replacement if (path.length === 0) { if (object && object.__type === 'Pointer') { return replace[object.objectId]; @@ -971,20 +965,19 @@ function replacePointers(object, path, replace) { return object; } + // penultimate case: we're looking at the object holding a reference + // to be mutated + else if (path.length === 1) { + object[path[0]] = replacePointers(object[path[0]], [], replace); + return; + } + var subobject = object[path[0]]; if (!subobject) { return object; } - var newsub = replacePointers(subobject, path.slice(1), replace); - var answer = {}; - for (var key in object) { - if (key == path[0]) { - answer[key] = newsub; - } else { - answer[key] = object[key]; - } - } - return answer; + + return replacePointers(subobject, path.slice(1), replace); } // Finds a subobject that has the given key, if there is one. From e1c134578a06e734eb9f4beab07e54d60bd259f4 Mon Sep 17 00:00:00 2001 From: Noah Silas <noah@hustle.com> Date: Thu, 12 Mar 2020 16:22:03 -0700 Subject: [PATCH 2/3] Fetch multiple includes concurrently When preloading multiple fields, we can get some extra performance by doing multiple fetches simultaneously. Consider a query fetching comments, and requesting that ParseServer preload "post" and "author" pointers: > GET /classes/Comment?include=post,author In this case, we first need to fetch the resulting "Comment" documents, but after that we should be able to fetch the documents referenced by the `post` and `author` fields of the results simultaneously, as they don't depend on each other at all. Things get a little trickier when we have nested fields: > GET /classes/Comment?include=post,post.author,post.category To resolve this query, we first need to fetch the related posts, and only once we've added the data about those posts into the results tree can we scan it for the `post.author` and `post.category` pointers. But, once that first fetch is completed, we can unblock both of those nested queries! The solution here is to build what is basically a dependency graph out of promises; each include path blocks while it is waiting for whatever path it depends on to finish loading. Finally, once we have that graph, we return a promise that depends on every node in it. Aside: Technically we only need to depend on leaf nodes, but there shouldn't be any meaningful difference between waiting on the leafs and waiting on the entire graph, and this way we don't have to do any analysis to find the leafs) It's possible that for degenerate cases (hundreds of includes in a single query), this could result in performance degradation as many queries are kicked off in parallel. For the more common case of just a few top level includes, this should be a noticeable speed up as we remove a "waterfall" style dependency graph. Improve readability of `RestQuery.handleInclude` There's quite a bit of trickiness going on here, so for the benefit of future maintainers, lets add some documentation and clear up some variable names. - Documented the preconditions that we expect when we're entering this function. These are all met by the current implementation of the caller, but it's helpful to someone trying to verify the correctness of this function. - Added a lengthier description of what is going on with the map of `promises` - Renamed some variables to help make their purpose clearer --- src/RestQuery.js | 66 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/src/RestQuery.js b/src/RestQuery.js index 56c8b6f373..fbc3d1982b 100644 --- a/src/RestQuery.js +++ b/src/RestQuery.js @@ -747,27 +747,61 @@ RestQuery.prototype.handleExcludeKeys = function() { }; // Augments this.response with data at the paths provided in this.include. +// +// Preconditions: +// - `this.include` is an array of arrays of strings; (in flow parlance, Array<Array<string>>) +// +// - `this.include` is de-duplicated. This ensures that we don't try to fetch +// the same objects twice. +// +// - For each value in `this.include` with length > 1, there is also +// an earlier value for the prefix of that value. +// +// Example: ['a', 'b', 'c'] in the array implies that ['a', 'b'] is also in +// the array, at an earlier position). +// +// This prevents trying to follow pointers on unfetched objects. RestQuery.prototype.handleInclude = function() { if (this.include.length == 0) { return; } - var pathResponse = includePath( - this.config, - this.auth, - this.response, - this.include[0], - this.restOptions - ); - if (pathResponse.then) { - return pathResponse.then(() => { - this.include = this.include.slice(1); - return this.handleInclude(); - }); - } else if (this.include.length > 0) { - this.include = this.include.slice(1); - return this.handleInclude(); - } + // The list of includes form a sort of a tree - Each path should wait to + // start trying to load until its parent path has finished loading (so that + // the pointers it is trying to read and fetch are in the object tree). + // + // So, for instance, if we have an include of ['a', 'b', 'c'], that must + // wait on the include of ['a', 'b'] to finish, which must wait on the include + // of ['a'] to finish. + // + // This `promises` object is a map of dotted paths to promises that resolve + // when that path has finished loading into the tree. One special case is the + // empty path (represented by the empty string). This represents the root of + // the tree, which is `this.response` and is already fetched. We set a + // pre-resolved promise at that level, meaning that include paths with only + // one component (like `['a']`) will chain onto that resolved promise and + // are immediately unblocked. + const promises = { '': Promise.resolve() }; + + this.include.forEach(path => { + const dottedPath = path.join('.'); + + // Get the promise for the parent path + const parentDottedPath = path.slice(0, -1).join('.'); + const parentPromise = promises[parentDottedPath]; + + // Once the parent promise has resolved, do this path's load step + const loadPromise = parentPromise.then(() => + includePath(this.config, this.auth, this.response, path, this.restOptions) + ); + + // Put our promise into the promises map, so child paths can find and chain + // off of it + promises[dottedPath] = loadPromise; + }); + + // Wait for all includes to be fetched and merged in to the response tree + return Promise.all(Object.values(promises)); }; //Returns a promise of a processed set of results From c23b09a8670f1d3c16800660777e8f1984bc9fa3 Mon Sep 17 00:00:00 2001 From: Noah Silas <noah@hustle.com> Date: Sun, 15 Mar 2020 11:53:09 -0700 Subject: [PATCH 3/3] Add a test for empty nested includes I had a thought that I might not be handling the case where `includePath` doesn't return a promise, but it turns out to work because now I'm only calling that method from inside a promise handler. Let's still commit the test to prevent any regressions in the future. --- spec/ParseQuery.spec.js | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/spec/ParseQuery.spec.js b/spec/ParseQuery.spec.js index ee4727404a..5075a08926 100644 --- a/spec/ParseQuery.spec.js +++ b/spec/ParseQuery.spec.js @@ -2439,6 +2439,27 @@ describe('Parse.Query testing', () => { }); }); + it('handles nested includes with no results', async () => { + const obj = new TestObject({ foo: 'bar' }); + await obj.save(); + + const query = new Parse.Query(TestObject) + .equalTo('objectId', obj.id) + .include('fake.path'); + + // This query with should not reject + const results = await query.find(); + + // ... and we should get back the one object we saved + expect(results.length).toBe(1); + + // ... and it should not add the `fake` attribute from the query that + // doesn't exist on the object + expect( + Object.prototype.hasOwnProperty.call(results[0].attributes, 'fake') + ).toBeFalse(); + }); + it("include doesn't make dirty wrong", function(done) { const Parent = Parse.Object.extend('ParentObject'); const Child = Parse.Object.extend('ChildObject');