Improve generation of plans once all path options are computed (#2316)

At a very high level, query planning works in 2 main steps: 1. for every "leaf" of the query, we compute which possible options (paths) can lead to it. 2. we take all those options and generates the corresponding plans, which means we compute all the (non trivially inefficient) possible plan that would be valid for the query. For each of those plan, we compute the cost of the plan and keep the best plan. In some cases, the 1st step gives quite a few possible options, and generating all the corresponding plans is costly. This patch attempts to first generate plans that are a bit more likely to be the most efficient, or at least fairly good. When then use the cost of those plans to something cut the generation of other plans early. Essentially, as we generate a plan corresponding to a set of options, if we notice that the cost gets higher than the best we've found so far before we've handled all the paths, then we can give up on that set of options without generating everything. Doing so have little to no impact on very simple plans, but for more complex ones it can drastically reduce the generation time (anecdotically, some real world plan gets more than a 10x improvement on generation speed).
apollographql · Jan 10, 2023 · 88ae498 · 88ae498
1 parent a940e11
commit 88ae498
Show file tree

Hide file tree

Showing 5 changed files with 299 additions and 47 deletions.
diff --git a/gateway-js/CHANGELOG.md b/gateway-js/CHANGELOG.md
@@ -19,6 +19,7 @@ This CHANGELOG pertains only to Apollo Federation packages in the 2.x range. The
 - Adds support for the 0.3 version of the tag spec, which adds `@tag` directive support for the `SCHEMA` location [PR #2314](https://github.com/apollographql/federation/pull/2314).
 - Fix potential issue with nested `@defer` in non-deferrable case [PR #2312](https://github.com/apollographql/federation/pull/2312).
 - Fixes composition issues with `@interfaceObject` [PR #2318](https://github.com/apollographql/federation/pull/2318).
+- Improves generation of plans once all path options are computed [PR #2316](https://github.com/apollographql/federation/pull/2316).
 
 ## 2.2.2
 

diff --git a/query-planner-js/CHANGELOG.md b/query-planner-js/CHANGELOG.md
@@ -6,6 +6,7 @@ This CHANGELOG pertains only to Apollo Federation packages in the 2.x range. The
 
 - Fix potential issue with nested `@defer` in non-deferrable case [PR #2312](https://github.com/apollographql/federation/pull/2312).
 - Fix possible assertion error during query planning [PR #2299](https://github.com/apollographql/federation/pull/2299).
+- Improves generation of plans once all path options are computed [PR #2316](https://github.com/apollographql/federation/pull/2316).
 
 ## 2.2.2
 

diff --git a/query-planner-js/src/__tests__/generateAllPlans.test.ts b/query-planner-js/src/__tests__/generateAllPlans.test.ts
@@ -0,0 +1,96 @@
+import { assert } from '@apollo/federation-internals';
+import { generateAllPlansAndFindBest } from '../generateAllPlans';
+
+function generateTestPlans(initial: string[], choices: string[][]): { best: string[], generated: string[][], evaluated: string[][] } {
+  const generated: string[][] = [];
+  const evaluated: string[][] = [];
+  const { best } = generateAllPlansAndFindBest({
+    initial,
+    toAdd: choices,
+    addFct: (p, c) => {
+      const r = p.concat(c);
+      if (r.length === initial.length + choices.length) {
+        generated.push(r);
+      }
+      return r;
+    },
+    costFct: (p) => {
+      evaluated.push(p);
+      return p.reduce((acc, v) => acc + v.length, 0);
+    },
+  });
+  return { best, generated, evaluated };
+}
+
+function expectSamePlans(expected: string[][], actual: string[][]) {
+  // We don't want to rely on ordering (the tests ensures we get the best plan that we want, and the rest doesn't matter).
+  const normalize = (v: string[]) => v.join('');
+  const expectedSet = new Set<string>(expected.map((e) => normalize(e)));
+  for (const value of actual) {
+    const normalized = normalize(value);
+    assert(expectedSet.has(normalized), `Unexpected plan [${value.join(', ')}] is not in [\n${expected.map((e) => `[ ${e.join(', ')} ]`).join('\n')}\n]`);
+  }
+
+  const actualSet = new Set<string>(actual.map((e) => normalize(e)));
+  for (const value of expected) {
+    const normalized = normalize(value);
+    assert(actualSet.has(normalized), `Expected plan [${value.join(', ')}] not found in [\n${actual.map((e) => `[ ${e.join(', ')} ]`).join('\n')}\n]`);
+  }
+}
+
+
+test('Pick elements at same index first', () => {
+  const { best, generated } = generateTestPlans(
+    ['I'], 
+    [
+      [ 'A1', 'B1'],
+      [ 'A2', 'B2'],
+      [ 'A3', 'B3'],
+    ],
+  );
+  expect(best).toEqual(['I', 'A1', 'A2', 'A3']);
+  expect(generated[0]).toEqual(['I', 'A1', 'A2', 'A3']);
+  expect(generated[1]).toEqual(['I', 'B1', 'B2', 'B3']);
+})
+
+test('Bail early for more costly elements', () => {
+  const { best, generated } = generateTestPlans(
+    ['I'], 
+    [
+      [ 'A1', 'B1VeryCostly'],
+      [ 'A2', 'B2Co'],
+      [ 'A3', 'B3'],
+    ],
+  );
+
+  expect(best).toEqual(['I', 'A1', 'A2', 'A3']);
+  // We should ignore plans with both B1 and B2 due there cost. So we should have just 2 plans.
+  expect(generated).toHaveLength(2);
+  expect(generated[0]).toEqual(['I', 'A1', 'A2', 'A3']);
+  expect(generated[1]).toEqual(['I', 'A1', 'A2', 'B3']);
+})
+
+test('Handles branches of various sizes', () => {
+  const { best, generated } = generateTestPlans(
+    ['I'], 
+    [
+      [ 'A1x', 'B1'],
+      [ 'A2', 'B2Costly', 'C2'],
+      [ 'A3'],
+      [ 'A4', 'B4' ],
+    ],
+  );
+
+  expect(best).toEqual(['I', 'B1', 'A2', 'A3', 'A4']);
+  // We should generate every option, except those including `B2Costly`
+  expectSamePlans([
+    [ 'I', 'A1x', 'A2', 'A3', 'A4' ],
+    [ 'I', 'A1x', 'A2', 'A3', 'B4' ],
+    [ 'I', 'A1x', 'C2', 'A3', 'A4' ],
+    [ 'I', 'A1x', 'C2', 'A3', 'B4' ],
+    [ 'I', 'B1', 'A2', 'A3', 'A4' ],
+    [ 'I', 'B1', 'A2', 'A3', 'B4' ],
+    [ 'I', 'B1', 'C2', 'A3', 'A4' ],
+    [ 'I', 'B1', 'C2', 'A3', 'B4' ],
+  ], generated);
+})
diff --git a/query-planner-js/src/buildPlan.ts b/query-planner-js/src/buildPlan.ts
@@ -93,6 +93,7 @@ import {
 import { stripIgnoredCharacters, print, parse, OperationTypeNode } from "graphql";
 import { DeferredNode, FetchDataInputRewrite, FetchDataOutputRewrite } from ".";
 import { enforceQueryPlannerConfigDefaults, QueryPlannerConfig } from "./config";
+import { generateAllPlansAndFindBest } from "./generateAllPlans";
 import { QueryPlan, ResponsePath, SequenceNode, PlanNode, ParallelNode, FetchNode, trimSelectionNodes } from "./QueryPlan";
 
 const debug = newDebugLogger('plan');
@@ -507,45 +508,35 @@ class QueryPlanningTaversal<RV extends Vertex> {
       initialDependencyGraph = this.updatedDependencyGraph(this.newDependencyGraph(), initialTree);
       if (idxFirstOfLengthOne === 0) {
         // Well, we have the only possible plan; it's also the best.
-        this.onNewPlan(initialDependencyGraph, initialTree);
+        this.bestPlan = [initialDependencyGraph, initialTree, this.cost(initialDependencyGraph)];
         return;
       }
     }
 
     const otherTrees = this.closedBranches.slice(0, idxFirstOfLengthOne).map(b => b.map(opt => PathTree.createFromOpPaths(this.subgraphs, this.startVertex, opt)));
-    this.generateAllPlans(initialDependencyGraph, initialTree, otherTrees);
-  }
-
-  generateAllPlans(initialDependencyGraph: FetchDependencyGraph, initialTree: OpPathTree<RV>, others: OpPathTree<RV>[][]) {
-    // Track, for each element, at which index we are
-    const eltIndexes = new Array<number>(others.length);
-    let totalCombinations = 1;
-    for (let i = 0; i < others.length; ++i) {
-      const eltSize = others[i].length;
-      assert(eltSize > 0, "Got empty option: this shouldn't have happened");
-      eltIndexes[i] = 0;
-      totalCombinations *= eltSize;
-    }
-
-    for (let i = 0; i < totalCombinations; ++i){
-      const dependencyGraph = initialDependencyGraph.clone();
-      let tree = initialTree;
-      for (let j = 0; j < others.length; ++j) {
-        const t = others[j][eltIndexes[j]];
-        this.updatedDependencyGraph(dependencyGraph, t);
-        tree = tree.merge(t);
-      }
-      this.onNewPlan(dependencyGraph, tree);
-
-      for (let idx = 0; idx < others.length; ++idx) {
-        if (eltIndexes[idx] == others[idx].length - 1) {
-          eltIndexes[idx] = 0;
-        } else {
-          eltIndexes[idx] += 1;
-          break;
-        }
-      }
-    }
+    const { best, cost} = generateAllPlansAndFindBest({
+      initial: { graph: initialDependencyGraph, tree: initialTree },
+      toAdd: otherTrees,
+      addFct: (p, t) => {
+        const updatedDependencyGraph = p.graph.clone();
+        this.updatedDependencyGraph(updatedDependencyGraph, t);
+        const updatedTree = p.tree.merge(t);
+        return { graph: updatedDependencyGraph, tree: updatedTree };
+      },
+      costFct: (p) => this.cost(p.graph),
+      onPlan: (p, cost, prevCost) => {
+        debug.log(() => {
+          if (!prevCost) {
+            return `Computed plan with cost ${cost}: ${p.tree}`;
+          } else if (cost > prevCost) {
+            return `Found better with cost ${cost} (previous had cost ${prevCost}: ${p.tree}`;
+          } else {
+            return `Ignoring plan with cost ${cost} (a better plan with cost ${prevCost} exists): ${p.tree}`
+          }
+        });
+      },
+    });
+    this.bestPlan = [best.graph, best.tree, cost];
   }
 
   private cost(dependencyGraph: FetchDependencyGraph): number {
@@ -581,19 +572,6 @@ class QueryPlanningTaversal<RV extends Vertex> {
     // condition" within `advanceSimultaneousPathsWithOperation`.
     return bestPlan ? { satisfied: true, cost: bestPlan[2], pathTree: bestPlan[1] } : unsatisfiedConditionsResolution;
   }
-
-  private onNewPlan(dependencyGraph: FetchDependencyGraph, tree: OpPathTree<RV>) {
-    const cost = this.cost(dependencyGraph);
-    //if (isTopLevel) {
-    //  console.log(`[PLAN] cost: ${cost}, path:\n${pathSet.toString('', true)}`);
-    //}
-    if (!this.bestPlan || cost < this.bestPlan[2]) {
-      debug.log(() => this.bestPlan ? `Found better with cost ${cost} (previous had cost ${this.bestPlan[2]}): ${tree}`: `Computed plan with cost ${cost}: ${tree}`);
-      this.bestPlan = [dependencyGraph, tree, cost];
-    } else {
-      debug.log(() => `Ignoring plan with cost ${cost} (a better plan with cost ${this.bestPlan![2]} exists): ${tree}`);
-    }
-  }
 }
 
 type UnhandledGroups = [FetchGroup, UnhandledParentRelations][];

diff --git a/query-planner-js/src/generateAllPlans.ts b/query-planner-js/src/generateAllPlans.ts
@@ -0,0 +1,176 @@
+import { assert } from "@apollo/federation-internals";
+
+type Choices<T> = (T | undefined)[];
+
+type Partial<P, T> = {
+  partial: P,
+  remaining: Choices<T>[],
+  isRoot: boolean,
+  index?: number,
+}
+
+/**
+ * Given some initial partial plan and a list of options for the remaining parts that need to be added to that plan to make it complete,
+ * this method "efficiently" generates (or at least evaluate) all the possible complete plans and the returns the "best" one (the one
+ * with the lowest cost).
+ *
+ * Note that this method abstracts the actualy types of both plans and additional elements to add to the plan, and this both for clarity
+ * and to make testing of this method easier. But type parameter `P` should be though of as abstracting a query plan (in practice, it
+ * is instanciated to a pair of a [`DependencyGraph`, corresponding `PathTree`]), whith `E` should be though of as an additional element
+ * to add to the plan to make it complete (instanciated in practice by a `PathTree` for ... reasons ... but one that really correspond to
+ * a single `GraphPath`).
+ *
+ * As said above, this method takes 2 arguments:
+ * - `initial` is a partial plan, and corresponds to all the parts of the query being planned for which there no choices (and
+ *   theoretically can be empty, though very very rarely is in practice).
+ * - `toAdd` is the list of additional elements to add to `initial` ot make a full plan of the query being planned. Each element of
+ *   `toAdd` corresponds to one of the query "leaf" and is itself a list of all the possible options for that "leaf".
+ *
+ * In other words, a comple plan is obtained by picking one choice in each of the element of `toAdd` (so picking `toAdd.length` element)
+ * and adding them all to `initial`. The question being, which particular choice for each element of `toAdd` yield the best plan.
+ *
+ * Of course, the total number of possible plans is the cartesian product of `toAdd`, which can get large, and so this method is trying
+ * to trim some of the options. For that, the general idea is that we first generate one of the plan, compute its cost, and then as
+ * we build other options, we can check as we pick elements of `toAdd` the cost of what we get, and if we ever get a higher cost than
+ * the one fo the complete plan we already have, then there is no point in checking the remaining elements, and we can thus cut all
+ * the options for the remaining elements. In other words, if a partial plan is ever already more costly than another full plan we
+ * have computed, then adding more will never get us a better plan.
+ *
+ * Of course, this method is not guaranteed to save work, and in the worst case, we'll still generate all plans. But when a "good" 
+ * plan is generated early, it can save a lot of computing work.
+ *
+ * And the 2nd "trick" of this method is that it starts by generating the plans that correspond to picking choices in `toAdd` at
+ * the same indexes, and this because this often actually generate good plans. The reason is that the order of choices for each
+ * element of `toAdd` is not necessarily random, because the algorithm generating paths is not random either. In other words, elements
+ * at similar indexes have some good change to correspond to similar choices, and so will tend to correspond to good plans.
+ *
+ * @param initial - the initial partial plan to use. 
+ * @param toAdd - a list of the remaining "elements" to add to `initial`. Each element of `toAdd` correspond to multiple choice we can 
+ *   use to plan that particular element.
+ * @param addFct - how to obtain a new plan by taking some plan and adding a new element to it.
+ * @param costFct - how to compute the cost of a plan.
+ * @param onPlan - an optional method called on every _complete_ plan generated by this method, with both the cost of that plan and
+ *   the best cost we have generated thus far (if that's not the first plan generated). This mostly exists to allow some debugging.
+ */
+export function generateAllPlansAndFindBest<P, E>({
+  initial,
+  toAdd,
+  addFct,
+  costFct,
+  onPlan = () => {},
+}: {
+  initial: P,
+  toAdd: E[][],
+  addFct: (p: P, e: E) => P,
+  costFct: (p: P) => number,
+  onPlan?: (p: P, cost: number, previousCost: number | undefined) => void,
+}): {
+  best: P,
+  cost: number,
+}{
+  const stack: Partial<P, E>[] = [{
+    partial: initial,
+    remaining: toAdd,
+    isRoot: true,
+    index: 0,
+  }];
+
+  let min: { best: P, cost: number } | undefined = undefined;
+
+  while (stack.length > 0) {
+    const { partial, remaining, isRoot, index } = stack.pop()!;
+    const nextChoices = remaining[0];
+    const otherChoices = remaining.slice(1);
+
+    const pickedIndex = pickNext(index, nextChoices);
+    const { extracted, updatedChoices, isLast } = extract(pickedIndex, nextChoices);
+
+    if (!isLast) {
+      // First, re-insert what correspond to all the choices that dot _not_ pick `extracted`.
+      insertInStack({
+        partial,
+        remaining: [updatedChoices].concat(otherChoices),
+        isRoot,
+        index: isRoot && index !== undefined && index < nextChoices.length - 1 ? index + 1 : undefined,
+      }, stack);
+    }
+
+    const newPartial = addFct(partial, extracted);
+    if (otherChoices.length === 0) {
+      // We have a complete plan. Compute the cost, check if it is best and based on that,
+      // provide it to `onGenerated` or discard it.
+      const cost = costFct(newPartial);
+      const isNewMin = min === undefined || cost < min.cost;
+      onPlan(newPartial, cost, min?.cost);
+      if (isNewMin) {
+        min = {
+          best: newPartial,
+          cost
+        };
+      }
+      continue;
+    }
+
+    if (min !== undefined) {
+      // We're not done, but we've already generated a plan with a score, so we check if
+      // what we have so far is already more costly, and if it is, we skip this branch
+      // entirely.
+      const cost = costFct(newPartial);
+      if (cost >= min.cost) {
+        continue;
+      }
+    }
+
+    insertInStack({
+      partial: newPartial,
+      remaining: otherChoices,
+      isRoot: false,
+      index
+    }, stack);
+  }
+
+  assert(min, 'A plan should have been found');
+  return min;
+}
+
+function insertInStack<P, E>(elt: Partial<P, E>, stack: Partial<P, E>[]) {
+  // We push elements with a fixed index at the end so they are handled first.
+  if (elt.index !== undefined) {
+    stack.push(elt);
+  } else {
+    stack.unshift(elt);
+  }
+}
+
+function pickNext<E>(index: number | undefined, remaining: Choices<E>): number {
+  if (index === undefined || index >= remaining.length) {
+    for (let i = 0; i < remaining.length; i++) {
+      if (remaining[i] !== undefined) {
+        return i;
+      }
+    }
+    assert(false, 'Passed a "remaining" with all undefined');
+  } else {
+    assert(remaining[index] !== undefined, () => `Invalid index ${index}`);
+    return index;
+  }
+}
+
+function extract<E>(index: number, choices: Choices<E>): { extracted: E, isLast: boolean, updatedChoices: Choices<E>} {
+  const extracted = choices[index];
+  assert(extracted !== undefined, () => `Index ${index} of ${choices} is undefined`)
+  const updatedChoices = new Array<E | undefined>(choices.length);
+  let isLast = true;
+  for (let i = 0; i < choices.length; i++) {
+    if (i !== index) {
+      isLast &&= choices[i] === undefined;
+      updatedChoices[i] = choices[i];
+    }
+  }
+  return {
+    extracted,
+    isLast,
+    updatedChoices,
+  };
+} 
+