From 3e9e2fa393916134d2ded6320bac34fb787a7ccf Mon Sep 17 00:00:00 2001
From: Paul Armstrong <paularmstrong@users.noreply.github.com>
Date: Fri, 5 Jan 2024 09:25:54 -0800
Subject: [PATCH] feat(tasks): add --shard (#534)

**Problem:**

The current GitHub Actions strategy (one runner per task) may not be the
best fit for all use cases. It can overload systems and use too many
action runners at once.

**Solution:**

Add a `--shard` argument to `tasks`.
---
 .changeset/hungry-pugs-protect.md             |  5 ++
 .github/workflows/pull-request.yaml           | 36 +--------
 docs/tailwind.config.ts                       |  8 +-
 modules/onerepo/src/core/tasks/README.md      | 79 +++++++++++++++----
 .../tasks/commands/__tests__/tasks.test.ts    | 48 +++++++++++
 .../onerepo/src/core/tasks/commands/tasks.ts  | 56 ++++++++++++-
 modules/onerepo/vitest.config.js              |  4 +
 7 files changed, 178 insertions(+), 58 deletions(-)
 create mode 100644 .changeset/hungry-pugs-protect.md
 create mode 100644 modules/onerepo/vitest.config.js

diff --git a/.changeset/hungry-pugs-protect.md b/.changeset/hungry-pugs-protect.md
new file mode 100644
index 00000000..5b37cf64
--- /dev/null
+++ b/.changeset/hungry-pugs-protect.md
@@ -0,0 +1,5 @@
+---
+'onerepo': minor
+---
+
+Added `--shard` argument to `one tasks` to shard tasks across multiple runners.
diff --git a/.github/workflows/pull-request.yaml b/.github/workflows/pull-request.yaml
index 61fdee02..e8c7198a 100644
--- a/.github/workflows/pull-request.yaml
+++ b/.github/workflows/pull-request.yaml
@@ -3,43 +3,16 @@ name: Pull request
 on: pull_request
 
 jobs:
-  setup:
-    runs-on: ubuntu-latest
-    outputs:
-      tasks: ${{ steps.tasks.outputs.tasks }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 18
-          cache: 'yarn'
-
-      - run: yarn
-
-      - uses: paularmstrong/onerepo/actions/get-tasks@main
-        id: tasks
-        with:
-          cli: ./bin/one.mjs
-          lifecycle: pre-merge
-          verbosity: 5
-
   tasks:
     runs-on: ubuntu-latest
-    needs: setup
-    if: ${{ fromJSON(needs.setup.outputs.tasks).parallel != '[]' && fromJSON(needs.setup.outputs.tasks).parallel != '[]' }}
     strategy:
       fail-fast: false
       matrix:
-        task:
-          - ${{ fromJSON(needs.setup.outputs.tasks).parallel }}
-          - ${{ fromJSON(needs.setup.outputs.tasks).serial }}
+        index: [1, 2, 3]
         node:
           - 18
           - 20
-    name: v${{ matrix.node }} ${{ join(matrix.task.*.name, ', ') }}
+    name: v${{ matrix.node }} ${{ matrix.index }}/3
     steps:
       - uses: actions/checkout@v3
         with:
@@ -52,7 +25,4 @@ jobs:
 
       - run: yarn
 
-      - uses: paularmstrong/onerepo/actions/run-task@main
-        with:
-          task: |
-            ${{ toJSON(matrix.task) }}
+      - run: ./bin/one.mjs tasks -c pre-merge --shard=${{ matrix.index }}/3 -vvvv
diff --git a/docs/tailwind.config.ts b/docs/tailwind.config.ts
index 59f5526a..a9fd0871 100644
--- a/docs/tailwind.config.ts
+++ b/docs/tailwind.config.ts
@@ -2,7 +2,7 @@ import type { Config } from 'tailwindcss';
 import typography from '@tailwindcss/typography';
 
 export default {
-	content: ['src/**/*.{astro,md,mdx,tsx}', '../**/*.{mdx,md}', 'astro.config.mjs'],
+	content: ['src/**/*.{astro,md,mdx,tsx}', '../**/*.{mdx,md}', 'astro.config.ts'],
 	darkMode: 'class',
 	plugins: [
 		typography,
@@ -39,7 +39,7 @@ export default {
 					},
 					'[data-line-numbers]': {
 						counterReset: 'line',
-						'& .line::before': {
+						'& [data-line]::before': {
 							counterIncrement: 'line',
 							content: 'counter(line)',
 							display: 'inline-block',
@@ -75,10 +75,6 @@ export default {
 							paddingLeft: '0',
 							paddingRight: '0',
 						},
-						'> pre > code > span': {
-							paddingBottom: theme('spacing.1'),
-							paddingTop: theme('spacing.1'),
-						},
 					},
 					'[data-rehype-pretty-code-title]': {
 						width: 'max-content',
diff --git a/modules/onerepo/src/core/tasks/README.md b/modules/onerepo/src/core/tasks/README.md
index f0a51575..3cca9fe7 100644
--- a/modules/onerepo/src/core/tasks/README.md
+++ b/modules/onerepo/src/core/tasks/README.md
@@ -158,9 +158,71 @@ Some tokens in tasks can be used as special replacement values that the `tasks`
 
 ### GitHub Actions
 
-While the `tasks` command does its best to split out parallel and serial tasks to run as fast as possible on a single machine, using GitHub Actions can save even more time by spreading out each individual task to single instances using a matrix strategy.
+While the `tasks` command does its best to split out parallel and serial tasks to run as fast as possible on a single machine, using GitHub Actions can save even more time by spreading out to separate runners using a matrix strategy. oneRepo offers a few options for this:
 
-To do this, we make use of the `task --list` argument to write a JSON-formatted list of tasks to standard output, then read that in with a matrix strategy as a second job.
+#### 1. Single runner
+
+The following strategy will run all tasks on a single runner, the same way as if they were run on a developer's machine.
+
+```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {20}
+name: Pull request
+on: pull_request
+
+jobs:
+  tasks:
+    runs-on: ubuntu-latest
+    name: oneRepo pre-merge tasks
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-node@v3
+        with:
+          node-version: 18
+          cache: 'yarn'
+
+      - run: yarn
+
+      - run: yarn one tasks -c pre-merge
+```
+
+#### 2. Sharding
+
+This strategy creates a known number of action runners and distributes tasks across them. If you have a limited number of action runners, sharding may be the best option.
+
+```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {9-10,24}
+name: Pull request
+on: pull_request
+
+jobs:
+  tasks:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        index: [1, 2, 3]
+    name: oneRepo ${{ matrix.index }}/3
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-node@v3
+        with:
+          node-version: 18
+          cache: 'yarn'
+
+      - run: yarn
+
+      - run: yarn one tasks -c pre-merge --shard=${{ matrix.index }}/3 -vvvv
+```
+
+#### 3. Task per runner
+
+This strategy is the most distributed and best if you have a lot of capacity and available action runners. It also gives the clearest and fastest feedback.
+
+To do this, we make use of the `tasks --list` argument to write a JSON-formatted list of tasks to standard output using a `setup` job, then read that in with a matrix strategy as a second job.
 
 ```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {8-11, 15-18, 26, 36-38, 60-63}
 name: Pull request
@@ -228,19 +290,6 @@ jobs:
             ${{ toJSON(matrix.task) }}
 ```
 
-## Disabling
-
-So you have decided that `tasks` are not for you? That’s okay. You can deactivate the core plugin by passing `false` to the configuration:
-
-```js
-setup({
-	core: {
-		// Prevents all usage of `tasks` from your CLI
-		tasks: false,
-	},
-}).then(({ run }) => run());
-```
-
 ## Usage
 
 <!-- start-auto-generated-from-cli-tasks -->
diff --git a/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts b/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts
index 650b0101..bcfa0d43 100644
--- a/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts
+++ b/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts
@@ -271,4 +271,52 @@ describe('handler', () => {
 			serial: [],
 		});
 	});
+
+	test('can shard the tasks', async () => {
+		vi.spyOn(git, 'getModifiedFiles').mockResolvedValue(['root.ts']);
+		const graph = getGraph(path.join(__dirname, '__fixtures__', 'repo'));
+		// First shard of two: expected to list the fixture-root and fixture-burritos task groups.
+		await run('--lifecycle deploy --list --shard=1/2', { graph });
+		expect(JSON.parse(out)).toEqual({
+			parallel: [
+				[
+					{
+						args: ['"deployroot"'],
+						cmd: 'echo',
+						meta: { name: 'fixture-root', slug: 'fixture-root' },
+						name: 'echo "deployroot" (fixture-root)',
+						opts: { cwd: '.' },
+					},
+				],
+				[
+					{
+						args: ['"deployburritos"'],
+						cmd: 'echo',
+						meta: { name: 'fixture-burritos', slug: 'fixture-burritos' },
+						name: 'echo "deployburritos" (fixture-burritos)',
+						opts: { cwd: 'modules/burritos' },
+					},
+				],
+			],
+			serial: [],
+		});
+		// `out` is parsed as JSON after each run, so clear it before listing the second shard.
+		out = '';
+		// Second shard of two: expected to list only the fixture-tacos task group.
+		await run('--lifecycle deploy --list --shard=2/2', { graph });
+		expect(JSON.parse(out)).toEqual({
+			parallel: [
+				[
+					{
+						args: ['"deploytacos"'],
+						cmd: 'echo',
+						meta: { name: 'fixture-tacos', slug: 'fixture-tacos' },
+						name: 'echo "deploytacos" (fixture-tacos)',
+						opts: { cwd: 'modules/tacos' },
+					},
+				],
+			],
+			serial: [],
+		});
+	});
 });
diff --git a/modules/onerepo/src/core/tasks/commands/tasks.ts b/modules/onerepo/src/core/tasks/commands/tasks.ts
index 388d9e35..5360998d 100644
--- a/modules/onerepo/src/core/tasks/commands/tasks.ts
+++ b/modules/onerepo/src/core/tasks/commands/tasks.ts
@@ -7,7 +7,7 @@ import * as builders from '@onerepo/builders';
 import type { PromiseFn, RunSpec } from '@onerepo/subprocess';
 import type { Graph, Lifecycle, Task, TaskDef, Workspace } from '@onerepo/graph';
 import type { Builder, Handler } from '@onerepo/yargs';
-import { bufferSubLogger } from '@onerepo/logger';
+import { bufferSubLogger, getLogger } from '@onerepo/logger';
 import type { Logger } from '@onerepo/logger';
 import createYargs from 'yargs/yargs';
 import { StagingWorkflow } from '@onerepo/git';
@@ -32,6 +32,7 @@ export type Argv = {
 	ignore: Array<string>;
 	lifecycle: Lifecycle;
 	list?: boolean;
+	shard?: string;
 	'ignore-unstaged'?: boolean;
 } & builders.WithWorkspaces &
 	builders.WithAffected;
@@ -73,18 +74,41 @@ export const builder: Builder<Argv> = (yargs) =>
 			default: [],
 			hidden: true,
 		})
+		.option('shard', {
+			type: 'string',
+			description: 'Shard the lifecycle across multiple instances. Format as `<shard-number>/<total-shards>`',
+		})
+		.example(
+			'$0 --lifecycle=pre-merge --shard=1/5',
+			'Shard all tasks for the `pre-merge` lifecycle into 5 groups and runs the first shard.',
+		)
+		.example(
+			'$0 --lifecycle=pre-merge --shard=3/5',
+			'Shard all tasks for the `pre-merge` lifecycle into 5 groups and runs the third shard.',
+		)
 		.option('ignore-unstaged', {
 			description:
 				'Force staged-changes mode on or off. If `true`, task determination and runners will ignore unstaged changes.',
 			type: 'boolean',
 		})
+		.middleware(async (argv) => {
+			const logger = getLogger();
+			// Validate early: the handler feeds both halves of `--shard` straight into `parseInt`.
+			// The pattern is anchored so that values like `a1/2b` or `1/2/3` are rejected as well.
+			if ('shard' in argv && typeof argv.shard === 'string' && !/^\d+\/\d+$/.test(argv.shard)) {
+				const msg = '--shard must be in the format <shard-num>/<total-shards>. Example: --shard=1/2';
+				logger.error(msg);
+				await logger.end();
+				yargs.exit(1, new Error(msg));
+			}
+		})
 		.describe(
 			'staged',
 			'Backup unstaged files and use only those on the git stage to calculate affected files or workspaces. Will re-apply the unstaged files upon exit.',
 		);
 
 export const handler: Handler<Argv> = async (argv, { getWorkspaces, graph, logger, config }) => {
-	const { affected, ignore, lifecycle, list, 'from-ref': fromRef, staged, 'through-ref': throughRef } = argv;
+	const { affected, ignore, lifecycle, list, 'from-ref': fromRef, shard, staged, 'through-ref': throughRef } = argv;
 
 	const stagingWorkflow = new StagingWorkflow({ graph, logger });
 	if (staged) {
@@ -115,8 +139,8 @@ export const handler: Handler<Argv> = async (argv, { getWorkspaces, graph, logge
 		return;
 	}
 
-	const serialTasks: TaskList = [];
-	const parallelTasks: TaskList = [];
+	let serialTasks: TaskList = [];
+	let parallelTasks: TaskList = [];
 	let hasTasks = false;
 
 	for (const workspace of graph.workspaces) {
@@ -145,6 +169,14 @@ export const handler: Handler<Argv> = async (argv, { getWorkspaces, graph, logge
 		});
 	}
 
+	if (shard) {
+		const [shardNum, total] = shard.split('/').map((n) => parseInt(n, 10));
+		serialTasks = shardTasks(serialTasks, shardNum, total);
+		setupStep.debug(serialTasks);
+		parallelTasks = shardTasks(parallelTasks, shardNum, total);
+		setupStep.debug(parallelTasks);
+	}
+
 	await setupStep.end();
 
 	if (list) {
@@ -297,3 +329,19 @@ function slugify(str: string) {
 
 type ExtendedRunSpec = RunSpec & { meta: { name: string; slug: string }; fn?: PromiseFn };
 type TaskList = Array<Array<ExtendedRunSpec>>;
+
+/**
+ * Pick one shard out of `tasks`, which is cut into at most `totalShards` contiguous, near-equal chunks.
+ *
+ * @param tasks Task groups to distribute; never mutated.
+ * @param shard 1-indexed shard to return.
+ * @param totalShards Number of shards requested.
+ * @returns The chunk for `shard`, or an empty list when there are fewer chunks than shards
+ *   (e.g. 4 groups over 3 shards yields only 2 chunks) or the shard spec is out of range.
+ */
+function shardTasks(tasks: TaskList, shard: number, totalShards: number): TaskList {
+	// `Math.ceil` keeps chunks contiguous; trailing shards may therefore receive nothing.
+	const shardSize = Math.ceil(tasks.length / totalShards);
+	const start = (shard - 1) * shardSize;
+	return start >= 0 && shardSize > 0 ? tasks.slice(start, start + shardSize) : [];
+}
diff --git a/modules/onerepo/vitest.config.js b/modules/onerepo/vitest.config.js
new file mode 100644
index 00000000..b47f48e0
--- /dev/null
+++ b/modules/onerepo/vitest.config.js
@@ -0,0 +1,4 @@
+// eslint-disable-next-line import/no-extraneous-dependencies
+import { defineProject } from '@internal/vitest-config';
+
+export default defineProject({}); // no package-specific overrides passed; presumably the shared config supplies the defaults — TODO confirm