From 3e9e2fa393916134d2ded6320bac34fb787a7ccf Mon Sep 17 00:00:00 2001 From: Paul Armstrong Date: Fri, 5 Jan 2024 09:25:54 -0800 Subject: [PATCH] feat(tasks): add --shard (#534) **Problem:** Current GH action strategy may not be best for all use cases. It can possibly overload systems and use too many action runners at once. **Solution:** Add a `--shard` argument to `tasks`. --- .changeset/hungry-pugs-protect.md | 5 ++ .github/workflows/pull-request.yaml | 36 +-------- docs/tailwind.config.ts | 8 +- modules/onerepo/src/core/tasks/README.md | 79 +++++++++++++++---- .../tasks/commands/__tests__/tasks.test.ts | 48 +++++++++++ .../onerepo/src/core/tasks/commands/tasks.ts | 56 ++++++++++++- modules/onerepo/vitest.config.js | 4 + 7 files changed, 178 insertions(+), 58 deletions(-) create mode 100644 .changeset/hungry-pugs-protect.md create mode 100644 modules/onerepo/vitest.config.js diff --git a/.changeset/hungry-pugs-protect.md b/.changeset/hungry-pugs-protect.md new file mode 100644 index 00000000..5b37cf64 --- /dev/null +++ b/.changeset/hungry-pugs-protect.md @@ -0,0 +1,5 @@ +--- +'onerepo': minor +--- + +Added `--shard` argument to `one tasks` to shard tasks across multiple runners. 
diff --git a/.github/workflows/pull-request.yaml b/.github/workflows/pull-request.yaml index 61fdee02..e8c7198a 100644 --- a/.github/workflows/pull-request.yaml +++ b/.github/workflows/pull-request.yaml @@ -3,43 +3,16 @@ name: Pull request on: pull_request jobs: - setup: - runs-on: ubuntu-latest - outputs: - tasks: ${{ steps.tasks.outputs.tasks }} - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - uses: actions/setup-node@v3 - with: - node-version: 18 - cache: 'yarn' - - - run: yarn - - - uses: paularmstrong/onerepo/actions/get-tasks@main - id: tasks - with: - cli: ./bin/one.mjs - lifecycle: pre-merge - verbosity: 5 - tasks: runs-on: ubuntu-latest - needs: setup - if: ${{ fromJSON(needs.setup.outputs.tasks).parallel != '[]' && fromJSON(needs.setup.outputs.tasks).parallel != '[]' }} strategy: fail-fast: false matrix: - task: - - ${{ fromJSON(needs.setup.outputs.tasks).parallel }} - - ${{ fromJSON(needs.setup.outputs.tasks).serial }} + index: [1, 2, 3] node: - 18 - 20 - name: v${{ matrix.node }} ${{ join(matrix.task.*.name, ', ') }} + name: v${{ matrix.node }} ${{ matrix.index }}/3 steps: - uses: actions/checkout@v3 with: @@ -52,7 +25,4 @@ jobs: - run: yarn - - uses: paularmstrong/onerepo/actions/run-task@main - with: - task: | - ${{ toJSON(matrix.task) }} + - run: ./bin/one.mjs tasks -c pre-merge --shard=${{ matrix.index }}/3 -vvvv diff --git a/docs/tailwind.config.ts b/docs/tailwind.config.ts index 59f5526a..a9fd0871 100644 --- a/docs/tailwind.config.ts +++ b/docs/tailwind.config.ts @@ -2,7 +2,7 @@ import type { Config } from 'tailwindcss'; import typography from '@tailwindcss/typography'; export default { - content: ['src/**/*.{astro,md,mdx,tsx}', '../**/*.{mdx,md}', 'astro.config.mjs'], + content: ['src/**/*.{astro,md,mdx,tsx}', '../**/*.{mdx,md}', 'astro.config.ts'], darkMode: 'class', plugins: [ typography, @@ -39,7 +39,7 @@ export default { }, '[data-line-numbers]': { counterReset: 'line', - '& .line::before': { + '& [data-line]::before': { 
counterIncrement: 'line', content: 'counter(line)', display: 'inline-block', @@ -75,10 +75,6 @@ export default { paddingLeft: '0', paddingRight: '0', }, - '> pre > code > span': { - paddingBottom: theme('spacing.1'), - paddingTop: theme('spacing.1'), - }, }, '[data-rehype-pretty-code-title]': { width: 'max-content', diff --git a/modules/onerepo/src/core/tasks/README.md b/modules/onerepo/src/core/tasks/README.md index f0a51575..3cca9fe7 100644 --- a/modules/onerepo/src/core/tasks/README.md +++ b/modules/onerepo/src/core/tasks/README.md @@ -158,9 +158,71 @@ Some tokens in tasks can be used as special replacement values that the `tasks` ### GitHub Actions -While the `tasks` command does its best to split out parallel and serial tasks to run as fast as possible on a single machine, using GitHub Actions can save even more time by spreading out each individual task to single instances using a matrix strategy. +While the `tasks` command does its best to split out parallel and serial tasks to run as fast as possible on a single machine, using GitHub Actions can save even more time by spreading out to separate runners using a matrix strategy. oneRepo offers a few options for this: -To do this, we make use of the `task --list` argument to write a JSON-formatted list of tasks to standard output, then read that in with a matrix strategy as a second job. +#### 1. Single runner + +The following strategy will run all tasks on a single runner, the same way as if they were run on a developer's machine. + +```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {20} +name: Pull request +on: pull_request + +jobs: + tasks: + runs-on: ubuntu-latest + name: oneRepo pre-merge tasks + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - uses: actions/setup-node@v3 + with: + node-version: ${{ matrix.node }} + cache: 'yarn' + + - run: yarn + + - run: yarn one tasks -c pre-merge +``` + +#### 2. 
Sharding + +This strategy creates a known number of action runners and distributes tasks across them. If you have a limited number of action runners, sharding may be the best option. + +```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {9-10,24} +name: Pull request +on: pull_request + +jobs: + tasks: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + index: [1, 2, 3] + name: oneRepo ${{ matrix.index }}/3 + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - uses: actions/setup-node@v3 + with: + node-version: ${{ matrix.node }} + cache: 'yarn' + + - run: yarn + + - run: yarn one tasks -c pre-merge --shard=${{ matrix.index }}/3 -vvvv +``` + +#### 3. Task per runner + +This strategy is the most distributed and best if you have a lot of capacity and available action runners. It also gives the clearest and fastest feedback. + +To do this, we make use of the `tasks --list` argument to write a JSON-formatted list of tasks to standard output using a `setup` job, then read that in with a matrix strategy as a second job. ```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {8-11, 15-18, 26, 36-38, 60-63} name: Pull request @@ -228,19 +290,6 @@ jobs: ${{ toJSON(matrix.task) }} ``` -## Disabling - -So you have decided that `tasks` are not for you? That’s okay. 
You can deactivate the core plugin by passing `false` to the configuration: - -```js -setup({ - core: { - // Prevents all usage of `tasks` from your CLI - tasks: false, - }, -}).then(({ run }) => run()); -``` - ## Usage diff --git a/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts b/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts index 650b0101..bcfa0d43 100644 --- a/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts +++ b/modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts @@ -271,4 +271,52 @@ describe('handler', () => { serial: [], }); }); + + test('can shard the tasks', async () => { + vi.spyOn(git, 'getModifiedFiles').mockResolvedValue(['root.ts']); + const graph = getGraph(path.join(__dirname, '__fixtures__', 'repo')); + + await run('--lifecycle deploy --list --shard=1/2', { graph }); + expect(JSON.parse(out)).toEqual({ + parallel: [ + [ + { + args: ['"deployroot"'], + cmd: 'echo', + meta: { name: 'fixture-root', slug: 'fixture-root' }, + name: 'echo "deployroot" (fixture-root)', + opts: { cwd: '.' 
}, + }, + ], + [ + { + args: ['"deployburritos"'], + cmd: 'echo', + meta: { name: 'fixture-burritos', slug: 'fixture-burritos' }, + name: 'echo "deployburritos" (fixture-burritos)', + opts: { cwd: 'modules/burritos' }, + }, + ], + ], + serial: [], + }); + + out = ''; + + await run('--lifecycle deploy --list --shard=2/2', { graph }); + expect(JSON.parse(out)).toEqual({ + parallel: [ + [ + { + args: ['"deploytacos"'], + cmd: 'echo', + meta: { name: 'fixture-tacos', slug: 'fixture-tacos' }, + name: 'echo "deploytacos" (fixture-tacos)', + opts: { cwd: 'modules/tacos' }, + }, + ], + ], + serial: [], + }); + }); }); diff --git a/modules/onerepo/src/core/tasks/commands/tasks.ts b/modules/onerepo/src/core/tasks/commands/tasks.ts index 388d9e35..5360998d 100644 --- a/modules/onerepo/src/core/tasks/commands/tasks.ts +++ b/modules/onerepo/src/core/tasks/commands/tasks.ts @@ -7,7 +7,7 @@ import * as builders from '@onerepo/builders'; import type { PromiseFn, RunSpec } from '@onerepo/subprocess'; import type { Graph, Lifecycle, Task, TaskDef, Workspace } from '@onerepo/graph'; import type { Builder, Handler } from '@onerepo/yargs'; -import { bufferSubLogger } from '@onerepo/logger'; +import { bufferSubLogger, getLogger } from '@onerepo/logger'; import type { Logger } from '@onerepo/logger'; import createYargs from 'yargs/yargs'; import { StagingWorkflow } from '@onerepo/git'; @@ -32,6 +32,7 @@ export type Argv = { ignore: Array; lifecycle: Lifecycle; list?: boolean; + shard?: string; 'ignore-unstaged'?: boolean; } & builders.WithWorkspaces & builders.WithAffected; @@ -73,18 +74,41 @@ export const builder: Builder = (yargs) => default: [], hidden: true, }) + .option('shard', { + type: 'string', + description: 'Shard the lifecycle across multiple instances. 
Format as `<shard-number>/<total-shards>`', + }) + .example( + '$0 --lifecycle=pre-merge --shard=1/5', + 'Shard all tasks for the `pre-merge` lifecycle into 5 groups and runs the first shard.', + ) + .example( + '$0 --lifecycle=pre-merge --shard=3/5', + 'Shard all tasks for the `pre-merge` lifecycle into 5 groups and runs the third shard.', + ) .option('ignore-unstaged', { description: 'Force staged-changes mode on or off. If `true`, task determination and runners will ignore unstaged changes.', type: 'boolean', }) + .middleware(async (argv) => { + const logger = getLogger(); + if ('shard' in argv && typeof argv.shard === 'string') { + if (!/^\d+\/\d+$/.test(argv.shard)) { + const msg = '--shard must be in the format <shard-number>/<total-shards>. Example: --shard=1/2'; + logger.error(msg); + await logger.end(); + yargs.exit(1, new Error(msg)); + } + } + }) .describe( 'staged', 'Backup unstaged files and use only those on the git stage to calculate affected files or workspaces. Will re-apply the unstaged files upon exit.', ); export const handler: Handler = async (argv, { getWorkspaces, graph, logger, config }) => { - const { affected, ignore, lifecycle, list, 'from-ref': fromRef, staged, 'through-ref': throughRef } = argv; + const { affected, ignore, lifecycle, list, 'from-ref': fromRef, shard, staged, 'through-ref': throughRef } = argv; const stagingWorkflow = new StagingWorkflow({ graph, logger }); if (staged) { @@ -115,8 +139,8 @@ export const handler: Handler = async (argv, { getWorkspaces, graph, logge return; } - const serialTasks: TaskList = []; - const parallelTasks: TaskList = []; + let serialTasks: TaskList = []; + let parallelTasks: TaskList = []; let hasTasks = false; for (const workspace of graph.workspaces) { @@ -145,6 +169,14 @@ export const handler: Handler = async (argv, { getWorkspaces, graph, logge }); } + if (shard) { + const [shardNum, total] = shard.split('/').map((n) => parseInt(n, 10)); + serialTasks = shardTasks(serialTasks, shardNum, total); + setupStep.debug(serialTasks); + parallelTasks = 
shardTasks(parallelTasks, shardNum, total); + setupStep.debug(parallelTasks); + } + await setupStep.end(); if (list) { @@ -297,3 +329,13 @@ function slugify(str: string) { type ExtendedRunSpec = RunSpec & { meta: { name: string; slug: string }; fn?: PromiseFn }; type TaskList = Array>; + +/** + * Returns the tasks assigned to the 1-based `shard` when `tasks` is cut into contiguous chunks of `Math.ceil(tasks.length / totalShards)` entries. + * Shard numbers below 1, or past the last chunk (for example shard 3/3 of 4 tasks), yield an empty list instead of `undefined`. + */ +function shardTasks(tasks: TaskList, shard: number, totalShards: number) { + const shardSize = Math.ceil(tasks.length / totalShards); + const start = (shard - 1) * shardSize; + return start >= 0 ? tasks.slice(start, start + shardSize) : []; +} diff --git a/modules/onerepo/vitest.config.js b/modules/onerepo/vitest.config.js new file mode 100644 index 00000000..b47f48e0 --- /dev/null +++ b/modules/onerepo/vitest.config.js @@ -0,0 +1,4 @@ +// eslint-disable-next-line import/no-extraneous-dependencies +import { defineProject } from '@internal/vitest-config'; + +export default defineProject({});