Skip to content

Commit

Permalink
feat(tasks): add --shard (#534)
Browse files Browse the repository at this point in the history
**Problem:**

Current GH action strategy may not be best for all use cases. It can
possibly overload systems and use too many action runners at once.

**Solution:**

Add a `--shard` argument to `tasks`.
  • Loading branch information
paularmstrong authored Jan 5, 2024
1 parent c6a3d76 commit 3e9e2fa
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 58 deletions.
5 changes: 5 additions & 0 deletions .changeset/hungry-pugs-protect.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'onerepo': minor
---

Added `--shard` argument to `one tasks` to shard tasks across multiple runners.
36 changes: 3 additions & 33 deletions .github/workflows/pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,16 @@ name: Pull request
on: pull_request

jobs:
setup:
runs-on: ubuntu-latest
outputs:
tasks: ${{ steps.tasks.outputs.tasks }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0

- uses: actions/setup-node@v3
with:
node-version: 18
cache: 'yarn'

- run: yarn

- uses: paularmstrong/onerepo/actions/get-tasks@main
id: tasks
with:
cli: ./bin/one.mjs
lifecycle: pre-merge
verbosity: 5

tasks:
runs-on: ubuntu-latest
needs: setup
if: ${{ fromJSON(needs.setup.outputs.tasks).parallel != '[]' && fromJSON(needs.setup.outputs.tasks).parallel != '[]' }}
strategy:
fail-fast: false
matrix:
task:
- ${{ fromJSON(needs.setup.outputs.tasks).parallel }}
- ${{ fromJSON(needs.setup.outputs.tasks).serial }}
index: [1, 2, 3]
node:
- 18
- 20
name: v${{ matrix.node }} ${{ join(matrix.task.*.name, ', ') }}
name: v${{ matrix.node }} ${{ matrix.index }}/3
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -52,7 +25,4 @@ jobs:

- run: yarn

- uses: paularmstrong/onerepo/actions/run-task@main
with:
task: |
${{ toJSON(matrix.task) }}
- run: ./bin/one.mjs tasks -c pre-merge --shard=${{ matrix.index }}/3 -vvvv
8 changes: 2 additions & 6 deletions docs/tailwind.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import type { Config } from 'tailwindcss';
import typography from '@tailwindcss/typography';

export default {
content: ['src/**/*.{astro,md,mdx,tsx}', '../**/*.{mdx,md}', 'astro.config.mjs'],
content: ['src/**/*.{astro,md,mdx,tsx}', '../**/*.{mdx,md}', 'astro.config.ts'],
darkMode: 'class',
plugins: [
typography,
Expand Down Expand Up @@ -39,7 +39,7 @@ export default {
},
'[data-line-numbers]': {
counterReset: 'line',
'& .line::before': {
'& [data-line]::before': {
counterIncrement: 'line',
content: 'counter(line)',
display: 'inline-block',
Expand Down Expand Up @@ -75,10 +75,6 @@ export default {
paddingLeft: '0',
paddingRight: '0',
},
'> pre > code > span': {
paddingBottom: theme('spacing.1'),
paddingTop: theme('spacing.1'),
},
},
'[data-rehype-pretty-code-title]': {
width: 'max-content',
Expand Down
79 changes: 64 additions & 15 deletions modules/onerepo/src/core/tasks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,71 @@ Some tokens in tasks can be used as special replacement values that the `tasks`

### GitHub Actions

While the `tasks` command does its best to split out parallel and serial tasks to run as fast as possible on a single machine, using GitHub Actions can save even more time by spreading out each individual task to single instances using a matrix strategy.
While the `tasks` command does its best to split out parallel and serial tasks to run as fast as possible on a single machine, using GitHub Actions can save even more time by spreading tasks across separate runners using a matrix strategy. oneRepo offers a few options for this:

To do this, we make use of the `task --list` argument to write a JSON-formatted list of tasks to standard output, then read that in with a matrix strategy as a second job.
#### 1. Single runner

The following strategy will run all tasks on a single runner, the same way as if they were run on a developer's machine.

```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {20}
name: Pull request
on: pull_request

jobs:
  tasks:
    runs-on: ubuntu-latest
    name: oneRepo pre-merge tasks
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - uses: actions/setup-node@v3
        with:
          node-version: 18 # this job defines no matrix, so the version is set explicitly
          cache: 'yarn'

      - run: yarn

      - run: yarn one tasks -c pre-merge
```
#### 2. Sharding
This strategy creates a known number of action runners and distributes tasks across them. If you have a limited number of action runners, sharding may be the best option.
```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {9-10,24}
name: Pull request
on: pull_request

jobs:
  tasks:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        index: [1, 2, 3]
    name: oneRepo ${{ matrix.index }}/3
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - uses: actions/setup-node@v3
        with:
          node-version: 18 # the matrix only defines `index`, so the version is set explicitly
          cache: 'yarn'

      - run: yarn

      - run: yarn one tasks -c pre-merge --shard=${{ matrix.index }}/3 -vvvv
```
#### 3. Task per runner
This strategy is the most distributed and best if you have a lot of capacity and available action runners. It also gives the clearest and fastest feedback.
To do this, we make use of the `tasks --list` argument to write a JSON-formatted list of tasks to standard output using a `setup` job, then read that in with a matrix strategy as a second job.

```yaml title=".github/workflows/pull-request.yaml" showLineNumbers {8-11, 15-18, 26, 36-38, 60-63}
name: Pull request
Expand Down Expand Up @@ -228,19 +290,6 @@ jobs:
${{ toJSON(matrix.task) }}
```
## Disabling
So you have decided that `tasks` are not for you? That’s okay. You can deactivate the core plugin by passing `false` to the configuration:

```js
setup({
core: {
// Prevents all usage of `tasks` from your CLI
tasks: false,
},
}).then(({ run }) => run());
```

## Usage
<!-- start-auto-generated-from-cli-tasks -->
Expand Down
48 changes: 48 additions & 0 deletions modules/onerepo/src/core/tasks/commands/__tests__/tasks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -271,4 +271,52 @@ describe('handler', () => {
serial: [],
});
});

// Verifies that `--shard=<n>/<total>` restricts the listed tasks to the requested shard.
test('can shard the tasks', async () => {
// Report a single modified file (`root.ts`) from git for this run.
vi.spyOn(git, 'getModifiedFiles').mockResolvedValue(['root.ts']);
const graph = getGraph(path.join(__dirname, '__fixtures__', 'repo'));

// Shard 1 of 2: expects the fixture-root and fixture-burritos deploy tasks.
await run('--lifecycle deploy --list --shard=1/2', { graph });
expect(JSON.parse(out)).toEqual({
parallel: [
[
{
args: ['"deployroot"'],
cmd: 'echo',
meta: { name: 'fixture-root', slug: 'fixture-root' },
name: 'echo "deployroot" (fixture-root)',
opts: { cwd: '.' },
},
],
[
{
args: ['"deployburritos"'],
cmd: 'echo',
meta: { name: 'fixture-burritos', slug: 'fixture-burritos' },
name: 'echo "deployburritos" (fixture-burritos)',
opts: { cwd: 'modules/burritos' },
},
],
],
serial: [],
});

// NOTE(review): `out` is presumably the shared buffer that captures command output — it is
// cleared here so the second `JSON.parse(out)` only sees the second run. Confirm against the
// test file's setup.
out = '';

// Shard 2 of 2: expects only the remaining fixture-tacos deploy task.
await run('--lifecycle deploy --list --shard=2/2', { graph });
expect(JSON.parse(out)).toEqual({
parallel: [
[
{
args: ['"deploytacos"'],
cmd: 'echo',
meta: { name: 'fixture-tacos', slug: 'fixture-tacos' },
name: 'echo "deploytacos" (fixture-tacos)',
opts: { cwd: 'modules/tacos' },
},
],
],
serial: [],
});
});
});
56 changes: 52 additions & 4 deletions modules/onerepo/src/core/tasks/commands/tasks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import * as builders from '@onerepo/builders';
import type { PromiseFn, RunSpec } from '@onerepo/subprocess';
import type { Graph, Lifecycle, Task, TaskDef, Workspace } from '@onerepo/graph';
import type { Builder, Handler } from '@onerepo/yargs';
import { bufferSubLogger } from '@onerepo/logger';
import { bufferSubLogger, getLogger } from '@onerepo/logger';
import type { Logger } from '@onerepo/logger';
import createYargs from 'yargs/yargs';
import { StagingWorkflow } from '@onerepo/git';
Expand All @@ -32,6 +32,7 @@ export type Argv = {
ignore: Array<string>;
lifecycle: Lifecycle;
list?: boolean;
shard?: string;
'ignore-unstaged'?: boolean;
} & builders.WithWorkspaces &
builders.WithAffected;
Expand Down Expand Up @@ -73,18 +74,41 @@ export const builder: Builder<Argv> = (yargs) =>
default: [],
hidden: true,
})
.option('shard', {
type: 'string',
description: 'Shard the lifecycle across multiple instances. Format as `<shard-number>/<total-shards>`',
})
.example(
'$0 --lifecycle=pre-merge --shard=1/5',
'Shard all tasks for the `pre-merge` lifecycle into 5 groups and runs the first shard.',
)
.example(
'$0 --lifecycle=pre-merge --shard=3/5',
'Shard all tasks for the `pre-merge` lifecycle into 5 groups and runs the third shard.',
)
.option('ignore-unstaged', {
description:
'Force staged-changes mode on or off. If `true`, task determination and runners will ignore unstaged changes.',
type: 'boolean',
})
.middleware(async (argv) => {
  // Validate `--shard` before the handler runs so malformed values fail fast with a clear message.
  const logger = getLogger();
  if ('shard' in argv && typeof argv.shard === 'string') {
    let msg: string | undefined;
    // Anchored on both ends: the entire value must be `<digits>/<digits>`. An unanchored pattern
    // would also accept values such as `foo1/2bar` or `1/2/3`.
    if (!/^\d+\/\d+$/.test(argv.shard)) {
      msg = '--shard must be in the format <shard-num>/<total-shards>. Example: --shard=1/2';
    } else {
      const [shardNum, total] = argv.shard.split('/').map((n) => parseInt(n, 10));
      // Shards are 1-based; `0/3`, `4/3` and `1/0` are well-formed but select no valid shard.
      if (shardNum < 1 || shardNum > total) {
        msg = `--shard number must be between 1 and <total-shards>, inclusive. Received: --shard=${argv.shard}`;
      }
    }
    if (msg) {
      logger.error(msg);
      await logger.end();
      yargs.exit(1, new Error(msg));
    }
  }
})
.describe(
'staged',
'Backup unstaged files and use only those on the git stage to calculate affected files or workspaces. Will re-apply the unstaged files upon exit.',
);

export const handler: Handler<Argv> = async (argv, { getWorkspaces, graph, logger, config }) => {
const { affected, ignore, lifecycle, list, 'from-ref': fromRef, staged, 'through-ref': throughRef } = argv;
const { affected, ignore, lifecycle, list, 'from-ref': fromRef, shard, staged, 'through-ref': throughRef } = argv;

const stagingWorkflow = new StagingWorkflow({ graph, logger });
if (staged) {
Expand Down Expand Up @@ -115,8 +139,8 @@ export const handler: Handler<Argv> = async (argv, { getWorkspaces, graph, logge
return;
}

const serialTasks: TaskList = [];
const parallelTasks: TaskList = [];
let serialTasks: TaskList = [];
let parallelTasks: TaskList = [];
let hasTasks = false;

for (const workspace of graph.workspaces) {
Expand Down Expand Up @@ -145,6 +169,14 @@ export const handler: Handler<Argv> = async (argv, { getWorkspaces, graph, logge
});
}

if (shard) {
const [shardNum, total] = shard.split('/').map((n) => parseInt(n, 10));
serialTasks = shardTasks(serialTasks, shardNum, total);
setupStep.debug(serialTasks);
parallelTasks = shardTasks(parallelTasks, shardNum, total);
setupStep.debug(parallelTasks);
}

await setupStep.end();

if (list) {
Expand Down Expand Up @@ -297,3 +329,19 @@ function slugify(str: string) {

type ExtendedRunSpec = RunSpec & { meta: { name: string; slug: string }; fn?: PromiseFn };
type TaskList = Array<Array<ExtendedRunSpec>>;

/**
 * Select the portion of `tasks` that belongs to a single shard.
 *
 * The list is divided, in order, into contiguous chunks of `Math.ceil(tasks.length / totalShards)`
 * entries, and shard `n` (1-based) receives the n-th chunk. Because the chunk size is rounded up,
 * trailing shards can end up without a chunk (4 tasks over 3 shards yields chunks of 2 and 2, and
 * nothing for shard 3). Such shards receive an empty list instead of `undefined`, so callers can
 * always treat the result as an array.
 *
 * @param tasks Ordered list to distribute. It is not mutated.
 * @param shard 1-based number of the shard to select.
 * @param totalShards Total number of shards the list is divided across.
 * @returns The entries assigned to `shard`; an empty array when that shard has nothing to run.
 * @throws RangeError when `shard` or `totalShards` is not a positive integer, or when `shard`
 *   is greater than `totalShards`.
 */
function shardTasks<T>(tasks: Array<T>, shard: number, totalShards: number): Array<T> {
  const isPositiveInteger = (value: number) => Number.isInteger(value) && value >= 1;
  if (!isPositiveInteger(shard) || !isPositiveInteger(totalShards) || shard > totalShards) {
    throw new RangeError(
      `Invalid shard ${shard}/${totalShards}: expected 1 <= <shard-num> <= <total-shards>.`,
    );
  }

  const shardSize = Math.ceil(tasks.length / totalShards);
  const start = (shard - 1) * shardSize;
  // `slice` clamps to the array bounds, so a start index past the end yields `[]`.
  return tasks.slice(start, start + shardSize);
}
4 changes: 4 additions & 0 deletions modules/onerepo/vitest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Vitest project configuration for this workspace. An empty options object is passed to
// `defineProject`, so no workspace-specific overrides are set here.
// NOTE(review): presumably `@internal/vitest-config` supplies the shared defaults — confirm.
// eslint-disable-next-line import/no-extraneous-dependencies
import { defineProject } from '@internal/vitest-config';

export default defineProject({});

0 comments on commit 3e9e2fa

Please sign in to comment.