diff --git a/apps/webapp/app/components/runs/v3/CheckBatchCompletionDialog.tsx b/apps/webapp/app/components/runs/v3/CheckBatchCompletionDialog.tsx new file mode 100644 index 0000000000..70a51a3cf6 --- /dev/null +++ b/apps/webapp/app/components/runs/v3/CheckBatchCompletionDialog.tsx @@ -0,0 +1,57 @@ +import { DialogClose } from "@radix-ui/react-dialog"; +import { Form, useNavigation } from "@remix-run/react"; +import { Button } from "~/components/primitives/Buttons"; +import { DialogContent, DialogHeader } from "~/components/primitives/Dialog"; +import { FormButtons } from "~/components/primitives/FormButtons"; +import { Paragraph } from "~/components/primitives/Paragraph"; + +type CheckBatchCompletionDialogProps = { + batchId: string; + redirectPath: string; +}; + +export function CheckBatchCompletionDialog({ + batchId, + redirectPath, +}: CheckBatchCompletionDialogProps) { + const navigation = useNavigation(); + + const formAction = `/resources/batches/${batchId}/check-completion`; + const isLoading = navigation.formAction === formAction; + + return ( + + Try and resume batch +
+ + In rare cases, parent runs don't continue after child runs have completed. + + + If this doesn't help, please get in touch. We are working on a permanent fix for this. + + + + + } + cancelButton={ + + + + } + /> +
+
+ ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.v3.$projectParam.batches/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.v3.$projectParam.batches/route.tsx index d0b72b2043..a67414af3a 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.v3.$projectParam.batches/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.v3.$projectParam.batches/route.tsx @@ -1,6 +1,10 @@ -import { ExclamationCircleIcon } from "@heroicons/react/20/solid"; +import { + ArrowPathRoundedSquareIcon, + ArrowRightIcon, + ExclamationCircleIcon, +} from "@heroicons/react/20/solid"; import { BookOpenIcon } from "@heroicons/react/24/solid"; -import { useNavigation } from "@remix-run/react"; +import { useLocation, useNavigation } from "@remix-run/react"; import { LoaderFunctionArgs } from "@remix-run/server-runtime"; import { formatDuration } from "@trigger.dev/core/v3/utils/durations"; import { typedjson, useTypedLoaderData } from "remix-typedjson"; @@ -8,16 +12,19 @@ import { ListPagination } from "~/components/ListPagination"; import { AdminDebugTooltip } from "~/components/admin/debugTooltip"; import { EnvironmentLabel } from "~/components/environments/EnvironmentLabel"; import { PageBody, PageContainer } from "~/components/layout/AppLayout"; -import { LinkButton } from "~/components/primitives/Buttons"; +import { Button, LinkButton } from "~/components/primitives/Buttons"; import { DateTime } from "~/components/primitives/DateTime"; +import { Dialog, DialogTrigger } from "~/components/primitives/Dialog"; import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; import { Paragraph } from "~/components/primitives/Paragraph"; +import { PopoverMenuItem } from "~/components/primitives/Popover"; import { Spinner } from "~/components/primitives/Spinner"; import { Table, TableBlankRow, TableBody, TableCell, + TableCellMenu, TableHeader, TableHeaderCell, TableRow, @@ -29,12 +36,17 @@ import { BatchStatusCombo, descriptionForBatchStatus, } from "~/components/runs/v3/BatchStatus"; +import { CheckBatchCompletionDialog } from "~/components/runs/v3/CheckBatchCompletionDialog"; import { LiveTimer } from "~/components/runs/v3/LiveTimer"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; import { redirectWithErrorMessage } from "~/models/message.server"; import { findProjectBySlug } from "~/models/project.server"; -import { BatchList, BatchListPresenter } from "~/presenters/v3/BatchListPresenter.server"; +import { + BatchList, + BatchListItem, + BatchListPresenter, +} from "~/presenters/v3/BatchListPresenter.server"; import { requireUserId } from "~/services/session.server"; import { docsPath, ProjectParamSchema, v3BatchRunsPath } from "~/utils/pathBuilder"; @@ -150,11 +162,14 @@ function BatchesTable({ batches, hasFilters, filters }: BatchList) { Duration Created Finished + + Go to batch + {batches.length === 0 && !hasFilters ? ( - + {!isLoading && (
No batches @@ -162,7 +177,7 @@ function BatchesTable({ batches, hasFilters, filters }: BatchList) { )} ) : batches.length === 0 ? ( - +
No batches match these filters
@@ -215,13 +230,14 @@ function BatchesTable({ batches, hasFilters, filters }: BatchList) { {batch.finishedAt ? : "–"} + ); }) )} {isLoading && ( Loading… @@ -231,3 +247,48 @@ function BatchesTable({ batches, hasFilters, filters }: BatchList) { ); } + +function BatchActionsCell({ batch, path }: { batch: BatchListItem; path: string }) { + const location = useLocation(); + + if (batch.hasFinished) return {""}; + + return ( + + + {!batch.hasFinished && ( + + + + + + + )} + + } + /> + ); +} diff --git a/apps/webapp/app/routes/resources.batches.$batchId.check-completion.ts b/apps/webapp/app/routes/resources.batches.$batchId.check-completion.ts new file mode 100644 index 0000000000..bd1eff8b1e --- /dev/null +++ b/apps/webapp/app/routes/resources.batches.$batchId.check-completion.ts @@ -0,0 +1,70 @@ +import { parse } from "@conform-to/zod"; +import { ActionFunction, json } from "@remix-run/node"; +import { assertExhaustive } from "@trigger.dev/core"; +import { z } from "zod"; +import { redirectWithErrorMessage, redirectWithSuccessMessage } from "~/models/message.server"; +import { logger } from "~/services/logger.server"; +import { ResumeBatchRunService } from "~/v3/services/resumeBatchRun.server"; + +export const checkCompletionSchema = z.object({ + redirectUrl: z.string(), +}); + +const ParamSchema = z.object({ + batchId: z.string(), +}); + +export const action: ActionFunction = async ({ request, params }) => { + const { batchId } = ParamSchema.parse(params); + + const formData = await request.formData(); + const submission = parse(formData, { schema: checkCompletionSchema }); + + if (!submission.value) { + return json(submission); + } + + try { + const resumeBatchRunService = new ResumeBatchRunService(); + const resumeResult = await resumeBatchRunService.call(batchId); + + let message: string | undefined; + + switch (resumeResult) { + case "ERROR": { + throw "Unknown error during batch completion check"; + } + case "ALREADY_COMPLETED": { + message = "Batch already completed."; + break; + } + case "COMPLETED": { + message = "Batch completed and parent tasks resumed."; + break; + } + case "PENDING": { + message = "Child runs still in progress. Please try again later."; + break; + } + default: { + assertExhaustive(resumeResult); + } + } + + return redirectWithSuccessMessage(submission.value.redirectUrl, request, message); + } catch (error) { + if (error instanceof Error) { + logger.error("Failed to check batch completion", { + error: { + name: error.name, + message: error.message, + stack: error.stack, + }, + }); + return redirectWithErrorMessage(submission.value.redirectUrl, request, error.message); + } else { + logger.error("Failed to check batch completion", { error }); + return redirectWithErrorMessage(submission.value.redirectUrl, request, "Unknown error"); + } + } +}; diff --git a/apps/webapp/app/services/worker.server.ts b/apps/webapp/app/services/worker.server.ts index d486335df1..a7b43f4215 100644 --- a/apps/webapp/app/services/worker.server.ts +++ b/apps/webapp/app/services/worker.server.ts @@ -563,7 +563,7 @@ function getWorkerQueue() { handler: async (payload, job) => { const service = new ResumeBatchRunService(); - return await service.call(payload.batchRunId); + await service.call(payload.batchRunId); }, }, "v3.resumeTaskDependency": { diff --git a/apps/webapp/app/v3/services/resumeBatchRun.server.ts b/apps/webapp/app/v3/services/resumeBatchRun.server.ts index 5089e96103..aebff7da7a 100644 --- a/apps/webapp/app/v3/services/resumeBatchRun.server.ts +++ b/apps/webapp/app/v3/services/resumeBatchRun.server.ts @@ -35,7 +35,8 @@ export class ResumeBatchRunService extends BaseService { batchRunId, } ); - return; + + return "ERROR"; } if (batchRun.status === "COMPLETED") { @@ -46,7 +47,8 @@ export class ResumeBatchRunService extends BaseService { status: batchRun.status, }, }); - return; + + return "ERROR"; } if (batchRun.items.some((item) => !finishedBatchRunStatuses.includes(item.status))) { @@ -57,7 +59,8 @@ export class ResumeBatchRunService extends BaseService { status: batchRun.status, }, }); - return; + + return "PENDING"; } // If we are in development, or there is no dependent attempt, we can just mark the batch as completed and return @@ -71,7 +74,8 @@ export class ResumeBatchRunService extends BaseService { status: "COMPLETED", }, }); - return; + + return "COMPLETED"; } const dependentTaskAttempt = await this._prisma.taskRunAttempt.findFirst({ @@ -98,12 +102,11 @@ export class ResumeBatchRunService extends BaseService { dependentTaskAttemptId: batchRun.dependentTaskAttemptId, }); - return; + return "ERROR"; } // This batch has a dependent attempt and just finalized, we should resume that attempt const environment = batchRun.runtimeEnvironment; - const dependentRun = dependentTaskAttempt.taskRun; if (dependentTaskAttempt.status === "PAUSED" && batchRun.checkpointEventId) { @@ -115,11 +118,13 @@ export class ResumeBatchRunService extends BaseService { // We need to update the batchRun status so we don't resume it again const wasUpdated = await this.#setBatchToCompletedOnce(batchRun.id); + if (wasUpdated) { logger.debug("ResumeBatchRunService: Resuming dependent run with checkpoint", { batchRunId: batchRun.id, dependentTaskAttemptId: dependentTaskAttempt.id, }); + await marqs?.enqueueMessage( environment, dependentRun.queue, @@ -136,6 +141,8 @@ export class ResumeBatchRunService extends BaseService { }, dependentRun.concurrencyKey ?? undefined ); + + return "COMPLETED"; } else { logger.debug("ResumeBatchRunService: with checkpoint was already completed", { batchRunId: batchRun.id, @@ -143,6 +150,8 @@ export class ResumeBatchRunService extends BaseService { checkpointEventId: batchRun.checkpointEventId, hasCheckpointEvent: !!batchRun.checkpointEventId, }); + + return "ALREADY_COMPLETED"; } } else { logger.debug("ResumeBatchRunService: attempt is not paused or there's no checkpoint event", { @@ -161,11 +170,13 @@ export class ResumeBatchRunService extends BaseService { checkpointEventId: batchRun.checkpointEventId, hasCheckpointEvent: !!batchRun.checkpointEventId, }); - return; + + return "ERROR"; } // We need to update the batchRun status so we don't resume it again const wasUpdated = await this.#setBatchToCompletedOnce(batchRun.id); + if (wasUpdated) { logger.debug("ResumeBatchRunService: Resuming dependent run without checkpoint", { batchRunId: batchRun.id, @@ -173,6 +184,7 @@ export class ResumeBatchRunService extends BaseService { checkpointEventId: batchRun.checkpointEventId, hasCheckpointEvent: !!batchRun.checkpointEventId, }); + await marqs?.replaceMessage(dependentRun.id, { type: "RESUME", completedAttemptIds: batchRun.items.map((item) => item.taskRunAttemptId).filter(Boolean), @@ -183,6 +195,8 @@ export class ResumeBatchRunService extends BaseService { environmentId: environment.id, environmentType: environment.type, }); + + return "COMPLETED"; } else { logger.debug("ResumeBatchRunService: without checkpoint was already completed", { batchRunId: batchRun.id, @@ -190,6 +204,8 @@ export class ResumeBatchRunService extends BaseService { checkpointEventId: batchRun.checkpointEventId, hasCheckpointEvent: !!batchRun.checkpointEventId, }); + + return "ALREADY_COMPLETED"; } } }