From 38fba1b7864aee582f666bac934b99c7b2097fd0 Mon Sep 17 00:00:00 2001 From: Dmitry Meyer Date: Thu, 5 Dec 2024 13:53:26 +0000 Subject: [PATCH] [shim] Clean up and document API * Add OpenAPI 3.1 definition * Remove all unused fields in the `/pull` response. In the OAD, these fields are currently marked as `deprecated` and will be removed in the next version of the document * Remove corresponding data structures and code --- runner/docs/shim.openapi.yaml | 363 ++++++++++++++++++ runner/go.mod | 1 - runner/go.sum | 4 - runner/internal/runner/api/http_test.go | 9 +- runner/internal/shim/api/http.go | 18 +- runner/internal/shim/api/schemas.go | 12 +- runner/internal/shim/api/server.go | 2 +- runner/internal/shim/docker.go | 137 ++----- src/dstack/_internal/server/schemas/runner.py | 8 - 9 files changed, 398 insertions(+), 156 deletions(-) create mode 100644 runner/docs/shim.openapi.yaml diff --git a/runner/docs/shim.openapi.yaml b/runner/docs/shim.openapi.yaml new file mode 100644 index 000000000..90c80919a --- /dev/null +++ b/runner/docs/shim.openapi.yaml @@ -0,0 +1,363 @@ +openapi: 3.1.1 + +info: + title: dstack-shim API + version: &shim-version 0.18.29 + +servers: + - url: http://localhost:10998/api + +paths: + /submit: + post: + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/TaskConfigBody" + responses: + "200": + description: "" + content: + text/plain: + schema: + type: string + examples: + - OK + "409": + description: Another task is already submitted (`RunnerStatus` is not `pending`) + content: + text/plain: + schema: + type: string + examples: + - Conflict + + /pull: + get: + responses: + "200": + description: "" + content: + application/json: + schema: + $ref: "#/components/schemas/PullResponse" + + /stop: + post: + requestBody: + required: false + content: + application/json: + schema: + $ref: "#/components/schemas/StopBody" + responses: + "200": + description: "" + content: + application/json: + schema: 
+ $ref: "#/components/schemas/StopResponse" + + /healthcheck: + get: + responses: + "200": + description: "" + content: + application/json: + schema: + $ref: "#/components/schemas/HealthcheckResponse" + +components: + schemas: + RunnerStatus: + title: shim.RunnerStatus + type: string + enum: + - pending + - pulling + - creating + - running + + JobResult: + title: shim.JobResult + type: object + properties: + reason: + type: string + enum: + - EXECUTOR_ERROR + - CREATING_CONTAINER_ERROR + - CONTAINER_EXITED_WITH_ERROR + - DONE_BY_RUNNER + reason_message: + type: string + default: "" + description: > + `State.Error` from Docker API, or shim-generated message (e.g., if the container + is OOM killed or failed to prepare volumes), or N last lines from the container logs + required: + - reason + - reason_message + additionalProperties: false + + VolumeMountPoint: + title: shim.VolumeMountPoint + type: object + properties: + name: + type: string + default: "" + description: > + `dstack` volume [name](https://dstack.ai/docs/reference/dstack.yml/volume/#name) + path: + type: string + default: "" + description: Mount point inside container + + VolumeInfo: + title: shim.VolumeInfo + type: object + properties: + backend: + type: string + enum: [aws, gcp] + name: + type: string + default: "" + description: > + `dstack` volume [name](https://dstack.ai/docs/reference/dstack.yml/volume/#name) + volume_id: + type: string + default: "" + init_fs: + type: boolean + default: false + description: > + Create a filesystem when it doesn't exist if `true`, fail with error if `false` + + InstanceMountPoint: + title: shim.InstanceMountPoint + type: object + properties: + instance_name: + type: string + default: "" + description: Instance (host) path + path: + type: string + default: "" + description: Mount point inside container + + TaskConfigBody: + title: shim.api.TaskConfigBody + description: Same as `shim.TaskConfig` + type: object + properties: + username: + type: string + default: "" + 
+ description: Private container registry username + examples: + - registry-user + password: + type: string + default: "" + description: Private container registry password + examples: + - registry-token + image_name: + type: string + default: "" + examples: + - ubuntu:22.04 + privileged: + type: boolean + default: false + description: > + (since [0.18.18](https://github.com/dstackai/dstack/releases/tag/0.18.18)) + Start container in privileged mode + container_name: + type: string + default: "" + examples: + - horrible-mule-1-0-0 + container_user: + type: string + default: "" + description: > + If not set, the default image user is used. As of 0.18.24, `dstack` always uses `root` + examples: + - root + shm_size: + type: integer + default: 0 + description: > + POSIX shared memory, bytes. + If not set or `0`, Docker's default is used + examples: + - 1073741824 + public_keys: + type: array + items: + type: string + default: [] + description: > + SSH public keys for `container_user`. As of 0.18.24, `dstack` submits two keys: + project key (generated by the server) and user key (either generated by + the CLI client or provided by the user) + examples: + - ["ssh-rsa project@dstack", "ssh-ed25519 me@laptop"] + ssh_user: + type: string + default: "" + description: > + Instance (host) user for SSH access, either directly (`ssh {run_name}-host`) + or for `ProxyJump`ing inside the container. Ignored if `ssh_key` is not set + examples: + - root + ssh_key: + type: string + default: "" + description: > + SSH public key for access to the instance (host). If set, the key will be added + to the `ssh_user`'s `~/.ssh/authorized_keys` when the run starts and removed + when the run exits. 
+ examples: + - "ssh-ed25519 me@laptop" + mounts: + type: array + items: + $ref: "#/components/schemas/VolumeMountPoint" + default: [] + volumes: + type: array + items: + $ref: "#/components/schemas/VolumeInfo" + default: [] + instance_mounts: + type: array + items: + $ref: "#/components/schemas/InstanceMountPoint" + default: [] + description: (since [0.18.21](https://github.com/dstackai/dstack/releases/tag/0.18.21)) + + PullResponse: + title: shim.api.PullResponse + type: object + properties: + state: + allOf: + - $ref: "#/components/schemas/RunnerStatus" + - examples: + - pending + executor_error: + deprecated: true + type: string + default: "" + description: > + A line from the `runner.log` containing `Executor failed` substring (if any). + Not used by `dstack` server + container_name: + deprecated: true + type: string + examples: + - horrible-mule-1-0-0 + status: + deprecated: true + type: string + examples: + - exited + description: > + `State.Status` from Docker API. Not used by `dstack` server + running: + deprecated: true + type: boolean + examples: + - false + description: > + `State.Running` from Docker API. Not used by `dstack` server + oom_killed: + deprecated: true + type: boolean + examples: + - false + description: > + `State.OOMKilled` from Docker API. Not used by `dstack` server + dead: + deprecated: true + type: boolean + examples: + - false + description: > + `State.Dead` from Docker API. Not used by `dstack` server + exit_code: + deprecated: true + type: integer + minimum: 0 + maximum: 255 + description: > + `State.ExitCode` from Docker API. Not used by `dstack` server + examples: + - 137 + error: + deprecated: true + type: string + description: > + Either `State.Error` from Docker API or the same as `result.reason_message`. 
+ Not used by `dstack` server + examples: + - "" + result: + allOf: + - $ref: "#/components/schemas/JobResult" + - examples: + - reason: CONTAINER_EXITED_WITH_ERROR + reason_message: "" + required: + - state + - executor_error + - container_name + - status + - running + - oom_killed + - dead + - exit_code + - error + - result + additionalProperties: false + + StopBody: + title: shim.api.StopBody + type: object + properties: + force: + type: boolean + default: false + + StopResponse: + title: shim.api.StopResponse + type: object + properties: + state: + $ref: "#/components/schemas/RunnerStatus" + required: + - state + additionalProperties: false + + HealthcheckResponse: + title: shim.api.HealthcheckResponse + type: object + properties: + service: + const: dstack-shim + version: + type: string + examples: + - *shim-version + required: + - service + - version + additionalProperties: false diff --git a/runner/go.mod b/runner/go.mod index 146dfe94b..9e247989a 100644 --- a/runner/go.mod +++ b/runner/go.mod @@ -11,7 +11,6 @@ require ( github.com/docker/go-units v0.5.0 github.com/go-git/go-git/v5 v5.12.0 github.com/golang/gddo v0.0.0-20210115222349-20d68f94ee1f - github.com/icza/backscanner v0.0.0-20240328210400-b40c3a86dec5 github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf github.com/shirou/gopsutil/v3 v3.24.3 github.com/sirupsen/logrus v1.9.3 diff --git a/runner/go.sum b/runner/go.sum index 66462f890..ef3df8d62 100644 --- a/runner/go.sum +++ b/runner/go.sum @@ -96,10 +96,6 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0/go.mod h1:qmOFXW2epJhM0qSnUUYp github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg= github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY= github.com/hashicorp/hcl v0.0.0-20170914154624-68e816d1c783/go.mod h1:oZtUIOe8dh44I2q6ScRibXws4Ajl+d+nod3AaR9vL5w= -github.com/icza/backscanner v0.0.0-20240328210400-b40c3a86dec5 h1:FcxwOojw6pUiPpsf7Q6Fw/pI+7cR6FlapLBEGV/902A= 
-github.com/icza/backscanner v0.0.0-20240328210400-b40c3a86dec5/go.mod h1:GYeBD1CF7AqnKZK+UCytLcY3G+UKo0ByXX/3xfdNyqQ= -github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k= -github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA= github.com/inconshreveable/log15 v0.0.0-20170622235902-74a0988b5f80/go.mod h1:cOaXtrgN4ScfRrD9Bre7U1thNq5RtJ8ZoP4iXVGRj6o= github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf h1:FtEj8sfIcaaBfAKrE1Cwb61YDtYq9JxChK1c7AKce7s= github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf/go.mod h1:yrqSXGoD/4EKfF26AOGzscPOgTTJcyAwM2rpixWT+t4= diff --git a/runner/internal/runner/api/http_test.go b/runner/internal/runner/api/http_test.go index 98a6f5180..b44d31b2e 100644 --- a/runner/internal/runner/api/http_test.go +++ b/runner/internal/runner/api/http_test.go @@ -12,13 +12,12 @@ import ( ) type DummyRunner struct { - State shim.RunnerStatus - ContainerStatus shim.ContainerStatus - JobResult shim.JobResult + State shim.RunnerStatus + JobResult shim.JobResult } -func (ds DummyRunner) GetState() (shim.RunnerStatus, shim.ContainerStatus, string, shim.JobResult) { - return ds.State, ds.ContainerStatus, "", ds.JobResult +func (ds DummyRunner) GetState() (shim.RunnerStatus, shim.JobResult) { + return ds.State, ds.JobResult } func (ds DummyRunner) Run(context.Context, shim.TaskConfig) error { diff --git a/runner/internal/shim/api/http.go b/runner/internal/shim/api/http.go index c0fbce5fd..dc6e27b12 100644 --- a/runner/internal/shim/api/http.go +++ b/runner/internal/shim/api/http.go @@ -23,7 +23,7 @@ func (s *ShimServer) HealthcheckGetHandler(w http.ResponseWriter, r *http.Reques func (s *ShimServer) SubmitPostHandler(w http.ResponseWriter, r *http.Request) (interface{}, error) { s.mu.RLock() defer s.mu.RUnlock() - state, _, _, _ := s.runner.GetState() + state, _ := s.runner.GetState() if state != shim.Pending { return nil, 
&api.Error{Status: http.StatusConflict} } @@ -48,19 +48,11 @@ func (s *ShimServer) PullGetHandler(w http.ResponseWriter, r *http.Request) (int s.mu.RLock() defer s.mu.RUnlock() - state, containerStatus, executorError, jobResult := s.runner.GetState() + state, jobResult := s.runner.GetState() return &PullResponse{ - State: string(state), - ExecutorError: executorError, - ContainerName: containerStatus.ContainerName, - Status: containerStatus.Status, - Running: containerStatus.Running, - OOMKilled: containerStatus.OOMKilled, - Dead: containerStatus.Dead, - ExitCode: containerStatus.ExitCode, - Error: containerStatus.Error, - Result: jobResult, + State: string(state), + Result: jobResult, }, nil } @@ -68,7 +60,7 @@ func (s *ShimServer) StopPostHandler(w http.ResponseWriter, r *http.Request) (in s.mu.RLock() defer s.mu.RUnlock() - state, _, _, _ := s.runner.GetState() + state, _ := s.runner.GetState() if state == shim.Pending { return &StopResponse{ State: string(state), diff --git a/runner/internal/shim/api/schemas.go b/runner/internal/shim/api/schemas.go index bc76401b5..fa1c33aa5 100644 --- a/runner/internal/shim/api/schemas.go +++ b/runner/internal/shim/api/schemas.go @@ -14,16 +14,8 @@ type HealthcheckResponse struct { } type PullResponse struct { - State string `json:"state"` - ExecutorError string `json:"executor_error"` - ContainerName string `json:"container_name"` - Status string `json:"status"` - Running bool `json:"running"` - OOMKilled bool `json:"oom_killed"` - Dead bool `json:"dead"` - ExitCode int `json:"exit_code"` - Error string `json:"error"` - Result shim.JobResult `json:"result"` + State string `json:"state"` + Result shim.JobResult `json:"result"` } type StopResponse struct { diff --git a/runner/internal/shim/api/server.go b/runner/internal/shim/api/server.go index 03ed8a7ac..6e433c4de 100644 --- a/runner/internal/shim/api/server.go +++ b/runner/internal/shim/api/server.go @@ -11,7 +11,7 @@ import ( type TaskRunner interface { Run(context.Context, 
shim.TaskConfig) error - GetState() (shim.RunnerStatus, shim.ContainerStatus, string, shim.JobResult) + GetState() (shim.RunnerStatus, shim.JobResult) Stop(bool) } diff --git a/runner/internal/shim/docker.go b/runner/internal/shim/docker.go index bd65d4ea2..32418054c 100644 --- a/runner/internal/shim/docker.go +++ b/runner/internal/shim/docker.go @@ -26,9 +26,7 @@ import ( "github.com/docker/docker/pkg/stdcopy" "github.com/docker/go-connections/nat" "github.com/docker/go-units" - "github.com/dstackai/dstack/runner/consts" "github.com/dstackai/dstack/runner/internal/shim/backends" - "github.com/icza/backscanner" bytesize "github.com/inhies/go-bytesize" "github.com/ztrue/tracerr" ) @@ -42,34 +40,21 @@ const ( LabelValueTrue = "true" ) -// Depricated: Remove on next release (0.19) -type ContainerStatus struct { - ContainerID string - ContainerName string - Status string - Running bool - OOMKilled bool - Dead bool - ExitCode int - Error string -} - type JobResult struct { Reason string `json:"reason"` ReasonMessage string `json:"reason_message"` } type DockerRunner struct { - client *docker.Client - dockerParams DockerParameters - currentContainer string - state RunnerStatus + client *docker.Client + dockerParams DockerParameters + containerID string // ID of the running container, empty if state != Running + containerName string // Name of the last created container, for logging only + state RunnerStatus cancelPull context.CancelFunc - containerStatus ContainerStatus // TODO: remove on next release (0.19) - executorError string // TODO: remove on next release (0.19) - jobResult JobResult + jobResult JobResult } func NewDockerRunner(dockerParams DockerParameters) (*DockerRunner, error) { @@ -94,7 +79,6 @@ func (d *DockerRunner) Run(ctx context.Context, cfg TaskConfig) error { if err := ak.AppendPublicKeys([]string{cfg.SshKey}); err != nil { d.state = Pending errMessage := fmt.Sprintf("ak.AppendPublicKeys error: %s", err.Error()) - d.containerStatus.Error = errMessage 
log.Println(errMessage) d.jobResult = JobResult{Reason: "EXECUTOR_ERROR", ReasonMessage: errMessage} return tracerr.Wrap(err) @@ -116,7 +100,6 @@ func (d *DockerRunner) Run(ctx context.Context, cfg TaskConfig) error { if err != nil { d.state = Pending errMessage := fmt.Sprintf("prepareVolumes error: %s", err.Error()) - d.containerStatus.Error = errMessage log.Println(errMessage) d.jobResult = JobResult{Reason: "EXECUTOR_ERROR", ReasonMessage: errMessage} return tracerr.Wrap(err) @@ -125,17 +108,11 @@ func (d *DockerRunner) Run(ctx context.Context, cfg TaskConfig) error { if err != nil { d.state = Pending errMessage := fmt.Sprintf("prepareInstanceMountPoints error: %s", err.Error()) - d.containerStatus.Error = errMessage log.Println(errMessage) d.jobResult = JobResult{Reason: "EXECUTOR_ERROR", ReasonMessage: errMessage} return tracerr.Wrap(err) } - d.containerStatus = ContainerStatus{ - ContainerName: cfg.ContainerName, - } - d.executorError = "" - pullCtx, cancel := context.WithTimeout(ctx, ImagePullTimeout) defer cancel() d.cancelPull = cancel @@ -145,7 +122,6 @@ func (d *DockerRunner) Run(ctx context.Context, cfg TaskConfig) error { if err = pullImage(pullCtx, d.client, cfg); err != nil { d.state = Pending errMessage := fmt.Sprintf("pullImage error: %s", err.Error()) - d.containerStatus.Error = errMessage log.Print(errMessage + "\n") d.jobResult = JobResult{Reason: "CREATING_CONTAINER_ERROR", ReasonMessage: errMessage} return tracerr.Wrap(err) @@ -155,7 +131,6 @@ func (d *DockerRunner) Run(ctx context.Context, cfg TaskConfig) error { if err != nil { d.state = Pending errMessage := fmt.Sprintf("Cannot create dir for runner: %s", err.Error()) - d.containerStatus.Error = errMessage log.Print(errMessage + "\n") d.jobResult = JobResult{Reason: "CREATING_CONTAINER_ERROR", ReasonMessage: errMessage} return tracerr.Wrap(err) @@ -167,7 +142,6 @@ func (d *DockerRunner) Run(ctx context.Context, cfg TaskConfig) error { if err != nil { d.state = Pending errMessage := 
fmt.Sprintf("createContainer error: %s", err.Error()) - d.containerStatus.Error = errMessage d.jobResult = JobResult{Reason: "CREATING_CONTAINER_ERROR", ReasonMessage: errMessage} log.Print(errMessage + "\n") return tracerr.Wrap(err) @@ -195,51 +169,37 @@ func (d *DockerRunner) Run(ctx context.Context, cfg TaskConfig) error { } }() - d.containerStatus, _ = inspectContainer(d.client, containerID) d.state = Running - d.currentContainer = containerID - d.executorError = "" - log.Printf("Running container, name=%s, id=%s\n", d.containerStatus.ContainerName, containerID) + d.containerID = containerID + d.containerName = cfg.ContainerName + log.Printf("Running container, name=%s, id=%s\n", d.containerName, containerID) if err = runContainer(ctx, d.client, containerID); err != nil { log.Printf("runContainer error: %s\n", err.Error()) d.state = Pending - d.containerStatus, _ = inspectContainer(d.client, containerID) - d.executorError = FindExecutorError(runnerDir) - d.currentContainer = "" - var errMessage string = d.containerStatus.Error - if d.containerStatus.OOMKilled { - errMessage = "Container killed by OOM" - } - if errMessage == "" { - lastLogs, err := getContainerLastLogs(d.client, containerID, 5) - if err == nil { - errMessage = strings.Join(lastLogs, "\n") - } else { - log.Printf("getContainerLastLogs error: %s\n", err.Error()) - } + d.containerID = "" + var errMessage string + if lastLogs, err := getContainerLastLogs(d.client, containerID, 5); err == nil { + errMessage = strings.Join(lastLogs, "\n") + } else { + log.Printf("getContainerLastLogs error: %s\n", err.Error()) + errMessage = "" } d.jobResult = JobResult{Reason: "CONTAINER_EXITED_WITH_ERROR", ReasonMessage: errMessage} return tracerr.Wrap(err) } - log.Printf("Container finished successfully, name=%s, id=%s", d.containerStatus.ContainerName, containerID) - d.containerStatus, _ = inspectContainer(d.client, containerID) - d.executorError = FindExecutorError(runnerDir) + log.Printf("Container finished 
successfully, name=%s, id=%s", d.containerName, containerID) d.state = Pending - d.currentContainer = "" + d.containerID = "" - jobResult := JobResult{Reason: "DONE_BY_RUNNER"} - if d.containerStatus.ExitCode != 0 { - jobResult = JobResult{Reason: "CONTAINER_EXITED_WITH_ERROR", ReasonMessage: d.containerStatus.Error} - } - d.jobResult = jobResult + d.jobResult = JobResult{Reason: "DONE_BY_RUNNER"} return nil } func (d *DockerRunner) Stop(force bool) { - if d.state == Pulling && d.currentContainer == "" { + if d.state == Pulling && d.containerID == "" { d.cancelPull() return } @@ -250,14 +210,14 @@ func (d *DockerRunner) Stop(force bool) { stopOptions.Timeout = &timeout } - err := d.client.ContainerStop(context.Background(), d.currentContainer, stopOptions) + err := d.client.ContainerStop(context.Background(), d.containerID, stopOptions) if err != nil { log.Printf("Failed to stop container: %s", err) } } -func (d *DockerRunner) GetState() (RunnerStatus, ContainerStatus, string, JobResult) { - return d.state, d.containerStatus, d.executorError, d.jobResult +func (d *DockerRunner) GetState() (RunnerStatus, JobResult) { + return d.state, d.jobResult } func getBackend(backendType string) (backends.Backend, error) { @@ -849,54 +809,3 @@ func (c *CLIArgs) MakeRunnerDir() (string, error) { } return runnerTemp, nil } - -func inspectContainer(client *docker.Client, containerID string) (ContainerStatus, error) { - inspection, err := client.ContainerInspect(context.Background(), containerID) - if err != nil { - s := ContainerStatus{} - return s, err - } - containerStatus := ContainerStatus{ - ContainerID: containerID, - ContainerName: strings.TrimLeft(inspection.Name, "/"), - Status: inspection.State.Status, - Running: inspection.State.Running, - OOMKilled: inspection.State.OOMKilled, - Dead: inspection.State.Dead, - ExitCode: inspection.State.ExitCode, - Error: inspection.State.Error, - } - return containerStatus, nil -} - -func FindExecutorError(runnerDir string) string { - 
filename := filepath.Join(runnerDir, consts.RunnerLogFileName) - file, err := os.Open(filename) - if err != nil { - log.Printf("Cannot open file %s: %s\n", filename, err) - return "" - } - defer file.Close() - - fileStatus, err := file.Stat() - if err != nil { - log.Printf("Cannot stat file %s: %s\n", filename, err) - return "" - } - - scanner := backscanner.New(file, int(fileStatus.Size())) - what := []byte(consts.ExecutorFailedSignature) - for { - line, _, err := scanner.LineBytes() - if err != nil { - if errors.Is(err, io.EOF) { - return "" // consts.ExecutorFailedSignature is not found in file - } - log.Printf("FindExecutorError scan error: %s\n", err) - return "" - } - if bytes.Contains(line, what) { - return string(line) - } - } -} diff --git a/src/dstack/_internal/server/schemas/runner.py b/src/dstack/_internal/server/schemas/runner.py index cf99d4d4d..2baddfb49 100644 --- a/src/dstack/_internal/server/schemas/runner.py +++ b/src/dstack/_internal/server/schemas/runner.py @@ -120,12 +120,4 @@ class JobResult(CoreModel): class PullBody(CoreModel): state: str - executor_error: Optional[str] - container_name: Optional[str] - status: Optional[str] - running: Optional[bool] - oom_killed: Optional[bool] - dead: Optional[bool] - exit_code: Optional[int] - error: Optional[str] result: Optional[JobResult]