Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add namespace scope to compute template operations #244

Merged
merged 8 commits into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions apiserver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,35 @@ KubeRay APIServer provides the gRPC and HTTP API to manage kuberay resources.

### Compute Template

#### Create compute templates
#### Create compute templates in a given namespace

```
POST {{baseUrl}}/apis/v1alpha1/compute_templates
POST {{baseUrl}}/apis/v1alpha2/namespaces/<namespace>/compute_templates
```

```
{
"name": "default-template",
"namespace": "<namespace>",
"cpu": 2,
"memory": 4,
"gpu": 1,
"gpuAccelerator": "Tesla-V100"
}
```

#### List all compute templates
#### List all compute templates in a given namespace

```
GET {{baseUrl}}/apis/v1alpha1/compute_templates
GET {{baseUrl}}/apis/v1alpha2/namespaces/<namespace>/compute_templates
```

```
{
"compute_templates": [
{
"id": "",
"name": "default-template",
"namespace": "<namespace>",
"cpu": 2,
"memory": 4,
"gpu": 1,
Expand All @@ -42,16 +44,22 @@ GET {{baseUrl}}/apis/v1alpha1/compute_templates
}
```

#### List all compute templates in all namespaces

```
GET {{baseUrl}}/apis/v1alpha2/compute_templates
```

#### Get compute template by name

```
GET {{baseUrl}}/apis/v1alpha1/compute_templates/?name=<compute_template_name>
GET {{baseUrl}}/apis/v1alpha2/namespaces/<namespace>/compute_templates/<compute_template_name>
```

#### Delete compute template by name

```
DELETE {{baseUrl}}/apis/v1alpha1/compute_templates/?name=<compute_template_name>
DELETE {{baseUrl}}/apis/v1alpha2/namespaces/<namespace>/compute_templates/<compute_template_name>
```

### Clusters
Expand Down
56 changes: 39 additions & 17 deletions apiserver/pkg/manager/resource_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ type ResourceManagerInterface interface {
ListAllClusters(ctx context.Context) ([]*v1alpha1.RayCluster, error)
DeleteCluster(ctx context.Context, clusterName string, namespace string) error
CreateComputeTemplate(ctx context.Context, runtime *api.ComputeTemplate) (*v1.ConfigMap, error)
GetComputeTemplate(ctx context.Context, name string) (*v1.ConfigMap, error)
ListComputeTemplates(ctx context.Context) ([]*v1.ConfigMap, error)
DeleteComputeTemplate(ctx context.Context, name string) error
GetComputeTemplate(ctx context.Context, name string, namespace string) (*v1.ConfigMap, error)
ListComputeTemplates(ctx context.Context, namespace string) ([]*v1.ConfigMap, error)
DeleteComputeTemplate(ctx context.Context, name string, namespace string) error
}

type ResourceManager struct {
Expand Down Expand Up @@ -82,7 +82,7 @@ func (r *ResourceManager) populateComputeTemplate(ctx context.Context, cluster *
dict := map[string]*api.ComputeTemplate{}
// populate head compute template
name := cluster.ClusterSpec.HeadGroupSpec.ComputeTemplate
configMap, err := r.GetComputeTemplate(ctx, name)
configMap, err := r.GetComputeTemplate(ctx, name, cluster.Namespace)
if err != nil {
return nil, err
}
Expand All @@ -93,7 +93,7 @@ func (r *ResourceManager) populateComputeTemplate(ctx context.Context, cluster *
for _, spec := range cluster.ClusterSpec.WorkerGroupSepc {
name := spec.ComputeTemplate
if _, exist := dict[name]; !exist {
configMap, err := r.GetComputeTemplate(ctx, name)
configMap, err := r.GetComputeTemplate(ctx, name, cluster.Namespace)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -164,32 +164,32 @@ func (r *ResourceManager) DeleteCluster(ctx context.Context, clusterName string,

// Compute Runtimes
func (r *ResourceManager) CreateComputeTemplate(ctx context.Context, runtime *api.ComputeTemplate) (*v1.ConfigMap, error) {
_, err := r.GetComputeTemplate(ctx, runtime.Name)
_, err := r.GetComputeTemplate(ctx, runtime.Name, runtime.Namespace)
if err == nil {
return nil, util.NewAlreadyExistError("Compute template with name %s already exists in namespace %s", runtime.Name, DefaultNamespace)
return nil, util.NewAlreadyExistError("Compute template with name %s already exists in namespace %s", runtime.Name, runtime.Namespace)
}

computeTemplate, err := util.NewComputeTemplate(runtime, DefaultNamespace)
computeTemplate, err := util.NewComputeTemplate(runtime)
if err != nil {
return nil, util.NewInternalServerError(err, "Failed to convert compute runtime (%s/%s)", DefaultNamespace, runtime.Name)
return nil, util.NewInternalServerError(err, "Failed to convert compute runtime (%s/%s)", runtime.Namespace, runtime.Name)
}

client := r.getKubernetesConfigMapClient(DefaultNamespace)
client := r.getKubernetesConfigMapClient(runtime.Namespace)
newRuntime, err := client.Create(ctx, computeTemplate, metav1.CreateOptions{})
if err != nil {
return nil, util.NewInternalServerError(err, "Failed to create a compute runtime for (%s/%s)", DefaultNamespace, runtime.Name)
return nil, util.NewInternalServerError(err, "Failed to create a compute runtime for (%s/%s)", runtime.Namespace, runtime.Name)
}

return newRuntime, nil
}

func (r *ResourceManager) GetComputeTemplate(ctx context.Context, name string) (*v1.ConfigMap, error) {
client := r.getKubernetesConfigMapClient(DefaultNamespace)
func (r *ResourceManager) GetComputeTemplate(ctx context.Context, name string, namespace string) (*v1.ConfigMap, error) {
client := r.getKubernetesConfigMapClient(namespace)
return getComputeTemplateByName(ctx, client, name)
}

func (r *ResourceManager) ListComputeTemplates(ctx context.Context) ([]*v1.ConfigMap, error) {
client := r.getKubernetesConfigMapClient(DefaultNamespace)
func (r *ResourceManager) ListComputeTemplates(ctx context.Context, namespace string) ([]*v1.ConfigMap, error) {
client := r.getKubernetesConfigMapClient(namespace)
configMapList, err := client.List(ctx, metav1.ListOptions{LabelSelector: "ray.io/config-type=compute-template"})
if err != nil {
return nil, util.Wrap(err, "List compute runtimes failed")
Expand All @@ -204,8 +204,30 @@ func (r *ResourceManager) ListComputeTemplates(ctx context.Context) ([]*v1.Confi
return result, nil
}

func (r *ResourceManager) DeleteComputeTemplate(ctx context.Context, name string) error {
client := r.getKubernetesConfigMapClient(DefaultNamespace)
// ListAllComputeTemplates collects compute-template ConfigMaps from every
// namespace in the cluster.
//
// It first lists all namespaces through the namespace client, then queries
// each namespace's ConfigMap client for objects carrying the label
// "ray.io/config-type=compute-template". The first listing error aborts the
// whole operation and is returned wrapped; no partial result is returned.
// The returned slice is nil when no matching ConfigMap exists.
func (r *ResourceManager) ListAllComputeTemplates(ctx context.Context) ([]*v1.ConfigMap, error) {
	nsList, err := r.getKubernetesNamespaceClient().List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, util.Wrap(err, "Failed to fetch all Kubernetes namespaces")
	}

	// The same label selector applies to every namespace, so build it once.
	templateSelector := metav1.ListOptions{LabelSelector: "ray.io/config-type=compute-template"}

	var templates []*v1.ConfigMap
	for _, ns := range nsList.Items {
		found, listErr := r.getKubernetesConfigMapClient(ns.Name).List(ctx, templateSelector)
		if listErr != nil {
			return nil, util.Wrap(listErr, fmt.Sprintf("List compute templates failed in %s", ns.Name))
		}

		// Take the address of the slice element itself (not of a loop copy)
		// so each pointer refers to a distinct ConfigMap.
		for i := range found.Items {
			templates = append(templates, &found.Items[i])
		}
	}
	return templates, nil
}

func (r *ResourceManager) DeleteComputeTemplate(ctx context.Context, name string, namespace string) error {
client := r.getKubernetesConfigMapClient(namespace)

configMap, err := getComputeTemplateByName(ctx, client, name)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions apiserver/pkg/model/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ func FromKubeToAPIComputeTemplate(configMap *v1.ConfigMap) *api.ComputeTemplate

runtime := &api.ComputeTemplate{}
runtime.Name = configMap.Name
runtime.Namespace = configMap.Namespace
runtime.Cpu = uint32(cpu)
runtime.Memory = uint32(memory)
runtime.Gpu = uint32(gpu)
Expand Down
55 changes: 47 additions & 8 deletions apiserver/pkg/server/compute_template_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package server

import (
"context"
"fmt"

"github.com/ray-project/kuberay/apiserver/pkg/manager"
"github.com/ray-project/kuberay/apiserver/pkg/model"
Expand All @@ -24,48 +25,86 @@ type ComputeTemplateServer struct {

func (s *ComputeTemplateServer) CreateComputeTemplate(ctx context.Context, request *api.CreateComputeTemplateRequest) (*api.ComputeTemplate, error) {
if err := ValidateCreateComputeTemplateRequest(request); err != nil {
return nil, util.Wrap(err, "Validate compute runtime request failed.")
return nil, util.Wrap(err, "Validate compute template runtime request failed.")
}

// use the namespace in the request to override the namespace in the compute template definition
request.ComputeTemplate.Namespace = request.Namespace

runtime, err := s.resourceManager.CreateComputeTemplate(ctx, request.ComputeTemplate)
if err != nil {
return nil, util.Wrap(err, "Create Compute Runtime failed.")
return nil, util.Wrap(err, "Create compute template failed.")
}

return model.FromKubeToAPIComputeTemplate(runtime), nil
}

func (s *ComputeTemplateServer) GetComputeTemplate(ctx context.Context, request *api.GetComputeTemplateRequest) (*api.ComputeTemplate, error) {
runtime, err := s.resourceManager.GetComputeTemplate(ctx, request.Name)
if request.Name == "" {
return nil, util.NewInvalidInputError("Compute template name is empty. Please specify a valid value.")
}

if request.Namespace == "" {
return nil, util.NewInvalidInputError("Namespace is empty. Please specify a valid value.")
}

runtime, err := s.resourceManager.GetComputeTemplate(ctx, request.Name, request.Namespace)
if err != nil {
return nil, util.Wrap(err, "Get cluster runtime failed.")
return nil, util.Wrap(err, "Get compute template failed.")
}

return model.FromKubeToAPIComputeTemplate(runtime), nil
}

func (s *ComputeTemplateServer) ListComputeTemplates(ctx context.Context, request *api.ListComputeTemplatesRequest) (*api.ListComputeTemplatesResponse, error) {
runtimes, err := s.resourceManager.ListComputeTemplates(ctx)
if request.Namespace == "" {
return nil, util.NewInvalidInputError("Namespace is empty. Please specify a valid value.")
}

runtimes, err := s.resourceManager.ListComputeTemplates(ctx, request.Namespace)
if err != nil {
return nil, util.Wrap(err, "List cluster runtime failed.")
return nil, util.Wrap(err, fmt.Sprintf("List compute templates in namespace %s failed.", request.Namespace))
}

return &api.ListComputeTemplatesResponse{
ComputeTemplates: model.FromKubeToAPIComputeTemplates(runtimes),
}, nil
}

// ListAllComputeTemplates handles the request for compute templates across
// all namespaces.
//
// The request carries no fields that are read here; the call is delegated to
// the resource manager, and the resulting ConfigMaps are converted to API
// compute templates. Any manager error is returned wrapped with context.
func (s *ComputeTemplateServer) ListAllComputeTemplates(ctx context.Context, request *api.ListAllComputeTemplatesRequest) (*api.ListAllComputeTemplatesResponse, error) {
	configMaps, err := s.resourceManager.ListAllComputeTemplates(ctx)
	if err != nil {
		return nil, util.Wrap(err, "List all compute templates from all namespaces failed.")
	}

	response := &api.ListAllComputeTemplatesResponse{}
	response.ComputeTemplates = model.FromKubeToAPIComputeTemplates(configMaps)
	return response, nil
}

func (s *ComputeTemplateServer) DeleteComputeTemplate(ctx context.Context, request *api.DeleteComputeTemplateRequest) (*emptypb.Empty, error) {
if err := s.resourceManager.DeleteComputeTemplate(ctx, request.Name); err != nil {
if request.Name == "" {
return nil, util.NewInvalidInputError("Compute template name is empty. Please specify a valid value.")
}

if request.Namespace == "" {
return nil, util.NewInvalidInputError("Namespace is empty. Please specify a valid value.")
}

if err := s.resourceManager.DeleteComputeTemplate(ctx, request.Name, request.Namespace); err != nil {
return nil, err
}

return &emptypb.Empty{}, nil
}

func ValidateCreateComputeTemplateRequest(request *api.CreateComputeTemplateRequest) error {
if request.Namespace == "" {
return util.NewInvalidInputError("Namespace is empty. Please specify a valid value.")
}

if request.ComputeTemplate.Name == "" {
return util.NewInvalidInputError("Cluster name is empty. Please specify a valid value.")
return util.NewInvalidInputError("Compute template name is empty. Please specify a valid value.")
}

if request.ComputeTemplate.Cpu == 0 {
Expand Down
5 changes: 3 additions & 2 deletions apiserver/pkg/util/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,18 +320,19 @@ func (c *RayCluster) SetAnnotationsToAllTemplates(key string, value string) {
// TODO: reserved for common parameters.
}

func NewComputeTemplate(runtime *api.ComputeTemplate, namespace string) (*v1.ConfigMap, error) {
func NewComputeTemplate(runtime *api.ComputeTemplate) (*v1.ConfigMap, error) {
config := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: runtime.Name,
Namespace: namespace,
Namespace: runtime.Namespace,
Labels: map[string]string{
"ray.io/config-type": "compute-template",
"ray.io/compute-template": runtime.Name,
},
},
Data: map[string]string{
"name": runtime.Name,
"namespace": runtime.Namespace,
"cpu": strconv.FormatUint(uint64(runtime.Cpu), 10),
"memory": strconv.FormatUint(uint64(runtime.Memory), 10),
"gpu": strconv.FormatUint(uint64(runtime.Gpu), 10),
Expand Down
13 changes: 7 additions & 6 deletions cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,18 @@ Flags:
--gpu-accelerator string GPU Accelerator type
--memory uint32 ray pod memory in GB (default 1)
--name string name of the compute template
-n, --namespace string kubernetes namespace where the compute template will be stored

```

#### Get a Ray Compute Template
`./kuberay template compute get <compute template name>`
`./kuberay template compute get -n <namespace> <compute template name>`

#### List Ray Compute Templates
`./kuberay template compute list`
`./kuberay template compute list -n <namespace>`

#### Delete a Ray Compute Template
`./kuberay template compute delete <compute template name>`
`./kuberay template compute delete -n <namespace> <compute template name>`

## End to end example

Expand All @@ -119,8 +120,8 @@ kubectl port-forward svc/kuberay-apiserver-service 8887:8887 -n ray-system
Create compute templates

```
./kuberay template compute create --cpu 2 --memory 4 --name "worker-template"
./kuberay template compute create --cpu 1 --memory 2 --name "head-template"
./kuberay template compute create -n <namespace> --cpu 2 --memory 4 --name "worker-template"
./kuberay template compute create -n <namespace> --cpu 1 --memory 2 --name "head-template"
```

List compute templates created
Expand All @@ -132,7 +133,7 @@ List compute templates created
Create the cluster

```
./kuberay cluster create --name test-cluster --user jiaxin.shan \
./kuberay cluster create -n <namespace> --name test-cluster --user jiaxin.shan \
--head-compute-template head-template \
--head-image rayproject/ray:1.9.2 \
--worker-group-name small-wg \
Expand Down
10 changes: 8 additions & 2 deletions cli/pkg/cmd/template/compute/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

type CreateOptions struct {
name string
namespace string
cpu uint32
memory uint32
gpu uint32
Expand All @@ -31,12 +32,15 @@ func newCmdCreate() *cobra.Command {
return createComputeTemplate(opts)
},
}

cmd.Flags().StringVarP(&opts.namespace, "namespace", "n", "", "kubernetes namespace where the compute template will be stored")
cmd.Flags().StringVar(&opts.name, "name", "", "name of the compute template")
cmd.Flags().Uint32Var(&opts.cpu, "cpu", 1, "ray pod CPU")
cmd.Flags().Uint32Var(&opts.memory, "memory", 1, "ray pod memory in GB")
cmd.Flags().Uint32Var(&opts.gpu, "gpu", 0, "ray head GPU")
cmd.Flags().StringVar(&opts.gpuAccelerator, "gpu-accelerator", "", "GPU Accelerator type")
if err := cmd.MarkFlagRequired("namespace"); err != nil {
klog.Warning(err)
}
if err := cmd.MarkFlagRequired("name"); err != nil {
klog.Warning(err)
}
Expand All @@ -58,19 +62,21 @@ func createComputeTemplate(opts CreateOptions) error {

computeTemplate := &go_client.ComputeTemplate{
Name: opts.name,
Namespace: opts.namespace,
Cpu: opts.cpu,
Memory: opts.memory,
Gpu: opts.gpu,
GpuAccelerator: opts.gpuAccelerator,
}

r, err := client.CreateComputeTemplate(ctx, &go_client.CreateComputeTemplateRequest{
Namespace: opts.namespace,
ComputeTemplate: computeTemplate,
})
if err != nil {
log.Fatalf("could not create compute template %v", err)
}

log.Printf("compute template %v is created", r.Id)
log.Printf("compute template %v has been created in %v", r.Name, r.Namespace)
return nil
}
Loading