Skip to content

Commit

Permalink
Use ray start block in Pod's entrypoint (#77)
Browse files Browse the repository at this point in the history
* use ray start block

Signed-off-by: chenk008 <[email protected]>

* add block into rayStartParams

* fix ut

* add block in sample config

* add sample without block

Co-authored-by: wuhua.ck <[email protected]>
  • Loading branch information
chenk008 and wuhua.ck authored Dec 2, 2021
1 parent 4204a41 commit 3102c53
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 34 deletions.
7 changes: 2 additions & 5 deletions ray-operator/config/samples/ray-cluster.complete.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -43,11 +44,6 @@ spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["python3"]
args:
- '/opt/sample_code.py'
env:
- name: CPU_REQUEST
valueFrom:
Expand Down Expand Up @@ -119,6 +115,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
metadata:
Expand Down
6 changes: 1 addition & 5 deletions ray-operator/config/samples/ray-cluster.getting-started.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -43,11 +44,6 @@ spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["python"]
args:
- '/opt/sample_code.py'
env:
- name: MY_POD_IP
valueFrom:
Expand Down
8 changes: 3 additions & 5 deletions ray-operator/config/samples/ray-cluster.heterogeneous.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -43,11 +44,6 @@ spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["sleep"]
args:
- "infinity"
env:
- name: MY_POD_IP
valueFrom:
Expand Down Expand Up @@ -91,6 +87,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
metadata:
Expand Down Expand Up @@ -165,6 +162,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: "true"
#pod template
template:
metadata:
Expand Down
6 changes: 1 addition & 5 deletions ray-operator/config/samples/ray-cluster.mini.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -47,11 +48,6 @@ spec:
image: rayproject/ray:1.8.0
#image: rayproject/ray:nightly
#image: bonsaidev.azurecr.io/bonsai/lazer-0-9-0-cpu:dev
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["sleep"]
args:
- "infinity"
env:
- name: MY_POD_IP
valueFrom:
Expand Down
98 changes: 98 additions & 0 deletions ray-operator/config/samples/ray-cluster.without-block.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
# An unique identifier for the head node and workers of this cluster.
name: raycluster-non-block
spec:
rayVersion: '1.8.0' # should match the Ray version in the image of the containers
######################headGroupSpecs#################################
# head group template and specs, (perhaps 'group' is not needed in the name)
headGroupSpec:
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
serviceType: ClusterIP
# the pod replicas in this group typed head (assuming there could be more than 1 in the future)
replicas: 1
# logical group name, for this called head-group, also can be functional
# pod type head or worker
# rayNodeType: head # Not needed since it is under the headgroup
# the following params are used to complete the ray start: ray start --head --block --redis-port=6379 ...
rayStartParams:
port: '6379' # should match headService targetPort
object-manager-port: '12345'
node-manager-port: '12346'
object-store-memory: '100000000'
redis-password: 'LetMeInRay'
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
#pod template
template:
metadata:
labels:
# custom labels. NOTE: do not define custom labels start with `raycluster.`, they may be used in controller.
# Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
rayCluster: raycluster-sample # will be injected if missing
rayNodeType: head # will be injected if missing, must be head or wroker
groupName: headgroup # will be injected if missing
# annotations for pod
annotations:
key: value
spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# Without the `--block` flag, you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: [ "python" ]
args:
- '/opt/sample_code.py'
env:
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 6379
volumeMounts:
- mountPath: /opt
name: config
volumes:
# You set volumes at the Pod level, then mount them into containers inside that Pod
- name: config
configMap:
# Provide the name of the ConfigMap you want to mount.
name: ray-code-single
# An array of keys from the ConfigMap to create as files
items:
- key: sample_code.py
path: sample_code.py
######################Ray code sample#################################
# this is only an example code that is mounted into the container and executed to show the Ray cluster at work
---
apiVersion: v1
kind: ConfigMap
metadata:
name: ray-code-single
data:
sample_code.py: |
import ray
from os import environ
redis_pass = environ.get("REDIS_PASSWORD")
print("trying to connect to Ray!")
ray.init(address="auto", _redis_password=redis_pass)
print("now executing some code with Ray!")
import time
start = time.time()
@ray.remote
def f():
time.sleep(0.01)
return ray._private.services.get_node_ip_address()
values=set(ray.get([f.remote() for _ in range(1000)]))
print("Ray Nodes: ",str(values))
file = open("/tmp/ray_nodes.txt","a")
file.write("available nodes: %s\n" % str(values))
file.close()
end = time.time()
print("Execution time = ",end - start)
27 changes: 21 additions & 6 deletions ray-operator/controllers/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,13 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
Spec: podTemplateSpec.Spec,
}
index := getRayContainerIndex(pod)
cont := concatenateContainerCommand(rayNodeType, rayStartParams)

addEmptyDir(&pod.Spec.Containers[index], &pod)
cleanupInvalidVolumeMounts(&pod.Spec.Containers[index], &pod)
if len(pod.Spec.InitContainers) > index {
cleanupInvalidVolumeMounts(&pod.Spec.InitContainers[index], &pod)
}

// saving temporarily the old command and args
var cmd, args string
if len(pod.Spec.Containers[index].Command) > 0 {
cmd = convertCmdToString(pod.Spec.Containers[index].Command)
Expand All @@ -89,17 +87,23 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
cmd += convertCmdToString(pod.Spec.Containers[index].Args)
}
if !strings.Contains(cmd, "ray start") {
cont := concatenateContainerCommand(rayNodeType, rayStartParams)
// replacing the old command
pod.Spec.Containers[index].Command = []string{"/bin/bash", "-c", "--"}
if cmd != "" {
// sleep infinity is used to keep the pod `running` after the last command exits, and not go into `completed` state
args = fmt.Sprintf("%s && %s && %s", cont, cmd, "sleep infinity")
args = fmt.Sprintf("%s && %s", cont, cmd)
} else {
args = fmt.Sprintf("%s && %s", cont, "sleep infinity")
args = cont
}

if !isRayStartWithBlock(rayStartParams) {
// sleep infinity is used to keep the pod `running` after the last command exits, and not go into `completed` state
args = args + " && sleep infinity"
}

pod.Spec.Containers[index].Args = []string{args}
}

for index := range pod.Spec.InitContainers {
setInitContainerEnvVars(&pod.Spec.InitContainers[index], svcName)
}
Expand All @@ -109,6 +113,13 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
return pod
}

func isRayStartWithBlock(rayStartParams map[string]string) bool {
if blockValue, exist := rayStartParams["block"]; exist {
return strings.ToLower(blockValue) == "true"
}
return false
}

func convertCmdToString(cmdArr []string) (cmd string) {
cmdAggr := new(bytes.Buffer)
for _, v := range cmdArr {
Expand Down Expand Up @@ -264,7 +275,11 @@ func concatenateContainerCommand(nodeType rayiov1alpha1.RayNodeType, rayStartPar
func convertParamMap(rayStartParams map[string]string) (s string) {
flags := new(bytes.Buffer)
for k, v := range rayStartParams {
fmt.Fprintf(flags, " --%s=%s ", k, v)
if strings.ToLower(v) == "true" {
fmt.Fprintf(flags, " --%s ", k)
} else {
fmt.Fprintf(flags, " --%s=%s ", k, v)
}
}
return flags.String()
}
Expand Down
31 changes: 23 additions & 8 deletions ray-operator/controllers/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package common
import (
"fmt"
"reflect"
"sort"
"strings"
"testing"

Expand Down Expand Up @@ -43,10 +44,8 @@ var instance = &rayiov1alpha1.RayCluster{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
corev1.Container{
Name: "ray-head",
Image: "rayproject/autoscaler",
Command: []string{"python"},
Args: []string{"/opt/code.py"},
Name: "ray-head",
Image: "rayproject/autoscaler",
Env: []corev1.EnvVar{
corev1.EnvVar{
Name: "MY_POD_IP",
Expand All @@ -72,6 +71,7 @@ var instance = &rayiov1alpha1.RayCluster{
"port": "6379",
"redis-password": "LetMeInRay",
"num-cpus": "1",
"block": "true",
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -84,10 +84,8 @@ var instance = &rayiov1alpha1.RayCluster{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
corev1.Container{
Name: "ray-worker",
Image: "rayproject/autoscaler",
Command: []string{"echo"},
Args: []string{"Hello Ray"},
Name: "ray-worker",
Image: "rayproject/autoscaler",
Env: []corev1.EnvVar{
corev1.EnvVar{
Name: "MY_POD_IP",
Expand Down Expand Up @@ -141,4 +139,21 @@ func TestBuildPod(t *testing.T) {
if !reflect.DeepEqual(expectedResult, actualResult) {
t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult)
}

expectedCommandArg := splitAndSort("ulimit -n 65536; ray start --block --num-cpus=1 --address=raycluster-sample-head-svc:6379 --port=6379 --redis-password=LetMeInRay")
if !reflect.DeepEqual(expectedCommandArg, splitAndSort(pod.Spec.Containers[0].Args[0])) {
t.Fatalf("Expected `%v` but got `%v`", expectedCommandArg, pod.Spec.Containers[0].Args)
}
}

func splitAndSort(s string) []string {
strs := strings.Split(s, " ")
result := make([]string, 0, len(strs))
for _, s := range strs {
if len(s) > 0 {
result = append(result, s)
}
}
sort.Strings(result)
return result
}

0 comments on commit 3102c53

Please sign in to comment.