Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ray start block in Pod's entrypoint #77

Merged
merged 7 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions ray-operator/config/samples/ray-cluster.complete.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -43,11 +44,6 @@ spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# you can have any command and args here to run your code.
Jeffwan marked this conversation as resolved.
Show resolved Hide resolved
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["python3"]
args:
- '/opt/sample_code.py'
env:
- name: CPU_REQUEST
valueFrom:
Expand Down Expand Up @@ -119,6 +115,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
metadata:
Expand Down
6 changes: 1 addition & 5 deletions ray-operator/config/samples/ray-cluster.getting-started.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -43,11 +44,6 @@ spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["python"]
args:
- '/opt/sample_code.py'
env:
- name: MY_POD_IP
valueFrom:
Expand Down
8 changes: 3 additions & 5 deletions ray-operator/config/samples/ray-cluster.heterogeneous.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -43,11 +44,6 @@ spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["sleep"]
args:
- "infinity"
env:
- name: MY_POD_IP
valueFrom:
Expand Down Expand Up @@ -91,6 +87,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
metadata:
Expand Down Expand Up @@ -165,6 +162,7 @@ spec:
rayStartParams:
redis-password: 'LetMeInRay'
node-ip-address: $MY_POD_IP
block: "true"
#pod template
template:
metadata:
Expand Down
6 changes: 1 addition & 5 deletions ray-operator/config/samples/ray-cluster.mini.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ spec:
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
#pod template
template:
metadata:
Expand All @@ -47,11 +48,6 @@ spec:
image: rayproject/ray:1.8.0
#image: rayproject/ray:nightly
#image: bonsaidev.azurecr.io/bonsai/lazer-0-9-0-cpu:dev
# you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: ["sleep"]
args:
- "infinity"
env:
- name: MY_POD_IP
valueFrom:
Expand Down
98 changes: 98 additions & 0 deletions ray-operator/config/samples/ray-cluster.without-block.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
# An unique identifier for the head node and workers of this cluster.
name: raycluster-non-block
spec:
rayVersion: '1.8.0' # should match the Ray version in the image of the containers
######################headGroupSpecs#################################
# head group template and specs, (perhaps 'group' is not needed in the name)
headGroupSpec:
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
serviceType: ClusterIP
# the pod replicas in this group typed head (assuming there could be more than 1 in the future)
replicas: 1
# logical group name, for this called head-group, also can be functional
# pod type head or worker
# rayNodeType: head # Not needed since it is under the headgroup
# the following params are used to complete the ray start: ray start --head --block --redis-port=6379 ...
rayStartParams:
port: '6379' # should match headService targetPort
object-manager-port: '12345'
node-manager-port: '12346'
object-store-memory: '100000000'
redis-password: 'LetMeInRay'
dashboard-host: '0.0.0.0'
num-cpus: '1' # can be auto-completed from the limits
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
#pod template
template:
metadata:
labels:
# custom labels. NOTE: do not define custom labels start with `raycluster.`, they may be used in controller.
# Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
rayCluster: raycluster-sample # will be injected if missing
rayNodeType: head # will be injected if missing, must be head or wroker
groupName: headgroup # will be injected if missing
# annotations for pod
annotations:
key: value
spec:
containers:
- name: ray-head
image: rayproject/ray:1.8.0
# Without the `--block` flag, you can have any command and args here to run your code.
# the below command/args will be appended after the Ray start command and it args, and executed after Ray start.
command: [ "python" ]
args:
- '/opt/sample_code.py'
env:
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 6379
volumeMounts:
- mountPath: /opt
name: config
volumes:
# You set volumes at the Pod level, then mount them into containers inside that Pod
- name: config
configMap:
# Provide the name of the ConfigMap you want to mount.
name: ray-code-single
# An array of keys from the ConfigMap to create as files
items:
- key: sample_code.py
path: sample_code.py
######################Ray code sample#################################
# this is only an example code that is mounted into the container and executed to show the Ray cluster at work
---
apiVersion: v1
kind: ConfigMap
metadata:
name: ray-code-single
data:
sample_code.py: |
import ray
from os import environ
redis_pass = environ.get("REDIS_PASSWORD")
print("trying to connect to Ray!")
ray.init(address="auto", _redis_password=redis_pass)
print("now executing some code with Ray!")
import time
start = time.time()
@ray.remote
def f():
time.sleep(0.01)
return ray._private.services.get_node_ip_address()
values=set(ray.get([f.remote() for _ in range(1000)]))
print("Ray Nodes: ",str(values))
file = open("/tmp/ray_nodes.txt","a")
file.write("available nodes: %s\n" % str(values))
file.close()
end = time.time()
print("Execution time = ",end - start)
27 changes: 21 additions & 6 deletions ray-operator/controllers/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,13 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
Spec: podTemplateSpec.Spec,
}
index := getRayContainerIndex(pod)
cont := concatenateContainerCommand(rayNodeType, rayStartParams)

addEmptyDir(&pod.Spec.Containers[index], &pod)
cleanupInvalidVolumeMounts(&pod.Spec.Containers[index], &pod)
if len(pod.Spec.InitContainers) > index {
cleanupInvalidVolumeMounts(&pod.Spec.InitContainers[index], &pod)
}

// saving temporarily the old command and args
var cmd, args string
if len(pod.Spec.Containers[index].Command) > 0 {
cmd = convertCmdToString(pod.Spec.Containers[index].Command)
Expand All @@ -89,17 +87,23 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
cmd += convertCmdToString(pod.Spec.Containers[index].Args)
}
if !strings.Contains(cmd, "ray start") {
cont := concatenateContainerCommand(rayNodeType, rayStartParams)
// replacing the old command
pod.Spec.Containers[index].Command = []string{"/bin/bash", "-c", "--"}
if cmd != "" {
// sleep infinity is used to keep the pod `running` after the last command exits, and not go into `completed` state
args = fmt.Sprintf("%s && %s && %s", cont, cmd, "sleep infinity")
args = fmt.Sprintf("%s && %s", cont, cmd)
} else {
args = fmt.Sprintf("%s && %s", cont, "sleep infinity")
args = cont
}

if !isRayStartWithBlock(rayStartParams) {
// sleep infinity is used to keep the pod `running` after the last command exits, and not go into `completed` state
args = args + " && sleep infinity"
}

pod.Spec.Containers[index].Args = []string{args}
}

for index := range pod.Spec.InitContainers {
setInitContainerEnvVars(&pod.Spec.InitContainers[index], svcName)
}
Expand All @@ -109,6 +113,13 @@ func BuildPod(podTemplateSpec v1.PodTemplateSpec, rayNodeType rayiov1alpha1.RayN
return pod
}

func isRayStartWithBlock(rayStartParams map[string]string) bool {
if blockValue, exist := rayStartParams["block"]; exist {
return strings.ToLower(blockValue) == "true"
}
return false
}

func convertCmdToString(cmdArr []string) (cmd string) {
cmdAggr := new(bytes.Buffer)
for _, v := range cmdArr {
Expand Down Expand Up @@ -264,7 +275,11 @@ func concatenateContainerCommand(nodeType rayiov1alpha1.RayNodeType, rayStartPar
func convertParamMap(rayStartParams map[string]string) (s string) {
flags := new(bytes.Buffer)
for k, v := range rayStartParams {
fmt.Fprintf(flags, " --%s=%s ", k, v)
if strings.ToLower(v) == "true" {
fmt.Fprintf(flags, " --%s ", k)
} else {
fmt.Fprintf(flags, " --%s=%s ", k, v)
}
}
return flags.String()
}
Expand Down
31 changes: 23 additions & 8 deletions ray-operator/controllers/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package common
import (
"fmt"
"reflect"
"sort"
"strings"
"testing"

Expand Down Expand Up @@ -43,10 +44,8 @@ var instance = &rayiov1alpha1.RayCluster{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
corev1.Container{
Name: "ray-head",
Image: "rayproject/autoscaler",
Command: []string{"python"},
Args: []string{"/opt/code.py"},
Name: "ray-head",
Image: "rayproject/autoscaler",
Env: []corev1.EnvVar{
corev1.EnvVar{
Name: "MY_POD_IP",
Expand All @@ -72,6 +71,7 @@ var instance = &rayiov1alpha1.RayCluster{
"port": "6379",
"redis-password": "LetMeInRay",
"num-cpus": "1",
"block": "true",
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -84,10 +84,8 @@ var instance = &rayiov1alpha1.RayCluster{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
corev1.Container{
Name: "ray-worker",
Image: "rayproject/autoscaler",
Command: []string{"echo"},
Args: []string{"Hello Ray"},
Name: "ray-worker",
Image: "rayproject/autoscaler",
Env: []corev1.EnvVar{
corev1.EnvVar{
Name: "MY_POD_IP",
Expand Down Expand Up @@ -141,4 +139,21 @@ func TestBuildPod(t *testing.T) {
if !reflect.DeepEqual(expectedResult, actualResult) {
t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult)
}

expectedCommandArg := splitAndSort("ulimit -n 65536; ray start --block --num-cpus=1 --address=raycluster-sample-head-svc:6379 --port=6379 --redis-password=LetMeInRay")
if !reflect.DeepEqual(expectedCommandArg, splitAndSort(pod.Spec.Containers[0].Args[0])) {
t.Fatalf("Expected `%v` but got `%v`", expectedCommandArg, pod.Spec.Containers[0].Args)
}
}

func splitAndSort(s string) []string {
strs := strings.Split(s, " ")
result := make([]string, 0, len(strs))
for _, s := range strs {
if len(s) > 0 {
result = append(result, s)
}
}
sort.Strings(result)
return result
}