Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Feature: Spark retry docker pull #672

Merged
merged 4 commits into from
Oct 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions aztk/client/cluster/helpers/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,9 @@ def create_pool_and_job_and_table(
:param VmImageModel: the type of image to provision for the cluster
:param wait: wait until the cluster is ready
"""
# update storage with the necessary values
# save cluster configuration in storage
core_cluster_operations.get_cluster_data(cluster_conf.cluster_id).save_cluster_config(cluster_conf)

if cluster_conf.scheduling_target != models.SchedulingTarget.Any:
core_cluster_operations.create_task_table(cluster_conf.cluster_id)

# reuse pool_id as job_id
pool_id = cluster_conf.cluster_id
job_id = cluster_conf.cluster_id
Expand Down Expand Up @@ -71,4 +68,8 @@ def create_pool_and_job_and_table(
# Add job to batch
core_cluster_operations.batch_client.job.add(job)

# create storage task table
if cluster_conf.scheduling_target != models.SchedulingTarget.Any:
core_cluster_operations.create_task_table(cluster_conf.cluster_id)

return helpers.get_cluster(cluster_conf.cluster_id, core_cluster_operations.batch_client)
6 changes: 3 additions & 3 deletions aztk/client/job/helpers/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ def submit_job(
core_job_operations.get_cluster_data(job_configuration.id).save_cluster_config(
job_configuration.to_cluster_config())

if job_configuration.scheduling_target != models.SchedulingTarget.Any:
core_job_operations.create_task_table(job_configuration.id)

# get a verified node agent sku
sku_to_use, image_ref_to_use = helpers.select_latest_verified_vm_image_with_node_agent_sku(
vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, core_job_operations.batch_client)
Expand Down Expand Up @@ -84,4 +81,7 @@ def submit_job(

core_job_operations.batch_client.job_schedule.add(setup)

if job_configuration.scheduling_target != models.SchedulingTarget.Any:
core_job_operations.create_task_table(job_configuration.id)

return core_job_operations.batch_client.job_schedule.get(job_schedule_id=job_configuration.id)
14 changes: 11 additions & 3 deletions aztk/node_scripts/setup_host.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@ install_prerequisites () {

install_docker_compose () {
echo "Installing Docker-Compose"
for i in {1..5}; do
sudo curl -L https://github.com/docker/compose/releases/download/1.19.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose && break || sleep 2;
# Download docker-compose, retrying up to 5 times with a quadratic backoff
# (1, 4, 9, 16, 25 seconds) between failed attempts.
url=https://github.com/docker/compose/releases/download/1.19.0/docker-compose-`uname -s`-`uname -m`
for i in {1..5}; do
    if sudo curl -L $url -o /usr/local/bin/docker-compose; then
        break
    fi
    # $(( ... )) is arithmetic expansion; $( ... ) would be command
    # substitution and would try to execute "1**2" as a command, and a bare
    # `sleep $i**2` would hand sleep the literal string "1**2".
    echo "ERROR: failed to download docker-compose ... retrying in $((i**2)) seconds"
    sleep $((i**2))
done
sudo chmod +x /usr/local/bin/docker-compose
echo "Finished installing Docker-Compose"
Expand All @@ -59,7 +62,12 @@ pull_docker_container () {
docker login $DOCKER_ENDPOINT --username $DOCKER_USERNAME --password $DOCKER_PASSWORD
fi

docker pull $docker_repo_name

# Pull the image, retrying up to 5 times with a quadratic backoff
# (1, 4, 9, 16, 25 seconds) between failed attempts.
for i in {1..5}; do
    if docker pull $docker_repo_name; then
        break
    fi
    # $(( ... )) is arithmetic expansion; $( ... ) would be command
    # substitution and would try to execute "1**2" as a command, and a bare
    # `sleep $i**2` would hand sleep the literal string "1**2".
    echo "ERROR: docker pull $docker_repo_name failed ... retrying after $((i**2)) seconds"
    sleep $((i**2))
done
echo "Finished pulling $docker_repo_name"
}

Expand Down