From 44088499ad43f4251423ff08926e72d26bbd378b Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Mon, 10 Nov 2014 16:53:05 -0500 Subject: [PATCH 1/9] Use pssh. Replace bash-isms with pssh to neatly parallelize cluster operations. Also, decrease questionably high sleep times. --- setup.sh | 63 +++++++++++++++----------------------------------------- 1 file changed, 17 insertions(+), 46 deletions(-) diff --git a/setup.sh b/setup.sh index e040661..406efe0 100755 --- a/setup.sh +++ b/setup.sh @@ -1,5 +1,7 @@ #!/bin/bash +yum install -y pssh + # Make sure we are in the spark-ec2 directory cd /root/spark-ec2 @@ -42,60 +44,29 @@ fi echo "Setting executable permissions on scripts..." find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x -echo "Running setup-slave on master to mount filesystems, etc..." -source ./setup-slave.sh - -echo "SSH'ing to master machine(s) to approve key(s)..." -for master in $MASTERS; do - echo $master - ssh $SSH_OPTS $master echo -n & - sleep 0.3 -done -ssh $SSH_OPTS localhost echo -n & -ssh $SSH_OPTS `hostname` echo -n & -wait - -# Try to SSH to each cluster node to approve their key. Since some nodes may -# be slow in starting, we retry failed slaves up to 3 times. -TODO="$SLAVES $OTHER_MASTERS" # List of nodes to try (initially all) -TRIES="0" # Number of times we've tried so far -echo "SSH'ing to other cluster nodes to approve keys..." -while [ "e$TODO" != "e" ] && [ $TRIES -lt 4 ] ; do - NEW_TODO= - for slave in $TODO; do - echo $slave - ssh $SSH_OPTS $slave echo -n - if [ $? != 0 ] ; then - NEW_TODO="$NEW_TODO $slave" - fi - done - TRIES=$[$TRIES + 1] - if [ "e$NEW_TODO" != "e" ] && [ $TRIES -lt 4 ] ; then - sleep 15 - TODO="$NEW_TODO" - echo "Re-attempting SSH to cluster nodes to approve keys..." - else - break; - fi -done +echo "SSH-ing to all cluster nodes to approve keys..." +pssh --inline \ + --host "localhost $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." for node in $SLAVES $OTHER_MASTERS; do echo $node rsync -e "ssh $SSH_OPTS" -az /root/spark-ec2 $node:/root & scp $SSH_OPTS ~/.ssh/id_rsa $node:.ssh & - sleep 0.3 + sleep 0.1 done wait -# NOTE: We need to rsync spark-ec2 before we can run setup-slave.sh -# on other cluster nodes -echo "Running slave setup script on other cluster nodes..." -for node in $SLAVES $OTHER_MASTERS; do - echo $node - ssh -t -t $SSH_OPTS root@$node "spark-ec2/setup-slave.sh" & sleep 0.3 -done -wait +echo "Running setup-slave on all cluster nodes to mount filesystems, etc..." +pssh --inline \ + --host "$MASTERS $SLAVES" \ + --user root \ + --extra-args "-t -t $SSH_OPTS" \ + "spark-ec2/setup-slave.sh" + # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts. @@ -126,6 +97,6 @@ chmod u+x /root/spark/conf/spark-env.sh for module in $MODULES; do echo "Setting up $module" source ./$module/setup.sh - sleep 1 + sleep 0.1 cd /root/spark-ec2 # guard against setup.sh changing the cwd done From 06bf4d18031a71b87e3b7a6c8c2fe10452ec1906 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 11 Nov 2014 15:31:08 -0500 Subject: [PATCH 2/9] Approve key to local hostname. --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 406efe0..bd8fedc 100755 --- a/setup.sh +++ b/setup.sh @@ -46,7 +46,7 @@ find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x echo "SSH-ing to all cluster nodes to approve keys..." pssh --inline \ - --host "localhost $MASTERS $SLAVES" \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ --user root \ --extra-args "$SSH_OPTS" \ ":" From c12f4e0e77cebb17ff432999dc16071ade296c8e Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Sun, 16 Nov 2014 23:02:08 -0500 Subject: [PATCH 3/9] approve keys twice --- setup.sh | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/setup.sh b/setup.sh index bd8fedc..bf35bb6 100755 --- a/setup.sh +++ b/setup.sh @@ -11,6 +11,14 @@ source /root/.bash_profile # Load the cluster variables set by the deploy script source ec2-variables.sh +function approve_ssh_keys () { + pssh --inline \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" +} + # Set hostname based on EC2 private DNS name, so that it is set correctly # even if the instance is restarted with a different private DNS name PRIVATE_DNS=`wget -q -O - http://instance-data.ec2.internal/latest/meta-data/local-hostname` @@ -45,11 +53,7 @@ echo "Setting executable permissions on scripts..." find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x echo "SSH-ing to all cluster nodes to approve keys..." -pssh --inline \ - --host "localhost $(hostname) $MASTERS $SLAVES" \ - --user root \ - --extra-args "$SSH_OPTS" \ - ":" +approve_ssh_keys echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." for node in $SLAVES $OTHER_MASTERS; do @@ -67,6 +71,9 @@ pssh --inline \ --extra-args "-t -t $SSH_OPTS" \ "spark-ec2/setup-slave.sh" +echo "SSH-ing to all cluster nodes to re-approve keys..." +# We do this again because setup-slave.sh clears out .ssh/known_hosts. +approve_ssh_keys # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts. From 6af05df0d9409ab9a752f7d02bc5e1f22b4e8480 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 18 Nov 2014 12:29:40 -0500 Subject: [PATCH 4/9] time the pssh calls --- setup.sh | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/setup.sh b/setup.sh index bf35bb6..40c84e2 100755 --- a/setup.sh +++ b/setup.sh @@ -12,11 +12,14 @@ source /root/.bash_profile source ec2-variables.sh function approve_ssh_keys () { - pssh --inline \ - --host "localhost $(hostname) $MASTERS $SLAVES" \ - --user root \ - --extra-args "$SSH_OPTS" \ - ":" + time { + pssh --inline \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" + } + echo " ✝ approve_ssh_keys" } # Set hostname based on EC2 private DNS name, so that it is set correctly @@ -65,11 +68,14 @@ done wait echo "Running setup-slave on all cluster nodes to mount filesystems, etc..." -pssh --inline \ - --host "$MASTERS $SLAVES" \ - --user root \ - --extra-args "-t -t $SSH_OPTS" \ - "spark-ec2/setup-slave.sh" +time { + pssh --inline \ + --host "$MASTERS $SLAVES" \ + --user root \ + --extra-args "-t -t $SSH_OPTS" \ + "spark-ec2/setup-slave.sh" +} +echo " ✝ setup-slave" echo "SSH-ing to all cluster nodes to re-approve keys..." # We do this again because setup-slave.sh clears out .ssh/known_hosts. From 3fa084590e408b2cf36200d35d06b2ea7ae969af Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 27 Nov 2014 17:00:28 -0500 Subject: [PATCH 5/9] Revert "time the pssh calls" This reverts commit d9333af0a21d1aff3216471823344f326ef84cfb. --- setup.sh | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/setup.sh b/setup.sh index 40c84e2..bf35bb6 100755 --- a/setup.sh +++ b/setup.sh @@ -12,14 +12,11 @@ source /root/.bash_profile source ec2-variables.sh function approve_ssh_keys () { - time { - pssh --inline \ - --host "localhost $(hostname) $MASTERS $SLAVES" \ - --user root \ - --extra-args "$SSH_OPTS" \ - ":" - } - echo " ✝ approve_ssh_keys" + pssh --inline \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" } # Set hostname based on EC2 private DNS name, so that it is set correctly @@ -68,14 +65,11 @@ done wait echo "Running setup-slave on all cluster nodes to mount filesystems, etc..." -time { - pssh --inline \ - --host "$MASTERS $SLAVES" \ - --user root \ - --extra-args "-t -t $SSH_OPTS" \ - "spark-ec2/setup-slave.sh" -} -echo " ✝ setup-slave" +pssh --inline \ + --host "$MASTERS $SLAVES" \ + --user root \ + --extra-args "-t -t $SSH_OPTS" \ + "spark-ec2/setup-slave.sh" echo "SSH-ing to all cluster nodes to re-approve keys..." # We do this again because setup-slave.sh clears out .ssh/known_hosts. From d82356c958a0fb3c823a2e7b964e6ea9e3dd75e1 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Mon, 22 Dec 2014 22:00:03 -0500 Subject: [PATCH 6/9] test removing ssh pre-approval --- setup.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.sh b/setup.sh index bf35bb6..cdacfc0 100755 --- a/setup.sh +++ b/setup.sh @@ -52,8 +52,8 @@ fi echo "Setting executable permissions on scripts..." find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x -echo "SSH-ing to all cluster nodes to approve keys..." -approve_ssh_keys +# echo "SSH-ing to all cluster nodes to approve keys..." +# approve_ssh_keys echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." for node in $SLAVES $OTHER_MASTERS; do @@ -71,9 +71,9 @@ pssh --inline \ --extra-args "-t -t $SSH_OPTS" \ "spark-ec2/setup-slave.sh" -echo "SSH-ing to all cluster nodes to re-approve keys..." +# echo "SSH-ing to all cluster nodes to re-approve keys..." # We do this again because setup-slave.sh clears out .ssh/known_hosts. -approve_ssh_keys +# approve_ssh_keys # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts. From af29aa835f3bd6788a58653c075b76f38254efd8 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 23 Dec 2014 22:34:18 -0500 Subject: [PATCH 7/9] use pushd/popd --- setup-slave.sh | 4 +++- setup.sh | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/setup-slave.sh b/setup-slave.sh index 16e231c..fe73947 100755 --- a/setup-slave.sh +++ b/setup-slave.sh @@ -8,7 +8,7 @@ if [[ -e /sys/kernel/mm/transparent_hugepage/enabled ]]; then fi # Make sure we are in the spark-ec2 directory -cd /root/spark-ec2 +pushd /root/spark-ec2 source ec2-variables.sh @@ -114,3 +114,5 @@ cat /root/spark-ec2/github.hostkey >> /root/.ssh/known_hosts echo '#!/bin/bash' > /usr/bin/realpath echo 'readlink -e "$@"' >> /usr/bin/realpath chmod a+x /usr/bin/realpath + +popd diff --git a/setup.sh b/setup.sh index cdacfc0..ab7758b 100755 --- a/setup.sh +++ b/setup.sh @@ -3,7 +3,7 @@ yum install -y pssh # Make sure we are in the spark-ec2 directory -cd /root/spark-ec2 +pushd /root/spark-ec2 # Load the environment variables specific to this AMI source /root/.bash_profile @@ -107,3 +107,5 @@ for module in $MODULES; do sleep 0.1 cd /root/spark-ec2 # guard against setup.sh changing the cwd done + +popd From 4d76809f3d7a4d53eaafd6cb63ff8e9751ad7a94 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 23 Dec 2014 22:34:57 -0500 Subject: [PATCH 8/9] remove unused ssh pre-approval code --- setup.sh | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/setup.sh b/setup.sh index ab7758b..08fc753 100755 --- a/setup.sh +++ b/setup.sh @@ -11,14 +11,6 @@ source /root/.bash_profile # Load the cluster variables set by the deploy script source ec2-variables.sh -function approve_ssh_keys () { - pssh --inline \ - --host "localhost $(hostname) $MASTERS $SLAVES" \ - --user root \ - --extra-args "$SSH_OPTS" \ - ":" -} - # Set hostname based on EC2 private DNS name, so that it is set correctly # even if the instance is restarted with a different private DNS name PRIVATE_DNS=`wget -q -O - http://instance-data.ec2.internal/latest/meta-data/local-hostname` @@ -52,9 +44,6 @@ fi echo "Setting executable permissions on scripts..." find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x -# echo "SSH-ing to all cluster nodes to approve keys..." -# approve_ssh_keys - echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." for node in $SLAVES $OTHER_MASTERS; do echo $node @@ -71,10 +60,6 @@ pssh --inline \ --extra-args "-t -t $SSH_OPTS" \ "spark-ec2/setup-slave.sh" -# echo "SSH-ing to all cluster nodes to re-approve keys..." -# We do this again because setup-slave.sh clears out .ssh/known_hosts. -# approve_ssh_keys - # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts. if [[ ! $MODULES =~ *scala* ]]; then From fb643c8b32eb3a2662347954ad2de46691c9938d Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 23 Dec 2014 22:35:32 -0500 Subject: [PATCH 9/9] sudo yum --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 08fc753..57b917b 100755 --- a/setup.sh +++ b/setup.sh @@ -1,6 +1,6 @@ #!/bin/bash -yum install -y pssh +sudo yum install -y pssh # Make sure we are in the spark-ec2 directory pushd /root/spark-ec2