diff --git a/setup-slave.sh b/setup-slave.sh
index 16e231c..fe73947 100755
--- a/setup-slave.sh
+++ b/setup-slave.sh
@@ -8,7 +8,7 @@ if [[ -e /sys/kernel/mm/transparent_hugepage/enabled ]]; then
 fi
 
 # Make sure we are in the spark-ec2 directory
-cd /root/spark-ec2
+pushd /root/spark-ec2
 
 source ec2-variables.sh
 
@@ -114,3 +114,5 @@ cat /root/spark-ec2/github.hostkey >> /root/.ssh/known_hosts
 echo '#!/bin/bash' > /usr/bin/realpath
 echo 'readlink -e "$@"' >> /usr/bin/realpath
 chmod a+x /usr/bin/realpath
+
+popd
diff --git a/setup.sh b/setup.sh
index e040661..57b917b 100755
--- a/setup.sh
+++ b/setup.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
 
+sudo yum install -y pssh
+
 # Make sure we are in the spark-ec2 directory
-cd /root/spark-ec2
+pushd /root/spark-ec2
 
 # Load the environment variables specific to this AMI
 source /root/.bash_profile
@@ -42,60 +44,21 @@ fi
 echo "Setting executable permissions on scripts..."
 find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x
 
-echo "Running setup-slave on master to mount filesystems, etc..."
-source ./setup-slave.sh
-
-echo "SSH'ing to master machine(s) to approve key(s)..."
-for master in $MASTERS; do
-  echo $master
-  ssh $SSH_OPTS $master echo -n &
-  sleep 0.3
-done
-ssh $SSH_OPTS localhost echo -n &
-ssh $SSH_OPTS `hostname` echo -n &
-wait
-
-# Try to SSH to each cluster node to approve their key. Since some nodes may
-# be slow in starting, we retry failed slaves up to 3 times.
-TODO="$SLAVES $OTHER_MASTERS" # List of nodes to try (initially all)
-TRIES="0" # Number of times we've tried so far
-echo "SSH'ing to other cluster nodes to approve keys..."
-while [ "e$TODO" != "e" ] && [ $TRIES -lt 4 ] ; do
-  NEW_TODO=
-  for slave in $TODO; do
-    echo $slave
-    ssh $SSH_OPTS $slave echo -n
-    if [ $? != 0 ] ; then
-      NEW_TODO="$NEW_TODO $slave"
-    fi
-  done
-  TRIES=$[$TRIES + 1]
-  if [ "e$NEW_TODO" != "e" ] && [ $TRIES -lt 4 ] ; then
-    sleep 15
-    TODO="$NEW_TODO"
-    echo "Re-attempting SSH to cluster nodes to approve keys..." 
-  else
-    break;
-  fi
-done
-
 echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..."
 for node in $SLAVES $OTHER_MASTERS; do
   echo $node
   rsync -e "ssh $SSH_OPTS" -az /root/spark-ec2 $node:/root &
   scp $SSH_OPTS ~/.ssh/id_rsa $node:.ssh &
-  sleep 0.3
+  sleep 0.1
 done
 wait
 
-# NOTE: We need to rsync spark-ec2 before we can run setup-slave.sh
-# on other cluster nodes
-echo "Running slave setup script on other cluster nodes..."
-for node in $SLAVES $OTHER_MASTERS; do
-  echo $node
-  ssh -t -t $SSH_OPTS root@$node "spark-ec2/setup-slave.sh" & sleep 0.3
-done
-wait
+echo "Running setup-slave on all cluster nodes to mount filesystems, etc..."
+pssh --inline \
+    --host "$MASTERS $SLAVES" \
+    --user root \
+    --extra-args "-t -t $SSH_OPTS" \
+    "spark-ec2/setup-slave.sh"
 
 # Always include 'scala' module if it's not defined as a work around
 # for older versions of the scripts.
@@ -126,6 +89,8 @@ chmod u+x /root/spark/conf/spark-env.sh
 for module in $MODULES; do
   echo "Setting up $module"
   source ./$module/setup.sh
-  sleep 1
+  sleep 0.1
   cd /root/spark-ec2  # guard against setup.sh changing the cwd
 done
+
+popd