diff --git a/scripts/aws/run_systemds_script.sh b/scripts/aws/run_systemds_script.sh
index 4c58fcec9bb..db2d7185e24 100755
--- a/scripts/aws/run_systemds_script.sh
+++ b/scripts/aws/run_systemds_script.sh
@@ -32,7 +32,7 @@ fi
 
 source systemds_cluster.config
 
-aws s3 cp $1 s3://system-ds-bucket/ --exclude "*" --include "*.dml"
+aws s3 cp $1 s3://${BUCKET}/ --exclude "*" --include "*.dml"
 
 if [ ! -z "$2" ]
 then
@@ -50,7 +50,7 @@ STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
     --driver-memory,$SPARK_DRIVER_MEMORY,
     --num-executors,$SPARK_NUM_EXECUTORS,
     --conf,spark.driver.maxResultSize=0,
-    $SYSTEMDS_JAR_PATH, -f, s3://system-ds-bucket/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")
+    $SYSTEMDS_JAR_PATH, -f, s3://$BUCKET/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")
 
 STEP_ID=$(echo $STEP_INFO | jq .StepIds | tr -d '"' | tr -d ']' | tr -d '[' | tr -d '[:space:]' )
 echo "Waiting for the step to finish"
diff --git a/scripts/aws/spinup_systemds_cluster.sh b/scripts/aws/spinup_systemds_cluster.sh
index 8c93a636eae..58f9f2db059 100755
--- a/scripts/aws/spinup_systemds_cluster.sh
+++ b/scripts/aws/spinup_systemds_cluster.sh
@@ -46,9 +46,18 @@ set_config "SPARK_EXECUTOR_MEMORY" $SPARK_EXECUTOR_MEMORY
 set_config "SPARK_DRIVER_MEMORY" "1G"
 set_config "BUCKET" $BUCKET-$(((RANDOM % 999) + 1000))
 
-#Create systemDS bucket
-aws s3api create-bucket --bucket $BUCKET --region $REGION &> /dev/null
-aws s3api create-bucket --bucket $BUCKET-logs --region $REGION &> /dev/null
+#Source again to update the changes for the current session
+source systemds_cluster.config
+
+#Create systemDS bucket (a LocationConstraint is required for regions outside of us-east-1,
+#but must be omitted for us-east-1 itself, otherwise CreateBucket fails)
+if [ "$REGION" = "us-east-1" ]; then
+    aws s3api create-bucket --bucket $BUCKET --region $REGION &> /dev/null
+    aws s3api create-bucket --bucket $BUCKET-logs --region $REGION &> /dev/null
+else
+    aws s3api create-bucket --bucket $BUCKET --region $REGION --create-bucket-configuration LocationConstraint=$REGION &> /dev/null
+    aws s3api create-bucket --bucket $BUCKET-logs --region $REGION --create-bucket-configuration LocationConstraint=$REGION &> /dev/null
+fi
 
 # Upload Jar and scripts to s3
 aws s3 sync $SYSTEMDS_TARGET_DIRECTORY s3://$BUCKET --exclude "*" --include "*.dml" --include "*config.xml" --include "*DS.jar*"
@@ -60,11 +69,17 @@ if [ ! -f ${KEYPAIR_NAME}.pem ]; then
     echo "${KEYPAIR_NAME}.pem private key created!"
 fi
 
+#Get the first available subnet in the default VPC of the configured region
+DEFAULT_SUBNET=$(aws ec2 describe-subnets --region $REGION \
+    --filter "Name=defaultForAz,Values=true" --query "Subnets[0].SubnetId" --output text)
+
 #Create the cluster
+#Note: Ganglia not available since emr-6.15.0: exchanged with AmazonCloudWatchAgent
 CLUSTER_INFO=$(aws emr create-cluster \
- --applications Name=Ganglia Name=Spark \
- --ec2-attributes '{"KeyName":"'${KEYPAIR_NAME}'",
-                    "InstanceProfile":"EMR_EC2_DefaultRole"}'\
+ --applications Name=AmazonCloudWatchAgent Name=Spark \
+ --ec2-attributes '{"KeyName":"'${KEYPAIR_NAME}'",
+                    "InstanceProfile":"EMR_EC2_DefaultRole",
+                    "SubnetId": "'${DEFAULT_SUBNET}'"}'\
 --service-role EMR_DefaultRole \
 --enable-debugging \
 --release-label $EMR_VERSION \
@@ -104,6 +119,6 @@ echo "Cluster info:"
 export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID | jq .Cluster.MasterPublicDnsName | tr -d '"')
 
 aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION \
-    --command 'aws s3 cp s3://system-ds-bucket/target . --recursive --exclude "*" --include "*DS.jar*"'
+    --command 'aws s3 cp s3://'${BUCKET}' . --recursive --exclude "*" --include "*DS.jar*"'
 
 echo "Spinup finished."
diff --git a/scripts/aws/systemds_cluster.config b/scripts/aws/systemds_cluster.config
index a254bbc8649..8afed8d2bba 100644
--- a/scripts/aws/systemds_cluster.config
+++ b/scripts/aws/systemds_cluster.config
@@ -23,8 +23,8 @@
 
 KEYPAIR_NAME="SystemDSkeynamex"
 REGION="us-east-1"
-BUCKET="systemds-bucket"
-EMR_VERSION="emr-5.28.0"
+BUCKET=systemds-bucket
+EMR_VERSION="emr-7.0.0"
 
 INSTANCES_TYPE="m5.xlarge"
 MASTER_INSTANCES_COUNT=1