Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYSTEMDS-2926] AWS scripts update for EMR-7.0.0 #2003

Merged
merged 4 commits into from
Feb 25, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions scripts/aws/run_systemds_script.sh
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ fi

source systemds_cluster.config

aws s3 cp $1 s3://system-ds-bucket/ --exclude "*" --include "*.dml"
aws s3 cp $1 s3://${BUCKET} --exclude "*" --include "*.dml"

if [ ! -z "$2" ]
then
@@ -50,7 +50,7 @@ STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
--driver-memory,$SPARK_DRIVER_MEMORY,
--num-executors,$SPARK_NUM_EXECUTORS,
--conf,spark.driver.maxResultSize=0,
$SYSTEMDS_JAR_PATH, -f, s3://system-ds-bucket/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")
$SYSTEMDS_JAR_PATH, -f, s3://$BUCKET/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")

STEP_ID=$(echo $STEP_INFO | jq .StepIds | tr -d '"' | tr -d ']' | tr -d '[' | tr -d '[:space:]' )
echo "Waiting for the step to finish"
21 changes: 15 additions & 6 deletions scripts/aws/spinup_systemds_cluster.sh
Original file line number Diff line number Diff line change
@@ -46,9 +46,12 @@ set_config "SPARK_EXECUTOR_MEMORY" $SPARK_EXECUTOR_MEMORY
set_config "SPARK_DRIVER_MEMORY" "1G"
set_config "BUCKET" $BUCKET-$(((RANDOM % 999) + 1000))

#Create systemDS bucket
aws s3api create-bucket --bucket $BUCKET --region $REGION &> /dev/null
aws s3api create-bucket --bucket $BUCKET-logs --region $REGION &> /dev/null
#Source again to update the changes for the current session
source systemds_cluster.config

#######################################
# Create an S3 bucket in the given region.
# S3 requires a LocationConstraint for every region except us-east-1,
# and rejects the request if LocationConstraint=us-east-1 is passed,
# so the constraint is only added for regions other than us-east-1.
# Arguments: $1 - bucket name
#            $2 - AWS region
# Returns:   exit status of the aws CLI call
#######################################
create_s3_bucket() {
  local name=$1
  local region=$2
  local -a args=(--bucket "$name" --region "$region")
  if [[ "$region" != "us-east-1" ]]; then
    args+=(--create-bucket-configuration "LocationConstraint=$region")
  fi
  aws s3api create-bucket "${args[@]}"
}

#Create systemDS bucket and its log bucket
#(output is suppressed, as before, so failures of these calls are not reported)
create_s3_bucket "$BUCKET" "$REGION" &> /dev/null
create_s3_bucket "$BUCKET-logs" "$REGION" &> /dev/null

# Upload Jar and scripts to s3
aws s3 sync $SYSTEMDS_TARGET_DIRECTORY s3://$BUCKET --exclude "*" --include "*.dml" --include "*config.xml" --include "*DS.jar*"
@@ -60,11 +63,17 @@ if [ ! -f ${KEYPAIR_NAME}.pem ]; then
echo "${KEYPAIR_NAME}.pem private key created!"
fi

#Get the first available subnet in the default VPC of the configured region
DEFAULT_SUBNET=$(aws ec2 describe-subnets --region $REGION \
--filter "Name=defaultForAz,Values=true" --query "Subnets[0].SubnetId" --output text)

#Create the cluster
#Note: Ganglia is no longer available as of emr-6.15.0; it has been replaced with AmazonCloudWatchAgent
CLUSTER_INFO=$(aws emr create-cluster \
--applications Name=Ganglia Name=Spark \
--applications Name=AmazonCloudWatchAgent Name=Spark \
--ec2-attributes '{"KeyName":"'${KEYPAIR_NAME}'",
"InstanceProfile":"EMR_EC2_DefaultRole"}'\
"InstanceProfile":"EMR_EC2_DefaultRole",
"SubnetId": "'${DEFAULT_SUBNET}'"}'\
--service-role EMR_DefaultRole \
--enable-debugging \
--release-label $EMR_VERSION \
@@ -104,6 +113,6 @@ echo "Cluster info:"
export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID | jq .Cluster.MasterPublicDnsName | tr -d '"')

aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION \
--command 'aws s3 cp s3://system-ds-bucket/target . --recursive --exclude "*" --include "*DS.jar*"'
--command 'aws s3 cp s3://'${BUCKET}' . --recursive --exclude "*" --include "*DS.jar*"'

echo "Spinup finished."
4 changes: 2 additions & 2 deletions scripts/aws/systemds_cluster.config
Original file line number Diff line number Diff line change
@@ -23,8 +23,8 @@

KEYPAIR_NAME="SystemDSkeynamex"
REGION="us-east-1"
BUCKET="systemds-bucket"
EMR_VERSION="emr-5.28.0"
BUCKET=systemds-bucket
EMR_VERSION="emr-7.0.0"

INSTANCES_TYPE="m5.xlarge"
MASTER_INSTANCES_COUNT=1