From fd458aa2208555867ea505c43fba6d8a11a71d43 Mon Sep 17 00:00:00 2001 From: Maurizio Melato Date: Fri, 10 Aug 2018 21:53:53 +0200 Subject: [PATCH 01/31] Refactor cluster-check.sh to host scaledown test New script syntax: cluster-check.sh {submit | scaledown_check} Signed-off-by: Maurizio Melato --- tests/cfncluster-release-check.py | 2 +- tests/cluster-check.sh | 149 +++++++++++++++++++----------- 2 files changed, 97 insertions(+), 54 deletions(-) diff --git a/tests/cfncluster-release-check.py b/tests/cfncluster-release-check.py index 5088108b89..4ea4f64c5c 100755 --- a/tests/cfncluster-release-check.py +++ b/tests/cfncluster-release-check.py @@ -164,7 +164,7 @@ def run_test(region, distro, scheduler, instance_type, key_name, extra_args): prochelp.exec_command(['scp'] + ssh_params + [os.path.join(_dirname(), 'cluster-check.sh'), '%s@%s:.' % (username, master_ip)], stdout=out_f, stderr=sub.STDOUT, universal_newlines=True) - prochelp.exec_command(['ssh', '-n'] + ssh_params + ['%s@%s' % (username, master_ip), '/bin/bash --login cluster-check.sh %s' % scheduler], + prochelp.exec_command(['ssh', '-n'] + ssh_params + ['%s@%s' % (username, master_ip), '/bin/bash --login cluster-check.sh submit %s' % scheduler], stdout=out_f, stderr=sub.STDOUT, universal_newlines=True) _double_writeln(out_f, 'SUCCESS: %s!!' % (testname)) diff --git a/tests/cluster-check.sh b/tests/cluster-check.sh index 4dd151ef6b..ae35ea83ef 100755 --- a/tests/cluster-check.sh +++ b/tests/cluster-check.sh @@ -22,13 +22,10 @@ # node from the master node). # -# in case the scheduler goes nuts, wrap ourselves in a timeout so -# there's a bounded completion time -if test "$CHECK_CLUSTER_SUBPROCESS" = ""; then - export CHECK_CLUSTER_SUBPROCESS=1 - timeout -s KILL 10m /bin/bash ./cluster-check.sh "$@" - exit $? -fi +# Usage: +# cluster-check.sh {submit | scaledown_check} +# +set -e sge_get_slots() { local -- ppn i=0 @@ -71,30 +68,54 @@ torque_get_slots() { echo ${ppn} } +submit_launch() { + # in case the scheduler goes nuts, wrap ourselves in a timeout so + # there's a bounded completion time + if test "$CHECK_CLUSTER_SUBPROCESS" = ""; then + export CHECK_CLUSTER_SUBPROCESS=1 + timeout -s KILL 10m /bin/bash ./cluster-check.sh "$@" + exit $? + fi -scheduler="$1" -# job1: 8m30s -_sleepjob1=510 -# job2: 2m -_sleepjob2=120 + scheduler="$2" -echo "--> scheduler: $scheduler" + echo "--> scheduler: $scheduler" -set -e + submit_init ${scheduler} + + ${scheduler}_submit -# we submit 2 1-node jobs, each of which are a sleep. -# The whole thing has to run in 10 minutes, or the kill above will -# fail the job, which means that the jobs must run at the same time. -# The initial cluster is 1 nodes, so we'll need to scale up 1 further node in -# less than 8 minutes in order for the test to succeed. + done=0 + while test $done = 0 ; do + if test -f job1.done -a -f job2.done; then + done=1 + else + sleep 1 + fi + done +} + +submit_init() { + # we submit 2 1-node jobs, each of which are a sleep. + # The whole thing has to run in 10 minutes, or the kill above will + # fail the job, which means that the jobs must run at the same time. + # The initial cluster is 1 nodes, so we'll need to scale up 1 further node in + # less than 8 minutes in order for the test to succeed. 
-if test "$scheduler" = "slurm" ; then - _ppn=$(slurm_get_slots) + # job1: 8m30s + export _sleepjob1=510 + # job2: 2m + export _sleepjob2=120 + + scheduler=$1 + export _ppn=$(${scheduler}_get_slots) if [ -z "${_ppn}" ]; then - >&2 echo "The number of slots per instance couldn't be retrieved, no compute nodes available in Slurm cluster" + >&2 echo "The number of slots per instance couldn't be retrieved, no compute nodes available in ${scheduler} cluster" exit 1 fi +} +slurm_submit() { cat > job1.sh <&2 echo "The number of slots per instance couldn't be retrieved, no compute nodes available in sge cluster" - exit 1 - fi - - count=$((ppn)) +sge_submit() { + count=$((_ppn)) cat > job1.sh <&2 echo "The number of slots per instance couldn't be retrieved, no compute nodes available in Torque cluster" - exit 1 - fi - +torque_submit() { cat > job1.sh < scheduler: $scheduler" + + ${scheduler}_scaledown_check + + aws_scaledown_check +} + +slurm_scaledown_check() { + : # TODO +} + +sge_scaledown_check() { + : # TODO +} + +torque_scaledown_check() { + : # TODO +} + +aws_scaledown_check() { + : # TODO +} + +main() { + case "$1" in + submit) + submit_launch "$@" + ;; + scaledown_check) + scaledown_check_launch "$@" + ;; + *) + echo "!! Unknown command $1 !!" + exit 1 + ;; + esac +} + +main "$@" From 6a67ff158e6855ac1c41a5d8cdd21a178a290279 Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Wed, 11 Jul 2018 18:26:18 -0700 Subject: [PATCH 02/31] Add parameters to enable scaling down based on idle time Added ScaleDownIdleTime parameter Made the necessary changes in examples/confid and docs/source/configuration.rst Signed-off-by: Balaji Sridharan --- cli/cfncluster/cfnconfig.py | 3 ++- cli/cfncluster/examples/config | 3 +++ cloudformation/cfncluster.cfn.json | 14 +++++++++++++- docs/source/configuration.rst | 8 ++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cli/cfncluster/cfnconfig.py b/cli/cfncluster/cfnconfig.py index 6cb17518b2..bd5fd94fc0 100644 --- a/cli/cfncluster/cfnconfig.py +++ b/cli/cfncluster/cfnconfig.py @@ -318,7 +318,8 @@ def __init__(self, args): self.__scaling_options = dict(scaling_threshold=('ScalingThreshold',None), scaling_period=('ScalingPeriod',None), scaling_evaluation_periods=('ScalingEvaluationPeriods',None), scaling_adjustment=('ScalingAdjustment',None),scaling_adjustment2=('ScalingAdjustment2',None), - scaling_cooldown=('ScalingCooldown',None),scaling_threshold2=('ScalingThreshold2',None)) + scaling_cooldown=('ScalingCooldown',None),scale_down_idle_time=('ScaleDownIdleTime',None), + scaling_threshold2=('ScalingThreshold2',None)) try: if self.__scaling_section: diff --git a/cli/cfncluster/examples/config b/cli/cfncluster/examples/config index e7418d621c..150c12611f 100644 --- a/cli/cfncluster/examples/config +++ b/cli/cfncluster/examples/config @@ -198,3 +198,6 @@ master_subnet_id = subnet- # Amount of time in seconds to wait before attempting further scaling actions # (defaults to 300 for the default template #scaling_cooldown = 300 +# Amount of time in minutes without a job after which the compute node will terminate +# Defaults to 10 for the default template +#scale_down_idle_time = 10 diff --git a/cloudformation/cfncluster.cfn.json b/cloudformation/cfncluster.cfn.json index c98c9b8be0..ac13c02932 100644 --- a/cloudformation/cfncluster.cfn.json +++ b/cloudformation/cfncluster.cfn.json @@ -108,7 +108,8 @@ "ScalingAdjustment", "ScalingThreshold2", "ScalingAdjustment2", - "ScalingCooldown" + "ScalingCooldown", + "ScaleDownIdleTime" ] }, { @@ -289,6 +290,9 
@@ "ScalingCooldown": { "default": "scaling_cooldown" }, + "ScaleDownIdleTime": { + "default": "scale_down_idle_time" + }, "ScalingThreshold2": { "default": "scaling_threshold2" }, @@ -948,6 +952,11 @@ "Type": "String", "Default": "300" }, + "ScaleDownIdleTime": { + "Description": "Period in minutes without jobs after which compute node will terminate ", + "Type": "String", + "Default": "10" + }, "ScalingAdjustment": { "Description": "Number of instances to add to cluster when the CloudWatch ScaleUp action is called.", "Type": "String", @@ -3374,6 +3383,9 @@ "cfn_scheduler": { "Ref": "Scheduler" }, + "cfn_scale_down_idle_time": { + "Ref": "ScaleDownIdleTime" + }, "cfn_encrypted_ephemeral": { "Ref": "EncryptedEphemeral" }, diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 41f8fdd517..5f3e5c81c8 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -575,3 +575,11 @@ Amount of time in seconds to wait before attempting further scaling actions. Defaults to 300 for the default template. :: scaling_cooldown = 300 + +scale_down_idle_time +"""""""""""""""" +Amount of time in minutes without a job after which the compute node will terminate. + +Defaults to 10 for the default template. :: + + scale_down_idle_time = 10 From f28297ea7c5636fe006403c43f9940c62bd8ffbd Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Thu, 19 Jul 2018 18:43:55 -0700 Subject: [PATCH 03/31] Add S3 specific policies in cloud formation template Signed-off-by: Balaji Sridharan --- cli/cfncluster/config_sanity.py | 2 + cloudformation/cfncluster.cfn.json | 62 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/cli/cfncluster/config_sanity.py b/cli/cfncluster/config_sanity.py index 7987ec470b..8f40d21c39 100644 --- a/cli/cfncluster/config_sanity.py +++ b/cli/cfncluster/config_sanity.py @@ -48,6 +48,8 @@ def check_resource(region, aws_access_key_id, aws_secret_access_key, resource_ty (['autoscaling:DescribeAutoScalingGroups', 'autoscaling:TerminateInstanceInAutoScalingGroup', 'autoscaling:SetDesiredCapacity'], "*"), (['cloudwatch:PutMetricData'], "*"), (['dynamodb:PutItem', 'dynamodb:Query', 'dynamodb:GetItem', 'dynamodb:DeleteItem', 'dynamodb:DescribeTable'], "arn:aws:dynamodb:%s:%s:table/cfncluster-*" % (region, accountid)), + (['s3:ListBucket'], "arn:aws:s3:::%s-cfncluster" % region), + (['s3:ListBucket'], "arn:aws:s3:::%s-cfncluster/*" % region), (['sqs:ListQueues'], "*"), (['logs:*'], "arn:aws:logs:*:*:*")] diff --git a/cloudformation/cfncluster.cfn.json b/cloudformation/cfncluster.cfn.json index ac13c02932..aa77d639a9 100644 --- a/cloudformation/cfncluster.cfn.json +++ b/cloudformation/cfncluster.cfn.json @@ -2098,6 +2098,68 @@ } ] }, + { + "Sid": "S3ListBucket", + "Action": [ + "s3:ListBucket" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Fn::FindInMap": [ + "AWSRegion2Capabilites", + { + "Ref": "AWS::Region" + }, + "arn" + ] + }, + ":s3:::", + { + "Ref": "AWS::Region" + }, + "-cfncluster" + ] + ] + } + ] + }, + { + "Sid": "S3GetObj", + "Action": [ + "s3:GetObject" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Fn::FindInMap": [ + "AWSRegion2Capabilites", + { + "Ref": "AWS::Region" + }, + "arn" + ] + }, + ":s3:::", + { + "Ref": "AWS::Region" + }, + "-cfncluster/*" + ] + ] + } + ] + }, { "Sid": "SQSList", "Action": [ From 4c2ff88658b62abc08a9feadfa792da901d5b82a Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Wed, 25 Jul 2018 11:25:16 -0700 Subject: 
[PATCH 04/31] Add documentation to README.rst and iam.rst about new policies and remove ListBucket from permissions Signed-off-by: Balaji Sridharan --- README.rst | 5 ++++ cli/cfncluster/config_sanity.py | 3 +-- cloudformation/cfncluster.cfn.json | 31 ------------------------ docs/source/iam.rst | 39 ++++++++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/README.rst b/README.rst index 8453202152..2efe91fc57 100644 --- a/README.rst +++ b/README.rst @@ -28,6 +28,11 @@ Please open a GitHub issue for any feedback or issues: https://github.com/awslabs/cfncluster. There is also an active AWS HPC forum which may be helpful:https://forums.aws.amazon.com/forum.jspa?forumID=192. +CfnCluster 1.5.3 IAM Change +========================= +Between CfnCluster 1.5.2 and 1.5.3 we made a change to the CfnClusterInstancePolicy that adds “s3:GetObject” permissions on objects in -cfncluster bucket. +If you’re using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. See https://cfncluster.readthedocs.io/en/latest/iam.html + CfnCluster 1.5 IAM Change ========================= Between CfnCluster 1.4.2 and 1.5.0 we made a change to the CfnClusterInstancePolicy that adds “ec2:DescribeVolumes” permissions. If you’re using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. See https://cfncluster.readthedocs.io/en/latest/iam.html diff --git a/cli/cfncluster/config_sanity.py b/cli/cfncluster/config_sanity.py index 8f40d21c39..da3b907d44 100644 --- a/cli/cfncluster/config_sanity.py +++ b/cli/cfncluster/config_sanity.py @@ -48,8 +48,7 @@ def check_resource(region, aws_access_key_id, aws_secret_access_key, resource_ty (['autoscaling:DescribeAutoScalingGroups', 'autoscaling:TerminateInstanceInAutoScalingGroup', 'autoscaling:SetDesiredCapacity'], "*"), (['cloudwatch:PutMetricData'], "*"), (['dynamodb:PutItem', 'dynamodb:Query', 'dynamodb:GetItem', 'dynamodb:DeleteItem', 'dynamodb:DescribeTable'], "arn:aws:dynamodb:%s:%s:table/cfncluster-*" % (region, accountid)), - (['s3:ListBucket'], "arn:aws:s3:::%s-cfncluster" % region), - (['s3:ListBucket'], "arn:aws:s3:::%s-cfncluster/*" % region), + (['s3:GetObject'], "arn:aws:s3:::%s-cfncluster/*" % region), (['sqs:ListQueues'], "*"), (['logs:*'], "arn:aws:logs:*:*:*")] diff --git a/cloudformation/cfncluster.cfn.json b/cloudformation/cfncluster.cfn.json index aa77d639a9..9efadc47e7 100644 --- a/cloudformation/cfncluster.cfn.json +++ b/cloudformation/cfncluster.cfn.json @@ -2098,37 +2098,6 @@ } ] }, - { - "Sid": "S3ListBucket", - "Action": [ - "s3:ListBucket" - ], - "Effect": "Allow", - "Resource": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Fn::FindInMap": [ - "AWSRegion2Capabilites", - { - "Ref": "AWS::Region" - }, - "arn" - ] - }, - ":s3:::", - { - "Ref": "AWS::Region" - }, - "-cfncluster" - ] - ] - } - ] - }, { "Sid": "S3GetObj", "Action": [ diff --git a/docs/source/iam.rst b/docs/source/iam.rst index 9f9f86e040..c893e45911 100644 --- a/docs/source/iam.rst +++ b/docs/source/iam.rst @@ -4,8 +4,11 @@ IAM in CfnCluster ======================== .. warning:: - Between CfnCluster 1.4.2 and 1.5.0 we added a change to the `CfnClusterInstancePolicy` that adds "ec2:DescribeVolumes" permissions. If you're using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. 
+ Between CfnCluster 1.5.2 and 1.5.3 we made a change to the CfnClusterInstancePolicy that adds “s3:GetObject” permissions on objects in -cfncluster bucket. + If you’re using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. See https://cfncluster.readthedocs.io/en/latest/iam.html + + Between CfnCluster 1.4.2 and 1.5.0 we added a change to the `CfnClusterInstancePolicy` that adds "ec2:DescribeVolumes" permissions. If you're using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. CfnCluster utilizes multiple AWS services to deploy and operate a cluster. The services used are listed in the :ref:`AWS Services used in CfnCluster ` section of the documentation. @@ -336,6 +339,38 @@ CfnClusterUserPolicy ], "Effect": "Allow", "Resource": "*" - } + }, + { + "Sid": "S3GetObj", + "Action": [ + "s3:GetObject" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Fn::FindInMap": [ + "AWSRegion2Capabilites", + { + "Ref": "AWS::Region" + }, + "arn" + ] + }, + ":s3:::", + { + "Ref": "AWS::Region" + }, + "-cfncluster/*" + ] + ] + } + ] + }, + ] } From 0ec3c92e429abde44d47c29b0bcac749db1b6dbc Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Mon, 6 Aug 2018 18:32:14 -0700 Subject: [PATCH 05/31] Merge custom permission requirements into one sentence Signed-off-by: Balaji Sridharan --- docs/source/iam.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/source/iam.rst b/docs/source/iam.rst index c893e45911..2f14db6227 100644 --- a/docs/source/iam.rst +++ b/docs/source/iam.rst @@ -4,11 +4,8 @@ IAM in CfnCluster ======================== .. warning:: - - Between CfnCluster 1.5.2 and 1.5.3 we made a change to the CfnClusterInstancePolicy that adds “s3:GetObject” permissions on objects in -cfncluster bucket. - If you’re using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. See https://cfncluster.readthedocs.io/en/latest/iam.html - - Between CfnCluster 1.4.2 and 1.5.0 we added a change to the `CfnClusterInstancePolicy` that adds "ec2:DescribeVolumes" permissions. If you're using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. + Between CfnCluster 1.4.2 and 1.5.3 we added a change to the `CfnClusterInstancePolicy` that adds "ec2:DescribeVolumes" permissions and “s3:GetObject” permissions on objects in -cfncluster bucket. + If you're using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. CfnCluster utilizes multiple AWS services to deploy and operate a cluster. The services used are listed in the :ref:`AWS Services used in CfnCluster ` section of the documentation. From cafa2d41376f4476a4b4047c722152aac36f59ce Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Mon, 13 Aug 2018 15:16:55 -0700 Subject: [PATCH 06/31] Fix documentation per code reviews Signed-off-by: Balaji Sridharan --- README.rst | 4 ++-- docs/source/iam.rst | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 2efe91fc57..48e236d0f0 100644 --- a/README.rst +++ b/README.rst @@ -28,9 +28,9 @@ Please open a GitHub issue for any feedback or issues: https://github.com/awslabs/cfncluster. There is also an active AWS HPC forum which may be helpful:https://forums.aws.amazon.com/forum.jspa?forumID=192. 
-CfnCluster 1.5.3 IAM Change +CfnCluster 1.6 IAM Change ========================= -Between CfnCluster 1.5.2 and 1.5.3 we made a change to the CfnClusterInstancePolicy that adds “s3:GetObject” permissions on objects in -cfncluster bucket. +Between CfnCluster 1.5.3 and 1.6.0 we made a change to the CfnClusterInstancePolicy that adds “s3:GetObject” permissions on objects in -cfncluster bucket. If you’re using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. See https://cfncluster.readthedocs.io/en/latest/iam.html CfnCluster 1.5 IAM Change diff --git a/docs/source/iam.rst b/docs/source/iam.rst index 2f14db6227..00ac080ae2 100644 --- a/docs/source/iam.rst +++ b/docs/source/iam.rst @@ -4,9 +4,11 @@ IAM in CfnCluster ======================== .. warning:: - Between CfnCluster 1.4.2 and 1.5.3 we added a change to the `CfnClusterInstancePolicy` that adds "ec2:DescribeVolumes" permissions and “s3:GetObject” permissions on objects in -cfncluster bucket. + Between CfnCluster 1.5.3 and 1.6.0 we added a change to the `CfnClusterInstancePolicy` that adds “s3:GetObject” permissions on objects in -cfncluster bucket. If you're using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. + Between CfnCluster 1.4.2 and 1.5.0 we added a change to the `CfnClusterInstancePolicy` that adds "ec2:DescribeVolumes" permissions. If you're using a custom policy (e.g. you specify "ec2_iam_role" in your config) be sure it includes this new permission. + CfnCluster utilizes multiple AWS services to deploy and operate a cluster. The services used are listed in the :ref:`AWS Services used in CfnCluster ` section of the documentation. CfnCluster uses EC2 IAM roles to enable instances access to AWS services for the deployment and operation of the cluster. By default the EC2 IAM role is created as part of the cluster creation by CloudFormation. This means that the user creating the cluster must have the appropriate level of permissions From 99689bdc381d608f0ee20668ea80643a34bd6a0e Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 21 Aug 2018 15:04:10 -0700 Subject: [PATCH 07/31] Don't output Ganglia URL if Ganglia is turned off Signed-off-by: Sean Smith --- cli/cfncluster/cfncluster.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cli/cfncluster/cfncluster.py b/cli/cfncluster/cfncluster.py index 87ade9a747..24c5f9db1a 100644 --- a/cli/cfncluster/cfncluster.py +++ b/cli/cfncluster/cfncluster.py @@ -16,6 +16,7 @@ import logging import boto3 import os +import json from botocore.exceptions import ClientError from . 
import cfnconfig @@ -103,7 +104,10 @@ def create(args): event.get('ResourceStatusReason'))) logger.info('') outputs = cfn.describe_stacks(StackName=stack_name).get("Stacks")[0].get('Outputs', []) + ganglia_enabled = is_ganglia_enabled(config.parameters) for output in outputs: + if not ganglia_enabled and output.get('OutputKey').startswith('Ganglia'): + continue logger.info("%s: %s" % (output.get('OutputKey'), output.get('OutputValue'))) else: status = cfn.describe_stacks(StackName=stack_name).get("Stacks")[0].get('StackStatus') @@ -119,6 +123,14 @@ def create(args): logger.critical(e) sys.exit(1) +def is_ganglia_enabled(parameters): + extra_json = dict(filter(lambda x: x[0] == 'ExtraJson', parameters)) + try: + extra_json = json.loads(extra_json.get('ExtraJson')).get('cfncluster') + return extra_json.get('ganglia_enabled') == 'yes' + except: + pass + return False def update(args): logger.info('Updating: %s' % (args.cluster_name)) From a1f5d1d9d00ea00db5b0ce05ee21816967102d92 Mon Sep 17 00:00:00 2001 From: Elveskevtar Date: Tue, 21 Aug 2018 10:20:47 -0700 Subject: [PATCH 08/31] Add cfncluster stepfunctions Signed-off-by: Elveskevtar --- cli/cfncluster/cli.py | 21 ++ cli/setup.py | 3 +- cli/stepfunctions/Dockerfile | 4 + cli/stepfunctions/README.md | 170 +++++++++++ cli/stepfunctions/__init__.py | 226 ++++++++++++++ cli/stepfunctions/job_parallel.txt | 64 ++++ cli/stepfunctions/job_sequential.txt | 53 ++++ cli/stepfunctions/jobs/example/sleep.sh | 3 + cli/stepfunctions/jobs/hello_world.sh | 3 + cli/stepfunctions/jobs/jobs.config | 11 + cli/stepfunctions/package.sh | 13 + cli/stepfunctions/requirements-lambda.txt | 3 + cli/stepfunctions/src/constants.py | 9 + cli/stepfunctions/src/handlers.py | 231 ++++++++++++++ cli/stepfunctions/src/jobs.py | 139 +++++++++ cli/stepfunctions/templates/template.yaml | 348 ++++++++++++++++++++++ 16 files changed, 1300 insertions(+), 1 deletion(-) create mode 100644 cli/stepfunctions/Dockerfile create mode 100644 cli/stepfunctions/README.md create mode 100644 cli/stepfunctions/__init__.py create mode 100644 cli/stepfunctions/job_parallel.txt create mode 100644 cli/stepfunctions/job_sequential.txt create mode 100755 cli/stepfunctions/jobs/example/sleep.sh create mode 100755 cli/stepfunctions/jobs/hello_world.sh create mode 100644 cli/stepfunctions/jobs/jobs.config create mode 100755 cli/stepfunctions/package.sh create mode 100644 cli/stepfunctions/requirements-lambda.txt create mode 100644 cli/stepfunctions/src/constants.py create mode 100644 cli/stepfunctions/src/handlers.py create mode 100644 cli/stepfunctions/src/jobs.py create mode 100644 cli/stepfunctions/templates/template.yaml diff --git a/cli/cfncluster/cli.py b/cli/cfncluster/cli.py index d26fa53d2c..f0e5af6da9 100644 --- a/cli/cfncluster/cli.py +++ b/cli/cfncluster/cli.py @@ -20,6 +20,7 @@ from . import cfncluster from . 
import easyconfig +from stepfunctions import deploy def create(args): cfncluster.create(args) @@ -54,6 +55,9 @@ def start(args): def stop(args): cfncluster.stop(args) +def stepfunctiondeploy(args): + deploy(args) + def config_logger(): logger = logging.getLogger('cfncluster.cfncluster') logger.setLevel(logging.DEBUG) @@ -192,6 +196,23 @@ def main(): help='print command and exit.') pssh.set_defaults(func=command) + default_path = os.path.expanduser(os.path.join('~', '.cfncluster', 'config')) + stepfunctions = subparsers.add_parser('stepfunctions', + description='deploy a cfncluster stepfunction via cloudformation') + stepfunctions.add_argument('--bucket', '-b', dest='bucket_name', + help='Specify s3 bucket to use/create', required=True) + stepfunctions.add_argument('--config', '-c', dest='config_file', + help='Specify cfncluster config file to use', default=default_path) + stepfunctions.add_argument('--jobs', '-j', dest='jobs_config', + help='Specify jobs config file to use', required=True) + stepfunctions.add_argument('--stack-name', '-s', dest='stack_name', + help='Specify the stack name to use', default='CfnClusterStepFunction') + stepfunctions.add_argument('--region', '-r', dest='region', + help='Specify the region to deploy in', default='us-east-1') + stepfunctions.add_argument('--key-name', '-k', dest='key_name', + help='Specify the ec2 key pair', default='cfncluster-stepfunctions') + stepfunctions.set_defaults(func=stepfunctiondeploy) + args, extra_args = parser.parse_known_args() logger.debug(args) if args.func.__name__ == 'command': diff --git a/cli/setup.py b/cli/setup.py index 1c6475cc0e..24d3fd0d0f 100644 --- a/cli/setup.py +++ b/cli/setup.py @@ -21,7 +21,7 @@ def read(fname): console_scripts = ['cfncluster = cfncluster.cli:main'] version = "1.5.3" -requires = ['boto3>=1.7.33', 'awscli>=1.11.175', 'future>=0.16.0'] +requires = ['boto3>=1.7.33', 'awscli>=1.11.175', 'future>=0.16.0', 'jinja2==2.10'] if sys.version_info[:2] == (2, 6): # For python2.6 we have to require argparse since it @@ -46,6 +46,7 @@ def read(fname): zip_safe = False, package_data = { '' : ['examples/config'], + 'stepfunctions': ['**/*'] }, long_description=read('README'), classifiers=[ diff --git a/cli/stepfunctions/Dockerfile b/cli/stepfunctions/Dockerfile new file mode 100644 index 0000000000..6782ba6e0a --- /dev/null +++ b/cli/stepfunctions/Dockerfile @@ -0,0 +1,4 @@ +FROM lambci/lambda:build-python2.7 +RUN yum install libffi-devel openssl-devel +RUN mkdir /var/package +ADD requirements-lambda.txt /var/task diff --git a/cli/stepfunctions/README.md b/cli/stepfunctions/README.md new file mode 100644 index 0000000000..8de2a5c9d0 --- /dev/null +++ b/cli/stepfunctions/README.md @@ -0,0 +1,170 @@ +# CfnCluster Step Functions + +CfnCluster Step Function is a state management solution for deploying high-performance computing (HPC) CfnClusters in an environment with a configurable state machine. This allows our customers to not only run jobs based on particular state of previous job executions, but it also provides real-time visualizations through AWS Step Functions. Additionally, the Step Function state machine handles the setup and teardown process during execution so that customers can focus on their workloads instead of the compute infastructure. 
+ +## Usage + +* Dependencies: + * `docker` installed + * `aws-cli` installed +* Ensure that your AWS credentials are properly configured + * Visit the [AWS Documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html) for more information + +``` +$ pip install -r requirements.txt +$ ./deploy.py --bucket --region --config --jobs +``` + +To Run Step Function: + +1. Wait for CloudFormation stack to deploy +2. Click the link generated by the deploy.py script which links to the [AWS Step Functions Console](https://console.aws.amazon.com/states) +3. Input format: +``` +{ + "cluster_name": "" +} +``` +4. Click `Start Execution` + +## How to Specify Jobs + +Jobs are specified in a configuration file whose path is passed to the `--jobs` or `-j` parameter. An example of a given configuration file can be seen below: + +``` +[order] +sequential = job1, job2 + +[job job1] +name = thejobtouse.sh +s3_uri = s3://job-bucket/thejobtouse.sh + +[job job2] +handler = a_real_job.sh +local_path = /path/where/job/lives +wait_time = 30 +``` + +### Order Section [order] + +Required Parameters: + +`sequential`: List of job names to schedule sequentially given in the form of a comma separated list; order matters + +``` +[order] +sequential = goodjob, badjob, otherjob +``` + +OR + +`parallel`: List of job names to schedule in parallel given in the form of a comma separated list; order does not matter + +``` +[order] +parallel = goodjob, badjob, otherjob +``` + +**Important**: either `sequential` or `parallel` must be specified; not both + +### Job Section [job ] + +Required Parameters: + +`s3_uri`: An S3 URI pointing to the script or folder to package for job scheduling or execution + +``` +[job apple] +s3_uri = s3://thebucket/thefolder +handler = thescript +``` + +OR + +`local_path`: A local path (relative to the jobs config file or absolute) pointing to the script or folder to package for job scheduling and execution + +``` +[job banana] +local_path = /path/to/the/script +handler = script +``` + +AND + +`handler`: The path and name of the script to run. Since the `s3_uri` and `local_path` can both be directories, this is to specify which file to send off to the scheduler + +``` +[job carrot] +local_path = relative/path/project +handler = script/path/in/project.sh +``` + +**Important**: either `s3_uri` or `local_path` must be specified; not both + +Optional Parameters: + +`wait_time`: How long to wait between rechecking the status of the job to see if it's completed; default = 10; range 1-240 due to scheduler limitations + +``` +[job donut] +s3_uri = s3://bucket/script +handler = script +wait_time = 240 +``` + +## Arguments + +### `--config` or `-c` + +Specifies the CfnCluster configuration file to use. This will be utilized by the step function to deploy user defined clusters. For more information on how to configure CfnCluster visit the [CfnCluster Documentation](http://cfncluster.readthedocs.io/en/latest/getting_started.html#configuring-cfncluster). + +### `--bucket` or `-b` + +Specifies the name of the S3 bucket to be used to store the source code that creates and terminates the CfnClusters. **Important**: if the bucket already exists, it must be in the same region as that given by the --region argument. If it does not exist, it will be made for you in the specified region. + +### `--jobs` or `-j` + +Specifies the job configuration file to use. This will be used to package your jobs for use in the Step Function. 
+ +## Optional Arguments + +### `--region` or `-r` + +Specifies the AWS region to deploy the CloudFormation stack that contains the Step Function and corresponding source code to deploy and terminate CfnClusters. Defaults to us-east-1. + +### `--stack-name` or `-s` + +Specifies the name that should be given to the CloudFormation stack that the script deploys. + +### `--key-name` or `-k` + +Specifies the name of the EC2 key pair to use for the CfnCluster master node. **Important**: the `key_name` parameter is optional but if you choose to specify it, the [EC2 key pair](https://console.aws.amazon.com/ec2#KeyPairs) with this name must exist and a secret in [AWS Secrets Manager](https://console.aws.amazon.com/secretsmanager) must exist with the same name and a secret value set to the private key. If `key_name` is omitted, it is defaulted to `cfncluster-stepfunctions`. + +## Flags + +### `--help` or `-h` + +Prints the help menu and usage to standard output. + +``` +usage: deploy.py [-h] --bucket BUCKET_NAME --config CONFIG_FILE --jobs + JOBS_CONFIG [--stack-name STACK_NAME] [--region REGION] + [--key-name KEY_NAME] + +Deploys CfnCluster Step Function + +optional arguments: + -h, --help show this help message and exit + --bucket BUCKET_NAME, -b BUCKET_NAME + Specify s3 bucket to use/create + --config CONFIG_FILE, -c CONFIG_FILE + Specify config file to use + --jobs JOBS_CONFIG, -j JOBS_CONFIG + Specify jobs config file to use + --stack-name STACK_NAME, -s STACK_NAME + Specify the stack name to use + --region REGION, -r REGION + Specify the region to deploy in + --key-name KEY_NAME, -k KEY_NAME + Specify the ec2 key pair +``` diff --git a/cli/stepfunctions/__init__.py b/cli/stepfunctions/__init__.py new file mode 100644 index 0000000000..6b89e55b7a --- /dev/null +++ b/cli/stepfunctions/__init__.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +from distutils import dir_util as dirutil +from shutil import copy +import argparse +import configparser +import os +import subprocess +import sys +import tempfile + +from botocore.exceptions import ClientError +from jinja2 import Template +import boto3 + +def _check_docker(): + # check that docker is installed + if subprocess.call(['docker', '-v']) != 0: + print('Docker is not installed properly') + sys.exit(1) + +def _resolve_s3(bucket_name, region): + # create s3 bucket if it does not exist + s3 = boto3.resource('s3') + if s3.Bucket(name=bucket_name) in s3.buckets.all(): + # if bucket already exists, check loca + location = s3.meta.client.get_bucket_location( + Bucket=bucket_name).get('LocationConstraint') + location = 'us-east-1' if location == None else location + if location != region: + print('Bucket {} is in {}, should be in {}'.format( + bucket_name, location, region)) + sys.exit(1) + else: + # if 'us-east-1' CreateBucketConfiguration must be omitted + if region == 'us-east-1': + bucket = s3.create_bucket(Bucket=bucket_name) + else: + bucket = s3.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={ + 'LocationConstraint': region + } + ) + bucket.wait_until_exists() + +def _copy_source(script_path): + # create temporary directory + tempdir = tempfile.mkdtemp(dir='/tmp') + + # copy source code and cfn template to tempdir + dirutil.copy_tree(os.path.join(script_path, 'src'), tempdir) + dirutil.copy_tree(os.path.join(script_path, 'templates'), tempdir) + + print('Created temporary directory: {}'.format(tempdir)) + return tempdir + +def _edit_key_param(tempdir, config_file, key_name): + # get configuration file + config_path = 
os.path.realpath(config_file) + config = configparser.ConfigParser() + config.readfp(open(config_path)) + + # set key_name of config + template = config.get('global', 'cluster_template') + config.set('cluster {}'.format(template), 'key_name', key_name) + + # output with new key_name to a copy + os.mkdir(os.path.join(tempdir, 'config')) + new_config = open(os.path.join(tempdir, 'config', 'cfncluster.config'), 'w+') + config.write(new_config) + new_config.close() + print('Copied cfncluster config with key_name={}'.format(key_name)) + + return config + +def _package_jobs(tempdir, jobs_config, config, script_dir): + # setup jobs temp folder and copy jobs config + try: + os.mkdir(os.path.join(tempdir, 'jobs')) + config_path = os.path.realpath(jobs_config) + copy(config_path, os.path.join(tempdir, 'jobs.config')) + config.readfp(open(config_path)) + except IOError: + msg = 'Must specify a real file for the jobs config.\n' \ + 'A working example can be found at {}' + loc = os.path.join(script_dir, 'jobs', 'jobs.config') + print(msg.format(loc)) + sys.exit(1) + + # package user specified jobs + job_sections = filter(lambda x: 'job ' in x, config.sections()) + for section in job_sections: + job_name = section[4:] + new_path = os.path.join(tempdir, 'jobs', job_name) + os.mkdir(new_path) + is_s3 = 's3_uri' in config.options(section) + is_local = 'local_path' in config.options(section) + if is_s3 and is_local: + print('Must specify s3_uri or local_path, not both') + sys.exit(1) + elif is_s3: + # if s3 use aws s3 short commands + uri = config.get(section, 's3_uri') + print(subprocess.check_output( + ['aws', 's3', 'cp', uri, new_path] + )) + elif is_local: + # if local path copy file/directory + path = config.get(section, 'local_path') + config_folder = os.path.dirname(config_path) + job_path = os.path.join(config_folder, path) + if os.path.isdir(job_path): + dirutil.copy_tree(job_path, new_path) + else: + copy(job_path, new_path) + else: + print('Need to specify s3_uri or local_path in {} section'.format(section)) + sys.exit(1) + +def _generate_template(script_path, tempdir, config): + # dynamically generate cfn template based on jobs config + template_txt = open(os.path.join(tempdir, 'template.yaml'), 'r').read() + cfn_template = Template(template_txt) + + # handle sequential and parallel job execution types + if 'sequential' in config.options('order'): + job_txt = open(os.path.join(script_path, 'job_sequential.txt'), 'r').read() + sequential = config.get('order', 'sequential') + jobs = sequential.split(',') + jobs = map(lambda x: x.strip(), jobs) + job_list = [] + for job in jobs: + sec = 10 + section = 'job {}'.format(job) + if 'wait_time' in config.options(section): + sec = int(config.get(section, 'wait_time')) + if sec <= 0 or sec > 240: + print('wait_time must be between 1 and 240 seconds inclusive') + sys.exit(1) + index = jobs.index(job) + end = 'Delete_CfnCluster' if index == len(jobs) - 1 else \ + 'Pass_Job_{}'.format(jobs[index + 1]) + handler = config.get(section, 'handler') + job_list.append({ + 'name': job, 'sec': sec, 'end': end, 'handler': handler + }) + sequential_template = Template(job_txt) + job_def = sequential_template.render(job_list=job_list) + entry = 'Pass_Job_{}'.format(jobs[0]) + elif 'parallel' in config.options('order'): + job_txt = open(os.path.join(script_path, 'job_parallel.txt'), 'r').read() + parallel = config.get('order', 'parallel') + jobs = parallel.split(',') + jobs = map(lambda x: x.strip(), jobs) + job_list = [] + for job in jobs: + sec = 10 + section = 'job 
{}'.format(job) + if 'wait_time' in config.options(section): + sec = int(config.get(section, 'wait_time')) + if sec <= 0 or sec > 240: + print('wait_time must be between 1 and 240 seconds inclusive') + sys.exit(1) + handler = config.get(section, 'handler') + job_list.append({'name': job, 'sec': sec, 'handler': handler}) + parallel_template = Template(job_txt) + job_def = parallel_template.render(job_list=job_list) + entry = 'Parallel_Job_Execution' + + # output dynamically generated template + new_cfn_txt = cfn_template.render(entry=entry, jobs=job_def) + open(os.path.join(tempdir, 'template.yaml'), 'w').write(new_cfn_txt) + +def _package(config_file, key_name, jobs_config): + script_path = os.path.dirname(os.path.realpath(__file__)) + tempdir = _copy_source(script_path) + config = _edit_key_param(tempdir, config_file, key_name) + _package_jobs(tempdir, jobs_config, config, script_path) + _generate_template(script_path, tempdir, config) + return tempdir + +def _deeplink_url(region, stack_name): + # get outputs from cfn to use in url + try: + cloudformation = boto3.resource( + 'cloudformation', region_name=region) + stack = cloudformation.Stack(stack_name) + stackId = stack.stack_id + outputs = stack.outputs + machineArn = filter( + lambda op: op['OutputKey'] == 'StateMachineArn', outputs + )[0]['OutputValue'] + except ClientError as e: + print(e.response.get('Error').get('Message')) + sys.exit(1) + + # fill and print url + url_region = '{}.'.format(region) if region != 'us-east-1' else '' + print('URL to Step Function State Machine:') + print('https://{}console.aws.amazon.com/states/home?region={}#/' \ + 'statemachines/view/{}?registered=true&stackId={}'.format( + url_region, region, machineArn, stackId)) + +def deploy(args): + """Deploys the CloudFormation stack based on args + + Args: + args: arguments passed in by argparse library + """ + _check_docker() + _resolve_s3(args.bucket_name, args.region) + tempdir = _package(args.config_file, args.key_name, args.jobs_config) + + print('Packaging up all dependencies, this can take a moment...') + + # package and deploy the cloudformation stack + try: + path_dir = os.path.dirname(os.path.realpath(__file__)) + path = os.path.join(path_dir, 'package.sh') + print(subprocess.check_output([path, tempdir, + args.bucket_name, args.stack_name, args.region, path_dir])) + except subprocess.CalledProcessError as e: + print(e.output) + sys.exit(1) + + _deeplink_url(args.region, args.stack_name) diff --git a/cli/stepfunctions/job_parallel.txt b/cli/stepfunctions/job_parallel.txt new file mode 100644 index 0000000000..7e85a92cdf --- /dev/null +++ b/cli/stepfunctions/job_parallel.txt @@ -0,0 +1,64 @@ + "Parallel_Job_Execution": { + "Type": "Parallel", + "Branches": [ + {% for job in job_list %}{ + "StartAt": "Pass_Job_{{ job.name }}", + "States": { + "Pass_Job_{{ job.name }}": { + "Type": "Pass", + "Result": { + "name": "{{ job.name }}", + "handler": "{{ job.handler }}" + }, + "ResultPath": "$.job_info", + "Next": "Schedule_Job_{{ job.name }}" + }, + "Schedule_Job_{{ job.name }}": { + "Type": "Task", + "Resource": "${runJobArn}", + "Next": "Wait_For_Job_{{ job.name }}" + }, + "Wait_For_Job_{{ job.name }}": { + "Type": "Wait", + "Seconds": {{ job.sec }}, + "Next": "Poll_On_Job_{{ job.name }}" + }, + "Poll_On_Job_{{ job.name }}": { + "Type": "Task", + "Resource": "${pollOnJobArn}", + "Next": "{{ job.name }}_Poll_Choice" + }, + "{{ job.name }}_Poll_Choice": { + "Type": "Choice", + "Choices": [ + { + "Variable": "$.status", + "StringEquals": "idle", + "Next": 
"Wait_For_Job_{{ job.name }}" + }, + { + "Variable": "$.status", + "StringEquals": "failed", + "Next": "Job_Failed_{{ job.name }}" + }, + { + "Variable": "$.status", + "StringEquals": "complete", + "Next": "Job_Succeeded_{{ job.name }}" + } + ] + }, + "Job_Failed_{{ job.name }}": { + "Type": "Fail", + "Cause": "Exit code not 0" + }, + "Job_Succeeded_{{ job.name }}": { + "Type": "Pass", + "End": true + } + } + }{% if not loop.last %},{% endif %} + {% if not loop.last %} {% endif %}{% endfor -%} + ], + "Next": "Delete_CfnCluster" + } diff --git a/cli/stepfunctions/job_sequential.txt b/cli/stepfunctions/job_sequential.txt new file mode 100644 index 0000000000..e1ccbb0944 --- /dev/null +++ b/cli/stepfunctions/job_sequential.txt @@ -0,0 +1,53 @@ + {% for job in job_list %}"Pass_Job_{{ job.name }}": { + "Type": "Pass", + "Result": { + "name": "{{ job.name }}", + "handler": "{{ job.handler }}" + }, + "ResultPath": "$.job_info", + "Next": "Schedule_Job_{{ job.name }}" + }, + "Schedule_Job_{{ job.name }}": { + "Type": "Task", + "Resource": "${runJobArn}", + "Next": "Wait_For_Job_{{ job.name }}" + }, + "Wait_For_Job_{{ job.name }}": { + "Type": "Wait", + "Seconds": {{ job.sec }}, + "Next": "Poll_On_Job_{{ job.name }}" + }, + "Poll_On_Job_{{ job.name }}": { + "Type": "Task", + "Resource": "${pollOnJobArn}", + "Next": "{{ job.name }}_Poll_Choice" + }, + "{{ job.name }}_Poll_Choice": { + "Type": "Choice", + "Choices": [ + { + "Variable": "$.status", + "StringEquals": "idle", + "Next": "Wait_For_Job_{{ job.name }}" + }, + { + "Variable": "$.status", + "StringEquals": "failed", + "Next": "Job_Failed_{{ job.name }}" + }, + { + "Variable": "$.status", + "StringEquals": "complete", + "Next": "Job_Succeeded_{{ job.name }}" + } + ] + }, + "Job_Failed_{{ job.name }}": { + "Type": "Fail", + "Cause": "Exit code not 0" + }, + "Job_Succeeded_{{ job.name }}": { + "Type": "Pass", + "Next": "{{ job.end }}" + }{% if not loop.last %},{% endif %} + {% endfor -%} diff --git a/cli/stepfunctions/jobs/example/sleep.sh b/cli/stepfunctions/jobs/example/sleep.sh new file mode 100755 index 0000000000..ff7c50e747 --- /dev/null +++ b/cli/stepfunctions/jobs/example/sleep.sh @@ -0,0 +1,3 @@ +#!/bin/bash +sleep 30 +exit 0 diff --git a/cli/stepfunctions/jobs/hello_world.sh b/cli/stepfunctions/jobs/hello_world.sh new file mode 100755 index 0000000000..1e44da15e7 --- /dev/null +++ b/cli/stepfunctions/jobs/hello_world.sh @@ -0,0 +1,3 @@ +#!/bin/bash +echo 'Hello World' +exit 0 diff --git a/cli/stepfunctions/jobs/jobs.config b/cli/stepfunctions/jobs/jobs.config new file mode 100644 index 0000000000..64d2e3a1f9 --- /dev/null +++ b/cli/stepfunctions/jobs/jobs.config @@ -0,0 +1,11 @@ +[order] +sequential = hello, folder + +[job hello] +handler = hello_world.sh +local_path = hello_world.sh +wait_time = 30 + +[job folder] +handler = sleep.sh +local_path = example diff --git a/cli/stepfunctions/package.sh b/cli/stepfunctions/package.sh new file mode 100755 index 0000000000..d41fae06e7 --- /dev/null +++ b/cli/stepfunctions/package.sh @@ -0,0 +1,13 @@ +#!/bin/bash +docker build -t cfncluster-stepfunctions $5 +docker run -v $1:/var/package cfncluster-stepfunctions \ + pip install -r requirements-lambda.txt -t /var/package +aws cloudformation package \ + --template-file $1/template.yaml \ + --output-template-file $1/deploy.yaml \ + --s3-bucket $2 +aws cloudformation deploy \ + --template-file $1/deploy.yaml \ + --capabilities CAPABILITY_IAM \ + --stack-name $3 \ + --region $4 diff --git a/cli/stepfunctions/requirements-lambda.txt 
b/cli/stepfunctions/requirements-lambda.txt new file mode 100644 index 0000000000..08674a5b20 --- /dev/null +++ b/cli/stepfunctions/requirements-lambda.txt @@ -0,0 +1,3 @@ +cfncluster==1.5.2 +setuptools==39.2.0 +paramiko==2.4.1 diff --git a/cli/stepfunctions/src/constants.py b/cli/stepfunctions/src/constants.py new file mode 100644 index 0000000000..c2b21b75c2 --- /dev/null +++ b/cli/stepfunctions/src/constants.py @@ -0,0 +1,9 @@ +# functions used for func arg in cfncluster cli +def create(): + pass + +def delete(): + pass + +def update(): + pass diff --git a/cli/stepfunctions/src/handlers.py b/cli/stepfunctions/src/handlers.py new file mode 100644 index 0000000000..47bf4aa58d --- /dev/null +++ b/cli/stepfunctions/src/handlers.py @@ -0,0 +1,231 @@ +from StringIO import StringIO +import configparser +import logging +import os +import sys +import traceback + +from botocore.exceptions import ClientError +from cfncluster import cli, cfncluster +import boto3 +import paramiko + +import constants + +# set logger and log level +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +idle = ['CREATE_IN_PROGRESS', 'UPDATE_IN_PROGRESS', 'REVIEW_IN_PROGRESS'] +complete = ['CREATE_COMPLETE', 'UPDATE_COMPLETE'] + + +class Args: + """Setup arguments to pass to cfncluster cli + + Initializes with all possible arguments that could be + passed into the cfncluster cli + """ + + config_file = 'config/cfncluster.config' + reset_desired = False + template_url = None + norollback = False + nowait = True + + def __init__(self, cluster_name, region, func): + self.cluster_name = cluster_name + self.region = region + self.func = func + + +class EC2_SSH: + """Creates a paramiko ssh client for EC2 instances + + Attributes: + ip: Master public IP address of EC2 instance + """ + + def __init__(self, ip, username, key): + self.ip = ip + self.username = username + self.key = key + + def __enter__(self): + try: + sm = boto3.client('secretsmanager') + secret = sm.get_secret_value(SecretId=self.key) + key_string = secret['SecretString'] + key_file = StringIO(key_string) + pkey = paramiko.RSAKey.from_private_key(key_file) + self.ssh = paramiko.SSHClient() + self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + self.ssh.connect(self.ip, username=self.username, pkey=pkey) + except ClientError as e: + print(e.response.get('Error').get('Message')) + sys.exit(1) + return self.ssh + + def __exit__(self, exc_type, exc_value, tb): + if exc_type is not None: + traceback.print_exception(exc_type, exc_value, tb) + self.ssh.close() + + +class EC2_SFTP: + """Creates a paramiko sftp client for EC2 instances + + Attributes: + ip: Master public IP address of EC2 instance + """ + + def __init__(self, ip, username, key): + self.ip = ip + self.username = username + self.key = key + + def __enter__(self): + try: + sm = boto3.client('secretsmanager') + secret = sm.get_secret_value(SecretId=self.key) + key_string = secret['SecretString'] + key_file = StringIO(key_string) + pkey = paramiko.RSAKey.from_private_key(key_file) + self.transport = paramiko.Transport(self.ip) + self.transport.connect(username=self.username, pkey=pkey) + self.sftp = paramiko.SFTPClient.from_transport(self.transport) + except ClientError as e: + print(e.response.get('Error').get('Message')) + sys.exit(1) + return self.sftp + + def __exit__(self, exc_type, exc_value, tb): + if exc_type is not None: + traceback.print_exception(exc_type, exc_value, tb) + self.sftp.close() + self.transport.close() + + +def create_cfncluster(event, context): + """Handler for 
creating cfnclusters + + Args: + event: should contain 'cluster_name' attribute + """ + logging.info('event = {}\ncontext = {}'.format(event, context)) + + # variable check + if event.get('cluster_name') is None: + raise Exception('cluster_name not specified') + + config = configparser.ConfigParser() + config.readfp(open('config/cfncluster.config')) + event['key_name'] = config.get('cluster default', 'key_name') + + region = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') + + # create/get ec2 key pair + try: + ec2 = boto3.client('ec2') + ec2.describe_key_pairs(KeyNames=[event['key_name']]) + sm = boto3.client('secretsmanager') + sm.describe_secret(SecretId=event['key_name']) + except ClientError as e: + if e.response.get('Error').get('Code') == 'InvalidKeyPair.NotFound': + try: + ec2 = boto3.client('ec2') + key = ec2.create_key_pair(KeyName=event['key_name']) + sm = boto3.client('secretsmanager') + sm.create_secret( + Name=event['key_name'], + SecretString=key['KeyMaterial'] + ) + except ClientError as e: + print(e.response.get('Error').get('Message')) + sys.exit(1) + else: + print(e.response.get('Error').get('Message')) + sys.exit(1) + + args = Args(event['cluster_name'], region, constants.create) + cli.create(args) + return event + +def is_cluster_ready(event, context): + """Handler for waiting on successful cfncluster deployment + + Args: + event: contains number of executions of this function + """ + logging.debug('event = {}\ncontext = {}'.format(event, context)) + + # variable check + if event.get('execution_count') is None: + event['execution_count'] = 0 + + # poll on cluster creation + stack = 'cfncluster-{}'.format(event['cluster_name']) + try: + cfn = boto3.resource('cloudformation') + stack = cfn.Stack(stack) + status = stack.stack_status + except ClientError as e: + print(e.response.get('Error').get('Message')) + sys.exit(1) + + logger.info('Poll {}: {}'.format(event['execution_count'], status)) + + if status in idle: + event['status'] = 'idle' + elif status in complete: + event['status'] = 'complete' + outputs = stack.outputs + parameters = stack.parameters + event['master_ip'] = filter( + lambda op: op['OutputKey'] == 'MasterPublicIP', outputs + )[0]['OutputValue'] + event['user_name'] = filter( + lambda op: op['OutputKey'] == 'ClusterUser', outputs + )[0]['OutputValue'] + event['scheduler'] = filter( + lambda param: param['ParameterKey'] == 'Scheduler', parameters + )[0]['ParameterValue'] + else: + logging.error(status) + event['status'] = 'failed' + + event['execution_count'] += 1 + + # give timeout if applicable + if event['execution_count'] == 15 and event['status'] == 'idle': + event['status'] = 'timeout' + + # make working temporary directory in master node + if event['status'] == 'complete': + master_ip = event['master_ip'] + user_name = event['user_name'] + key_name = event['key_name'] + with EC2_SSH(master_ip, user_name, key_name) as ssh_client: + command = 'mktemp -d -p /shared' + workdir = ssh_client.exec_command(command)[1].read().strip() + event['workdir'] = workdir + + return event + +def delete_cfncluster(event, context): + """Handler for deleting cfnclusters + + Args: + event: should contain 'cluster_name' attribute + """ + logging.debug('event = {}\ncontext = {}'.format(event, context)) + + # the output of parallel states is a list of outputs of all branches + is_list = isinstance(event, list) + name = event[0]['cluster_name'] if is_list else event['cluster_name'] + + region = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') + args = Args(name, region, 
constants.delete) + cli.delete(args) + + return event diff --git a/cli/stepfunctions/src/jobs.py b/cli/stepfunctions/src/jobs.py new file mode 100644 index 0000000000..32d36e953d --- /dev/null +++ b/cli/stepfunctions/src/jobs.py @@ -0,0 +1,139 @@ +import configparser +import logging +import os +import time +import zipfile + +from handlers import EC2_SSH, EC2_SFTP + +# set logger and log level +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +class JobSchedulingException(Exception): + pass + +commands = { + 'schedule': { + 'sge': '. /opt/sge/default/common/settings.sh; cd {}; qsub {} |& grep \'submitted\' | awk \'{{print $3}}\'', + 'torque': 'cd {}; qsub {} |& awk -F. \'{{$0=$1}}1\'', + 'slurm': 'cd {}; sbatch {} |& awk \'{{print $4}}\'' + }, + 'poll': { + 'sge': '. /opt/sge/default/common/settings.sh; qstat | awk \'$1 == {} {{print $5}}\'', + 'torque': 'qstat -c | awk -F. \'$1 == {} {{print $0}}\' | awk \'{{print $5}}\'', + 'slurm': 'scontrol show job {} | grep JobState | awk \'{{print $1}}\' | awk -F= \'{{print $2}}\'' + }, + 'job_status': { + 'queued': { + 'sge': 'qw', + 'torque': 'Q', + 'slurm': 'PENDING' + }, + 'running': { + 'sge': 'r', + 'torque': 'R', + 'slurm': 'RUNNING' + } + }, + 'exit_code': { + 'sge': '. /opt/sge/default/common/settings.sh; qacct -j {} | grep exit_status | awk \'{{print $2}}\'', + 'torque': 'qstat -f {} | grep exit_status | awk \'{{print $3}}\'', + 'slurm': 'scontrol show job {} | grep ExitCode= | awk \'{{print $5}}\' | awk -F= \'{{print $2}}\' | awk -F: \'{{print $1}}\'' + } +} + +def run_job(event, context): + """Runs an example job + + Args: + event: contains ip for the master node of the cfncluster + """ + logging.debug('event = {}\ncontext = {}'.format(event, context)) + + job_name = event['job_info']['name'] + job_handler = event['job_info']['handler'] + scheduler = event['scheduler'] + + workdir = event['workdir'] + master_ip = event['master_ip'] + user_name = event['user_name'] + key_name = event['key_name'] + + # package job + zip_name = '{}.zip'.format(job_name) + zip_path = os.path.join('/tmp', zip_name) + zip_file = zipfile.ZipFile(zip_path, 'w') + for root, dirs, files in os.walk(os.path.join('jobs', job_name)): + for file in files: + local_path = os.path.join(root, file) + remote_path = os.path.join(root[5:], file) + zip_file.write(local_path, remote_path) + zip_file.close() + + # upload job via sftp + with EC2_SFTP(master_ip, user_name, key_name) as sftp_client: + sftp_client.chdir(workdir) + sftp_client.put(zip_path, zip_name) + + # schedule job + with EC2_SSH(master_ip, user_name, key_name) as ssh_client: + zip_path = os.path.join(workdir, job_name) + command = 'unzip {}.zip -d {}'.format(zip_path, workdir) + output = ssh_client.exec_command(command)[1].read().strip() + + command = commands['schedule'][scheduler] + command = command.format(os.path.join(workdir, job_name), job_handler) + logging.info(command) + schedule = ssh_client.exec_command(command) + job_id = schedule[1].read().strip() + logging.info(schedule[1]) + logging.info(schedule[1].read()) + + # handle errors + if job_id == '': + message = 'Job {} failed to schedule'.format(job_name) + raise JobSchedulingException(message) + + event['job_id'] = job_id + return event + +def is_job_done(event, context): + """Determines whether the job is complete + + Args: + event: contains job id to check whether complete + """ + logging.debug('event = {}\ncontext = {}'.format(event, context)) + + scheduler = event['scheduler'] + master_ip = event['master_ip'] + user_name = 
event['user_name'] + key_name = event['key_name'] + + # check job status + with EC2_SSH(master_ip, user_name, key_name) as ssh_client: + command = commands['poll'][scheduler].format(event['job_id']) + status = ssh_client.exec_command(command)[1].read().strip() + + queued = commands['job_status']['queued'][scheduler] + running = commands['job_status']['running'][scheduler] + + if status == queued or status == running: + event['status'] = 'idle' + else: + command = commands['exit_code'][scheduler].format(event['job_id']) + + # attempt to wait for job journaling + t_end = time.time() + 30 + while time.time() < t_end: + try: + code = ssh_client.exec_command(command)[1].read().strip() + code = int(code) + break + except ValueError: + time.sleep(1) + + event['status'] = 'complete' if code == 0 else 'failed' + + return event diff --git a/cli/stepfunctions/templates/template.yaml b/cli/stepfunctions/templates/template.yaml new file mode 100644 index 0000000000..6518041d8e --- /dev/null +++ b/cli/stepfunctions/templates/template.yaml @@ -0,0 +1,348 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'AWS CloudFormation template for Step Function Integration' +Resources: + CfnClusterLambdaRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: 'Allow' + Action: 'sts:AssumeRole' + Principal: + Service: 'lambda.amazonaws.com' + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/AWSLambdaFullAccess' + Path: '/' + Policies: + - PolicyName: 'SecretManagerPolicy' + PolicyDocument: + Version: '2012-10-17' + Statement: + - Resource: '*' + Effect: 'Allow' + Action: + - 'ec2:CreateKeyPair' + - 'secretsmanager:CreateSecret' + - 'secretsmanager:DescribeSecret' + - 'secretsmanager:GetSecretValue' + CfnClusterStateMachineRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: 'Allow' + Action: 'sts:AssumeRole' + Principal: + Service: + - !Sub 'states.${AWS::Region}.amazonaws.com' + Path: '/' + Policies: + - PolicyName: 'StateExecutionPolicy' + PolicyDocument: + Version: '2012-10-17' + Statement: + - Resource: '*' + Effect: 'Allow' + Action: 'lambda:InvokeFunction' + CfnClusterUserPolicy: + Type: 'AWS::IAM::Policy' + Properties: + PolicyName: 'CfnClusterUserPolicy' + PolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: 'EC2Describe' + Resource: '*' + Effect: 'Allow' + Action: + - 'ec2:DescribeKeyPairs' + - 'ec2:DescribeVpcs' + - 'ec2:DescribeSubnets' + - 'ec2:DescribeSecurityGroups' + - 'ec2:DescribePlacementGroups' + - 'ec2:DescribeImages' + - 'ec2:DescribeInstances' + - 'ec2:DescribeSnapshots' + - 'ec2:DescribeVolumes' + - 'ec2:DescribeVpcAttribute' + - 'ec2:DescribeAddresses' + - 'ec2:CreateTags' + - 'ec2:DescribeNetworkInterfaces' + - 'ec2:DescribeAvailabilityZones' + - Sid: 'EC2Modify' + Resource: '*' + Effect: 'Allow' + Action: + - 'ec2:CreateVolume' + - 'ec2:RunInstances' + - 'ec2:AllocateAddress' + - 'ec2:AssociateAddress' + - 'ec2:AttachNetworkInterface' + - 'ec2:AuthorizeSecurityGroupEgress' + - 'ec2:AuthorizeSecurityGroupIngress' + - 'ec2:CreateNetworkInterface' + - 'ec2:CreateSecurityGroup' + - 'ec2:ModifyVolumeAttribute' + - 'ec2:ModifyNetworkInterfaceAttribute' + - 'ec2:DeleteNetworkInterface' + - 'ec2:DeleteVolume' + - 'ec2:TerminateInstances' + - 'ec2:DeleteSecurityGroup' + - 'ec2:DisassociateAddress' + - 'ec2:RevokeSecurityGroupIngress' + - 'ec2:ReleaseAddress' + - Sid: 'AutoScalingDescribe' + Resource: '*' + Effect: 'Allow' + Action: + - 
'autoscaling:DescribeAutoScalingGroups' + - 'autoscaling:DescribeLaunchConfigurations' + - 'autoscaling:DescribeAutoScalingInstances' + - Sid: 'AutoScalingModify' + Resource: '*' + Effect: 'Allow' + Action: + - 'autoscaling:CreateAutoScalingGroup' + - 'autoscaling:CreateLaunchConfiguration' + - 'autoscaling:PutNotificationConfiguration' + - 'autoscaling:UpdateAutoScalingGroup' + - 'autoscaling:PutScalingPolicy' + - 'autoscaling:DeleteLaunchConfiguration' + - 'autoscaling:DescribeScalingActivities' + - 'autoscaling:DeleteAutoScalingGroup' + - 'autoscaling:DeletePolicy' + - Sid: 'DynamoDBDescribe' + Resource: '*' + Effect: 'Allow' + Action: 'dynamodb:DescribeTable' + - Sid: 'DynamoDBModify' + Resource: '*' + Effect: 'Allow' + Action: + - 'dynamodb:CreateTable' + - 'dynamodb:DeleteTable' + - Sid: 'CloudWatchModify' + Resource: '*' + Effect: 'Allow' + Action: + - 'cloudwatch:PutMetricAlarm' + - 'cloudwatch:DeleteAlarms' + - Sid: 'SQSDescribe' + Resource: '*' + Effect: 'Allow' + Action: 'sqs:GetQueueAttributes' + - Sid: 'SQSModify' + Resource: '*' + Effect: 'Allow' + Action: + - 'sqs:CreateQueue' + - 'sqs:SetQueueAttributes' + - 'sqs:DeleteQueue' + - Sid: 'SNSDescribe' + Resource: '*' + Effect: 'Allow' + Action: + - 'sns:ListTopics' + - 'sns:GetTopicAttributes' + - Sid: 'SNSModify' + Resource: '*' + Effect: 'Allow' + Action: + - 'sns:CreateTopic' + - 'sns:Subscribe' + - 'sns:DeleteTopic' + - Sid: 'CloudFormationDescribe' + Resource: '*' + Effect: 'Allow' + Action: + - 'cloudformation:DescribeStackEvents' + - 'cloudformation:DescribeStackResources' + - 'cloudformation:DescribeStacks' + - 'cloudformation:ListStacks' + - Sid: 'CloudFormationModify' + Resource: '*' + Effect: 'Allow' + Action: + - 'cloudformation:CreateStack' + - 'cloudformation:DeleteStack' + - 'cloudformation:UpdateStack' + - Sid: 'S3CfnClusterReadOnly' + Resource: !Sub 'arn:aws:s3:::${AWS::Region}-cfncluster*' + Effect: 'Allow' + Action: + - 's3:Get*' + - 's3:List*' + - Sid: 'IAMModify' + Resource: !Sub 'arn:aws:iam::${AWS::AccountId}:role/*' + Effect: 'Allow' + Action: + - 'iam:PassRole' + - 'iam:CreateRole' + - 'iam:DeleteRole' + - Sid: 'IAMInstanceProfile' + Resource: !Sub 'arn:aws:iam::${AWS::AccountId}:instance-profile/*' + Effect: 'Allow' + Action: + - 'iam:CreateInstanceProfile' + - 'iam:DeleteInstanceProfile' + - Sid: 'IAMAddRoleToProfile' + Resource: '*' + Effect: 'Allow' + Action: + - 'iam:AddRoleToInstanceProfile' + - 'iam:RemoveRoleFromInstanceProfile' + - 'iam:PutRolePolicy' + - 'iam:DeleteRolePolicy' + Roles: + - Ref: 'CfnClusterLambdaRole' + CreateCfnCluster: + Type: 'AWS::Lambda::Function' + Properties: + Description: 'Creates a cluster' + FunctionName: 'CreateCfnCluster' + Handler: 'handlers.create_cfncluster' + MemorySize: 1536 + Role: !GetAtt [ CfnClusterLambdaRole, Arn ] + Runtime: 'python2.7' + Timeout: 300 + PollOnCluster: + Type: 'AWS::Lambda::Function' + Properties: + Description: 'Polls on creation of a cluster' + FunctionName: 'PollOnCluster' + Handler: 'handlers.is_cluster_ready' + MemorySize: 1536 + Role: !GetAtt [ CfnClusterLambdaRole, Arn ] + Runtime: 'python2.7' + Timeout: 300 + ScheduleJob: + Type: 'AWS::Lambda::Function' + Properties: + Description: 'Schedules the next job' + FunctionName: 'ScheduleJob' + Handler: 'jobs.run_job' + MemorySize: 1536 + Role: !GetAtt [ CfnClusterLambdaRole, Arn ] + Runtime: 'python2.7' + Timeout: 300 + PollOnJob: + Type: 'AWS::Lambda::Function' + Properties: + Description: 'Polls on status of jobs' + FunctionName: 'PollOnJob' + Handler: 'jobs.is_job_done' + 
MemorySize: 1536 + Role: !GetAtt [ CfnClusterLambdaRole, Arn ] + Runtime: 'python2.7' + Timeout: 300 + RunJob: + Type: 'AWS::Lambda::Function' + Properties: + Description: 'Runs the next job' + FunctionName: 'RunJob' + Handler: 'jobs.run_job' + MemorySize: 1536 + Role: !GetAtt [ CfnClusterLambdaRole, Arn ] + Runtime: 'python2.7' + Timeout: 300 + DeleteCluster: + Type: 'AWS::Lambda::Function' + Properties: + Description: 'Deletes a cluster' + FunctionName: 'DeleteCfnCluster' + Handler: 'handlers.delete_cfncluster' + MemorySize: 1536 + Role: !GetAtt [ CfnClusterLambdaRole, Arn ] + Runtime: 'python2.7' + Timeout: 300 + CfnClusterStateMachine: + Type: 'AWS::StepFunctions::StateMachine' + Properties: + StateMachineName: 'CfnClusterStateMachine' + DefinitionString: + !Sub + - |- + { + "Comment": "CfnCluster Step Function", + "StartAt": "Create_CfnCluster", + "States": { + "Create_CfnCluster": { + "Type": "Task", + "Resource": "${createClusterArn}", + "Next": "Wait_For_Cluster" + }, + "Wait_For_Cluster": { + "Type": "Wait", + "Seconds": 120, + "Next": "Poll_On_Cluster" + }, + "Poll_On_Cluster": { + "Type": "Task", + "Resource": "${pollOnClusterArn}", + "Next": "Poll_Choice" + }, + "Poll_Choice": { + "Type": "Choice", + "Choices": [ + { + "Variable": "$.status", + "StringEquals": "idle", + "Next": "Wait_For_Cluster" + }, + { + "Variable": "$.status", + "StringEquals": "complete", + "Next": "Creation_Success" + }, + { + "Variable": "$.status", + "StringEquals": "failed", + "Next": "Creation_Failed" + }, + { + "Variable": "$.status", + "StringEquals": "timeout", + "Next": "Cluster_Timeout" + } + ] + }, + "Creation_Failed": { + "Type": "Fail", + "Cause": "Cluster failed to create" + }, + "Cluster_Timeout": { + "Type": "Fail", + "Cause": "Cluster creation timed out" + }, + "Creation_Success": { + "Type": "Pass", + "Next": "{{ entry }}" + }, +{{ jobs }}, + "Delete_CfnCluster": { + "Type": "Task", + "Resource": "${deleteClusterArn}", + "End": true + } + } + } + - { + createClusterArn: !GetAtt [ CreateCfnCluster, Arn ], + pollOnClusterArn: !GetAtt [ PollOnCluster, Arn ], + deleteClusterArn: !GetAtt [ DeleteCluster, Arn ], + pollOnJobArn: !GetAtt [ PollOnJob, Arn ], + runJobArn: !GetAtt [ RunJob, Arn ] + } + RoleArn: !GetAtt [ CfnClusterStateMachineRole, Arn ] +Outputs: + StateMachineArn: + Value: + Ref: CfnClusterStateMachine + ExecutionInput: + Description: 'Sample Input to start execution' + Value: |- + { + "cluster_name": "cfnclusterstepfunction" + } From 85d046e49509b167522d3f9da543857f2259d9c5 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Thu, 23 Aug 2018 11:51:50 -0700 Subject: [PATCH 09/31] URL was getting incorrectly stringified Signed-off-by: Sean Smith --- cli/cfncluster/cfncluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/cfncluster/cfncluster.py b/cli/cfncluster/cfncluster.py index 24c5f9db1a..32c0df2cd6 100644 --- a/cli/cfncluster/cfncluster.py +++ b/cli/cfncluster/cfncluster.py @@ -151,7 +151,7 @@ def update(args): desired_capacity = asg.describe_auto_scaling_groups(AutoScalingGroupNames=[asg_name])\ .get('AutoScalingGroups')[0]\ .get('DesiredCapacity') - config.parameters.append(('InitialQueueSize', desired_capacity)) + config.parameters.append(('InitialQueueSize', str(desired_capacity))) # Get the MasterSubnetId and use it to determine AvailabilityZone try: @@ -174,7 +174,7 @@ def update(args): try: logger.debug((config.template_url, config.parameters)) - cfn_params = [{'ParameterKey': param[0], 'ParameterValue': str(param[1])} for param in 
config.parameters] + cfn_params = [{'ParameterKey': param[0], 'ParameterValue': param[1]} for param in config.parameters] cfn.update_stack(StackName=stack_name,TemplateURL=config.template_url, Parameters=cfn_params, Capabilities=capabilities) status = cfn.describe_stacks(StackName=stack_name).get("Stacks")[0].get('StackStatus') From 46ccc5b0aacf9f8f1673d6392e24aaa32690fc0e Mon Sep 17 00:00:00 2001 From: Elveskevtar Date: Wed, 22 Aug 2018 14:34:07 -0700 Subject: [PATCH 10/31] Update html documentation to include step functions Signed-off-by: Elveskevtar --- cli/stepfunctions/README.md | 24 +++---- docs/source/commands.rst | 33 +++++++++- docs/source/index.rst | 1 + docs/source/stepfunctions.rst | 118 ++++++++++++++++++++++++++++++++++ 4 files changed, 162 insertions(+), 14 deletions(-) create mode 100644 docs/source/stepfunctions.rst diff --git a/cli/stepfunctions/README.md b/cli/stepfunctions/README.md index 8de2a5c9d0..1a8166768d 100644 --- a/cli/stepfunctions/README.md +++ b/cli/stepfunctions/README.md @@ -11,8 +11,7 @@ CfnCluster Step Function is a state management solution for deploying high-perfo * Visit the [AWS Documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html) for more information ``` -$ pip install -r requirements.txt -$ ./deploy.py --bucket --region --config --jobs +$ cfncluster stepfunctions --bucket --region --config --jobs ``` To Run Step Function: @@ -103,7 +102,7 @@ handler = script/path/in/project.sh Optional Parameters: -`wait_time`: How long to wait between rechecking the status of the job to see if it's completed; default = 10; range 1-240 due to scheduler limitations +`wait_time`: Period between polling on the status of the job in seconds; default = 10; range 1-240 due to scheduler limitations ``` [job donut] @@ -114,10 +113,6 @@ wait_time = 240 ## Arguments -### `--config` or `-c` - -Specifies the CfnCluster configuration file to use. This will be utilized by the step function to deploy user defined clusters. For more information on how to configure CfnCluster visit the [CfnCluster Documentation](http://cfncluster.readthedocs.io/en/latest/getting_started.html#configuring-cfncluster). - ### `--bucket` or `-b` Specifies the name of the S3 bucket to be used to store the source code that creates and terminates the CfnClusters. **Important**: if the bucket already exists, it must be in the same region as that given by the --region argument. If it does not exist, it will be made for you in the specified region. @@ -128,6 +123,10 @@ Specifies the job configuration file to use. This will be used to package your j ## Optional Arguments +### `--config` or `-c` + +Specifies the CfnCluster configuration file to use. This will be utilized by the step function to deploy user defined clusters. For more information on how to configure CfnCluster visit the [CfnCluster Documentation](http://cfncluster.readthedocs.io/en/latest/getting_started.html#configuring-cfncluster). + ### `--region` or `-r` Specifies the AWS region to deploy the CloudFormation stack that contains the Step Function and corresponding source code to deploy and terminate CfnClusters. Defaults to us-east-1. @@ -147,18 +146,19 @@ Specifies the name of the EC2 key pair to use for the CfnCluster master node. ** Prints the help menu and usage to standard output. 
``` -usage: deploy.py [-h] --bucket BUCKET_NAME --config CONFIG_FILE --jobs - JOBS_CONFIG [--stack-name STACK_NAME] [--region REGION] - [--key-name KEY_NAME] +usage: cfncluster stepfunctions [-h] --bucket BUCKET_NAME + [--config CONFIG_FILE] --jobs JOBS_CONFIG + [--stack-name STACK_NAME] [--region REGION] + [--key-name KEY_NAME] -Deploys CfnCluster Step Function +deploy a cfncluster stepfunction via cloudformation optional arguments: -h, --help show this help message and exit --bucket BUCKET_NAME, -b BUCKET_NAME Specify s3 bucket to use/create --config CONFIG_FILE, -c CONFIG_FILE - Specify config file to use + Specify cfncluster config file to use --jobs JOBS_CONFIG, -j JOBS_CONFIG Specify jobs config file to use --stack-name STACK_NAME, -s STACK_NAME diff --git a/docs/source/commands.rst b/docs/source/commands.rst index 6fb32f7f11..89e48fb46f 100644 --- a/docs/source/commands.rst +++ b/docs/source/commands.rst @@ -178,7 +178,36 @@ optional arguments: :: - $cfncluster ssh mycluster -i ~/.ssh/id_rsa -v + $ cfncluster ssh mycluster -i ~/.ssh/id_rsa -v + +stepfunctions +============= + +Creates a step function that automatically creates a cluster, runs user specified jobs, and tears the cluster down. + +For example: + cfncluster stepfunctions -b s3bucket -j path/to/jobs.config + +This uses the bucket name s3bucket, or creates it if it doesn't exist, to store the lambda source code and user specified jobs. To see how jobs in the ``jobs.config`` file should be specified, see `Job Config `_. + +arguments: + -h, --help show this help message and exit + --jobs JOBS_CONFIG, -j JOBS_CONFIG + specify jobs config file to use (REQUIRED) + --bucket BUCKET_NAME, -b BUCKET_NAME + specify s3 bucket to use/create (REQUIRED) + --config CONFIG_FILE, -c CONFIG_FILE + specify an alternative config file (default: ~/.cfncluster/config) + --region REGION, -r REGION + specify a region to deploy in (default: us-east-1) + --stack-name STACK_NAME, -s STACK_NAME + specify the stack name to use (default: CfnClusterStepFunction) + --key-name KEY_NAME, -k KEY_NAME + specify the ec2 key pair (default: cfncluster-stepfunctions) + +:: + + $ cfncluster stepfunctions -b s3bucket -j path/to/jobs.config status ====== @@ -199,7 +228,7 @@ optional arguments: :: - $cfncluster status mycluster + $ cfncluster status mycluster list ==== diff --git a/docs/source/index.rst b/docs/source/index.rst index c5ee3de911..dd14504331 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,6 +18,7 @@ CfnCluster ("cloud formation cluster") is a framework that deploys and maintains functional tutorials development + stepfunctions Getting Started --------------- diff --git a/docs/source/stepfunctions.rst b/docs/source/stepfunctions.rst new file mode 100644 index 0000000000..78ce15bd61 --- /dev/null +++ b/docs/source/stepfunctions.rst @@ -0,0 +1,118 @@ +.. _stepfunctions: + +CfnCluster Stepfunctions +######################## + +Why Stepfunctions +================= + +* Allows for complex workflows with CfnCluster +* Handles cluster creation, teardown, and updates +* Useful for conditional automated job execution +* Interfaces with other AWS services + +.. image:: https://s3.amazonaws.com/global-cfncluster/doc-images/parallel_job.gif + +Getting Started +=============== + +.. image:: https://s3.amazonaws.com/global-cfncluster/doc-images/command_start.gif + +1. Configure CfnCluster configuration file with ``cfncluster configure`` or manually +2. Collect jobs that you would like CfnCluster Step Functions to schedule +3. 
Configure jobs configuration file using the following `guide <#jobs-configuration-guide>`_ +4. Deploy a Step Function using ``cfncluster stepfunctions``; see `here `_ +5. Navigate to the Step Function using the deeplink given from the command +6. Click Start Execution and provide a cluster name via JSON execution input + +:: + + { + "cluster_name": "cfnclusterstepfunctions" + } + + +.. image:: https://s3.amazonaws.com/global-cfncluster/doc-images/command_end.gif +.. image:: https://s3.amazonaws.com/global-cfncluster/doc-images/JSON.gif + +Jobs Config +=========== + +:: + + [order] + sequential = job1, banana, job2 + + [job job1] + handler = src/script.sh + s3_uri = s3://bucket-to-use/folder/path/to/project + + [job job2] + handler = is-this-even-a-job + local_path = /path/to/the/job/is-this-even-a-job + + [job banana] + handler = long-running-script.sh + s3_uri = s3://bucket-to-use/folder/path/to/project + wait_time = 240 + +Sections Options: + ``[order]`` required parameters: + * ``sequential``: List of job names to schedule sequentially given in the form of a comma separated list; order matters + + :: + + [order] + sequential = firstjob, secondjob, thirdjob + + OR + + * ``parallel``: List of job names to schedule in parallel given in the form of a comma separated list; order does not matter + + :: + + [order] + parallel = paralleljob1, paralleljob2, otherjob + + **IMPORTANT**: either ``sequential`` or ``parallel`` must be specified; not both + + ``[job ]`` required parameters: + * ``s3_uri``: An S3 URI pointing to the script or folder to package for job scheduling and execution + + :: + + [job apple] + s3_uri = s3://thebucket/thefolder + handler = thescript + + OR + + * ``local_path``: A local path (relative to the jobs config file or absolute) pointing to the script or folder for job scheduling and execution + + :: + + [job banana] + local_path = /path/to/the/script + handler = script + + AND + + * ``handler``: The path and name of the script to run. 
Since the ``s3_uri`` and ``local_path`` can both be directories, this is to specify which file to send off to the scheduler + + :: + + [job carrot] + local_path = relative/path/project + handler = script/path/in/project.sh + + **IMPORTANT**: either ``s3_uri`` or ``local_path`` must be specified; not both + + ``[job ]`` optional parameters: + * ``wait_time``: Period between polling on the status of the job in seconds; default = 10; range 1-240 due to scheduler limitations + + :: + + [job danish] + s3_uri = s3://bucket/script + handler = script + wait_time = 240 From 99f2ddab2eb5ae52a052ca027a84f45d77ec2327 Mon Sep 17 00:00:00 2001 From: Henrique Freitas Date: Fri, 24 Aug 2018 16:54:45 -0300 Subject: [PATCH 11/31] Add GetTemplate permission to CfnClusterUserPolicy template --- docs/source/iam.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/iam.rst b/docs/source/iam.rst index 00ac080ae2..79baad8535 100644 --- a/docs/source/iam.rst +++ b/docs/source/iam.rst @@ -281,7 +281,8 @@ CfnClusterUserPolicy "cloudformation:DescribeStackResource", "cloudformation:DescribeStackResources", "cloudformation:DescribeStacks", - "cloudformation:ListStacks" + "cloudformation:ListStacks", + "cloudformation:GetTemplate" ], "Effect": "Allow", "Resource": "*" From 666588615d67950216e5944f463d8d0daf921a54 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Wed, 22 Aug 2018 17:30:56 -0700 Subject: [PATCH 12/31] Clarify Docs Signed-off-by: Sean Smith --- docs/source/configuration.rst | 50 ++++++++++++++++---------------- docs/source/pre_post_install.rst | 14 +++++++-- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 5f3e5c81c8..ab784dc093 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -212,7 +212,7 @@ Defaults to NONE for the default template. :: pre_install_args """""""""""""""" Quoted list of arguments to be passed to preinstall script - + Defaults to NONE for the default template. :: pre_install_args = NONE @@ -220,7 +220,7 @@ Defaults to NONE for the default template. :: post_install """""""""""" URL to a postinstall script. This is executed after any of the boot_as_* scripts are run - + Can be specified in "http://hostname/path/to/script.sh" or "s3://bucketname/path/to/script.sh" format. Defaults to NONE for the default template. :: @@ -230,7 +230,7 @@ Defaults to NONE for the default template. :: post_install_args """"""""""""""""" Arguments to be passed to postinstall script - + Defaults to NONE for the default template. :: post_install_args = NONE @@ -238,15 +238,15 @@ Defaults to NONE for the default template. :: proxy_server """""""""""" HTTP(S) proxy server, typically http://x.x.x.x:8080 - + Defaults to NONE for the default template. :: proxy_server = NONE placement_group """"""""""""""" -Cluster placement group. The can be one of three values: NONE, DYNAMIC and an existing placement group name. When DYNAMIC is set, a unique placement group will be created as part of the cluster and deleted when the cluster is deleted. - +Cluster placement group. The can be one of three values: NONE, DYNAMIC and an existing placement group name. When DYNAMIC is set, a unique placement group will be created as part of the cluster and deleted when the cluster is deleted. + Defaults to NONE for the default template. More information on placement groups can be found `here `_:: placement_group = NONE @@ -254,7 +254,7 @@ Defaults to NONE for the default template. 
More information on placement groups placement """"""""" Cluster placement logic. This enables the whole cluster or only compute to use the placement group. - + Defaults to cluster in the default template. :: placement = cluster @@ -262,7 +262,7 @@ Defaults to cluster in the default template. :: ephemeral_dir """"""""""""" If instance store volumes exist, this is the path/mountpoint they will be mounted on. - + Defaults to /scratch in the default template. :: ephemeral_dir = /scratch @@ -270,15 +270,15 @@ Defaults to /scratch in the default template. :: shared_dir """""""""" Path/mountpoint for shared EBS volume - + Defaults to /shared in the default template. See :ref:`EBS Section ` for details on working with EBS volumes:: shared_dir = /shared encrypted_ephemeral """"""""""""""""""" -Encrypted ephemeral drives. In-memory keys, non-recoverable. If true, CfnCluster will generate an ephemeral encryption key in memroy and using LUKS encryption, encrypt your instance store volumes. - +Encrypted ephemeral drives. In-memory keys, non-recoverable. If true, CfnCluster will generate an ephemeral encryption key in memroy and using LUKS encryption, encrypt your instance store volumes. + Defaults to false in default template. :: encrypted_ephemeral = false @@ -302,10 +302,10 @@ Defaults to 15 in default template. :: base_os """"""" OS type used in the cluster - + Defaults to alinux in the default template. Available options are: alinux, centos6, centos7, ubuntu1404 and ubuntu1604 -Note: The base_os determines the username used to log into the cluster. +Note: The base_os determines the username used to log into the cluster. * Centos 6 & 7: ``centos`` * Ubuntu: ``ubuntu`` @@ -408,7 +408,7 @@ ssh_from CIDR formatted IP range in which to allow SSH access from. This is only used when cfncluster creates the security group. - + Defaults to 0.0.0.0/0 in the default template. :: ssh_from = 0.0.0.0/0 @@ -416,7 +416,7 @@ Defaults to 0.0.0.0/0 in the default template. :: additional_sg """"""""""""" Additional VPC security group Id for all instances. - + Defaults to NONE in the default template. :: additional_sg = sg-xxxxxx @@ -465,7 +465,7 @@ EBS Volume configuration settings for the volume mounted on the master node and ebs_snapshot_id """"""""""""""" Id of EBS snapshot if using snapshot as source for volume. - + Defaults to NONE for default template. :: ebs_snapshot_id = snap-xxxxx @@ -481,7 +481,7 @@ Defaults to gp2 for default template. :: volume_size """"""""""" Size of volume to be created (if not using a snapshot). - + Defaults to 20GB for default template. :: volume_size = 20 @@ -522,7 +522,7 @@ Settings which define how the compute nodes scale. :: scaling_threshold """"""""""""""""" Threshold for triggering CloudWatch ScaleUp action. - + Defaults to 1 for default template. :: scaling_threshold = 1 @@ -530,7 +530,7 @@ Defaults to 1 for default template. :: scaling_adjustment """""""""""""""""" Number of instances to add when called CloudWatch ScaleUp action. - + Defaults to 1 for default template. :: scaling_adjustment = 1 @@ -539,7 +539,7 @@ Defaults to 1 for default template. :: scaling_threshold2 """""""""""""""""" Threshold for triggering CloudWatch ScaleUp2 action. - + Defaults to 200 for default template. :: scaling_threshold2 = 200 @@ -547,7 +547,7 @@ Defaults to 200 for default template. :: scaling_adjustment2 """"""""""""""""""" Number of instances to add when called CloudWatch ScaleUp2 action - + Defaults to 20 for default template. 
:: scaling_adjustment2 = 20 @@ -555,7 +555,7 @@ Defaults to 20 for default template. :: scaling_period """""""""""""" Period to measure ScalingThreshold. - + Defaults to 60 for default template. :: scaling_period = 60 @@ -563,7 +563,7 @@ Defaults to 60 for default template. :: scaling_evaluation_periods """""""""""""""""""""""""" Number of periods to measure ScalingThreshold. - + Defaults to 2 for default template. :: scaling_evaluation_periods = 2 @@ -571,13 +571,13 @@ Defaults to 2 for default template. :: scaling_cooldown """""""""""""""" Amount of time in seconds to wait before attempting further scaling actions. - + Defaults to 300 for the default template. :: scaling_cooldown = 300 scale_down_idle_time -"""""""""""""""" +"""""""""""""""""""" Amount of time in minutes without a job after which the compute node will terminate. Defaults to 10 for the default template. :: diff --git a/docs/source/pre_post_install.rst b/docs/source/pre_post_install.rst index c07c4b4bdb..fb3c4f0aaf 100644 --- a/docs/source/pre_post_install.rst +++ b/docs/source/pre_post_install.rst @@ -3,9 +3,9 @@ Custom Bootstrap Actions ======================== -CfnCluster can execute arbitrary code either before(pre) or after(post) the main bootstrap action during cluster creation. This code is typically stored in S3 and accessed via HTTP(S) during cluster creation. The code will be executed as root and can be in any script language supported by the cluster OS, typically `bash` or `python`. +CfnCluster can execute arbitrary code either before(pre) or after(post) the main bootstrap action during cluster creation. This code is typically stored in S3 and accessed via HTTP(S) during cluster creation. The code will be executed as root and can be in any script language supported by the cluster OS, typically `bash` or `python`. -pre-install actions are called before any cluster deployment bootstrap such as configuring NAT, EBS and the scheduler. Typical pre-install actions may include modifying storage, adding extra users or packages. +pre-install actions are called before any cluster deployment bootstrap such as configuring NAT, EBS and the scheduler. Typical pre-install actions may include modifying storage, adding extra users or packages. post-install actions are called after cluster bootstrap is complete, as the last action before an instance is considered complete. Typical post-install actions may include changing scheduler settings, modifying storage or packages. @@ -33,6 +33,16 @@ The following config settings are used to define pre/post-install actions and ar # (defaults to NONE for the default template) post_install_args = NONE +Arguments +--------- +The first two arguments ``$0`` and ``$1`` are reserved for the script name and url. 
+ +:: + + $0 => the script name + $1 => s3 url + $n => args set by pre/post_install_args + Example ------- From 90138172ceaa1f189e88a9a3126d7b4f00cd7a0c Mon Sep 17 00:00:00 2001 From: Henrique Freitas Date: Fri, 24 Aug 2018 14:28:27 -0300 Subject: [PATCH 13/31] Add credentials to sts call in check_resources --- cli/cfncluster/config_sanity.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cli/cfncluster/config_sanity.py b/cli/cfncluster/config_sanity.py index da3b907d44..cdb1af66a8 100644 --- a/cli/cfncluster/config_sanity.py +++ b/cli/cfncluster/config_sanity.py @@ -40,7 +40,9 @@ def check_resource(region, aws_access_key_id, aws_secret_access_key, resource_ty aws_secret_access_key=aws_secret_access_key) arn = iam.get_role(RoleName=resource_value).get('Role').get('Arn') - accountid = boto3.client('sts').get_caller_identity().get('Account') + accountid = boto3.client('sts', region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key).get_caller_identity().get('Account') iam_policy = [(['ec2:DescribeVolumes', 'ec2:AttachVolume', 'ec2:DescribeInstanceAttribute', 'ec2:DescribeInstanceStatus', 'ec2:DescribeInstances'], "*"), (['dynamodb:ListTables'], "*"), From c7fb47d48e9c06f6a678138749b72f22e3f0b575 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 27 Aug 2018 15:11:49 -0700 Subject: [PATCH 14/31] Revert "Add parameters to enable scaling down based on idle time" This reverts commit 6a67ff158e6855ac1c41a5d8cdd21a178a290279. --- cli/cfncluster/cfnconfig.py | 3 +-- cli/cfncluster/examples/config | 3 --- cloudformation/cfncluster.cfn.json | 14 +------------- docs/source/configuration.rst | 8 -------- 4 files changed, 2 insertions(+), 26 deletions(-) diff --git a/cli/cfncluster/cfnconfig.py b/cli/cfncluster/cfnconfig.py index bd5fd94fc0..6cb17518b2 100644 --- a/cli/cfncluster/cfnconfig.py +++ b/cli/cfncluster/cfnconfig.py @@ -318,8 +318,7 @@ def __init__(self, args): self.__scaling_options = dict(scaling_threshold=('ScalingThreshold',None), scaling_period=('ScalingPeriod',None), scaling_evaluation_periods=('ScalingEvaluationPeriods',None), scaling_adjustment=('ScalingAdjustment',None),scaling_adjustment2=('ScalingAdjustment2',None), - scaling_cooldown=('ScalingCooldown',None),scale_down_idle_time=('ScaleDownIdleTime',None), - scaling_threshold2=('ScalingThreshold2',None)) + scaling_cooldown=('ScalingCooldown',None),scaling_threshold2=('ScalingThreshold2',None)) try: if self.__scaling_section: diff --git a/cli/cfncluster/examples/config b/cli/cfncluster/examples/config index 150c12611f..e7418d621c 100644 --- a/cli/cfncluster/examples/config +++ b/cli/cfncluster/examples/config @@ -198,6 +198,3 @@ master_subnet_id = subnet- # Amount of time in seconds to wait before attempting further scaling actions # (defaults to 300 for the default template #scaling_cooldown = 300 -# Amount of time in minutes without a job after which the compute node will terminate -# Defaults to 10 for the default template -#scale_down_idle_time = 10 diff --git a/cloudformation/cfncluster.cfn.json b/cloudformation/cfncluster.cfn.json index 9efadc47e7..58506c9ade 100644 --- a/cloudformation/cfncluster.cfn.json +++ b/cloudformation/cfncluster.cfn.json @@ -108,8 +108,7 @@ "ScalingAdjustment", "ScalingThreshold2", "ScalingAdjustment2", - "ScalingCooldown", - "ScaleDownIdleTime" + "ScalingCooldown" ] }, { @@ -290,9 +289,6 @@ "ScalingCooldown": { "default": "scaling_cooldown" }, - "ScaleDownIdleTime": { - "default": "scale_down_idle_time" - }, "ScalingThreshold2": 
{ "default": "scaling_threshold2" }, @@ -952,11 +948,6 @@ "Type": "String", "Default": "300" }, - "ScaleDownIdleTime": { - "Description": "Period in minutes without jobs after which compute node will terminate ", - "Type": "String", - "Default": "10" - }, "ScalingAdjustment": { "Description": "Number of instances to add to cluster when the CloudWatch ScaleUp action is called.", "Type": "String", @@ -3414,9 +3405,6 @@ "cfn_scheduler": { "Ref": "Scheduler" }, - "cfn_scale_down_idle_time": { - "Ref": "ScaleDownIdleTime" - }, "cfn_encrypted_ephemeral": { "Ref": "EncryptedEphemeral" }, diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index ab784dc093..1e4d954cc9 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -575,11 +575,3 @@ Amount of time in seconds to wait before attempting further scaling actions. Defaults to 300 for the default template. :: scaling_cooldown = 300 - -scale_down_idle_time -"""""""""""""""""""" -Amount of time in minutes without a job after which the compute node will terminate. - -Defaults to 10 for the default template. :: - - scale_down_idle_time = 10 From 89b1321a633596ed1a4a5cf20b54deafd7829bde Mon Sep 17 00:00:00 2001 From: Enrico Usai Date: Tue, 28 Aug 2018 12:40:45 +0200 Subject: [PATCH 15/31] Minor improvement to help messages Signed-off-by: Enrico Usai --- cli/cfncluster/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/cfncluster/cli.py b/cli/cfncluster/cli.py index f0e5af6da9..bbd0a104ff 100644 --- a/cli/cfncluster/cli.py +++ b/cli/cfncluster/cli.py @@ -145,17 +145,17 @@ def main(): addarg_nowait(pdelete) pdelete.set_defaults(func=delete) - pstart = subparsers.add_parser('start', help='start the compute-fleet that has been stopped') + pstart = subparsers.add_parser('start', help='start the compute fleet that has been stopped') pstart.add_argument("cluster_name", type=str, default=None, - help='starts the compute-fleet of the provided cluster name.') + help='starts the compute fleet of the provided cluster name.') addarg_config(pstart) addarg_region(pstart) pstart.set_defaults(func=start) - pstop = subparsers.add_parser('stop', help='stop the compute-fleet, but leave the MasterServer running for ' + pstop = subparsers.add_parser('stop', help='stop the compute fleet, but leave the master server running for ' 'debugging/development') pstop.add_argument("cluster_name", type=str, default=None, - help='stops the compute-fleet of the provided cluster name.') + help='stops the compute fleet of the provided cluster name.') addarg_config(pstop) addarg_region(pstop) pstop.set_defaults(func=stop) From a0df74b81a216a2e2f05947cfcaf6297d4b4e2f8 Mon Sep 17 00:00:00 2001 From: Enrico Usai Date: Tue, 28 Aug 2018 12:41:08 +0200 Subject: [PATCH 16/31] Add help message for ssh command Signed-off-by: Enrico Usai --- cli/cfncluster/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cli/cfncluster/cli.py b/cli/cfncluster/cli.py index bbd0a104ff..03d766a5bf 100644 --- a/cli/cfncluster/cli.py +++ b/cli/cfncluster/cli.py @@ -187,9 +187,10 @@ def main(): pversion = subparsers.add_parser('version', help='display version of cfncluster') pversion.set_defaults(func=version) - pssh = subparsers.add_parser('ssh', description='run ssh command with username and ip address pre-filled. ' \ - 'Arbitrary arguments are appended to the end of the ssh commmand. 
' \ - 'This command may be customized in the aliases section of the config file.') + pssh = subparsers.add_parser('ssh', help='connect to the master server using SSH', + description='run ssh command with username and ip address pre-filled. ' \ + 'Arbitrary arguments are appended to the end of the ssh commmand. ' \ + 'This command may be customized in the aliases section of the config file.') pssh.add_argument("cluster_name", type=str, default=None, help='name of the cluster to set variables for.') pssh.add_argument("--dryrun", "-d", action='store_true', dest="dryrun", default=False, From 663ede40ea7b333065d903039a745af4b25fff39 Mon Sep 17 00:00:00 2001 From: Enrico Usai Date: Tue, 28 Aug 2018 12:41:54 +0200 Subject: [PATCH 17/31] Move ssh command before configure and version commands Signed-off-by: Enrico Usai --- cli/cfncluster/cli.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cli/cfncluster/cli.py b/cli/cfncluster/cli.py index 03d766a5bf..ef8cc115ef 100644 --- a/cli/cfncluster/cli.py +++ b/cli/cfncluster/cli.py @@ -180,13 +180,6 @@ def main(): addarg_region(pinstances) pinstances.set_defaults(func=instances) - pconfigure = subparsers.add_parser('configure', help='creating initial cfncluster configuration') - addarg_config(pconfigure) - pconfigure.set_defaults(func=configure) - - pversion = subparsers.add_parser('version', help='display version of cfncluster') - pversion.set_defaults(func=version) - pssh = subparsers.add_parser('ssh', help='connect to the master server using SSH', description='run ssh command with username and ip address pre-filled. ' \ 'Arbitrary arguments are appended to the end of the ssh commmand. ' \ @@ -197,6 +190,13 @@ def main(): help='print command and exit.') pssh.set_defaults(func=command) + pconfigure = subparsers.add_parser('configure', help='creating initial cfncluster configuration') + addarg_config(pconfigure) + pconfigure.set_defaults(func=configure) + + pversion = subparsers.add_parser('version', help='display version of cfncluster') + pversion.set_defaults(func=version) + default_path = os.path.expanduser(os.path.join('~', '.cfncluster', 'config')) stepfunctions = subparsers.add_parser('stepfunctions', description='deploy a cfncluster stepfunction via cloudformation') From 5159bb211d4c46dd4638fb186639e7dde085bca6 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 28 Aug 2018 10:29:07 -0700 Subject: [PATCH 18/31] Revert "Add cfncluster stepfunctions" This reverts commit a1f5d1d9d00ea00db5b0ce05ee21816967102d92. 
Signed-off-by: Sean Smith --- cli/cfncluster/cli.py | 21 -- cli/setup.py | 3 +- cli/stepfunctions/Dockerfile | 4 - cli/stepfunctions/__init__.py | 226 -------------- cli/stepfunctions/job_parallel.txt | 64 ---- cli/stepfunctions/job_sequential.txt | 53 ---- cli/stepfunctions/jobs/example/sleep.sh | 3 - cli/stepfunctions/jobs/hello_world.sh | 3 - cli/stepfunctions/jobs/jobs.config | 11 - cli/stepfunctions/package.sh | 13 - cli/stepfunctions/requirements-lambda.txt | 3 - cli/stepfunctions/src/constants.py | 9 - cli/stepfunctions/src/handlers.py | 231 -------------- cli/stepfunctions/src/jobs.py | 139 --------- cli/stepfunctions/templates/template.yaml | 348 ---------------------- 15 files changed, 1 insertion(+), 1130 deletions(-) delete mode 100644 cli/stepfunctions/Dockerfile delete mode 100644 cli/stepfunctions/__init__.py delete mode 100644 cli/stepfunctions/job_parallel.txt delete mode 100644 cli/stepfunctions/job_sequential.txt delete mode 100755 cli/stepfunctions/jobs/example/sleep.sh delete mode 100755 cli/stepfunctions/jobs/hello_world.sh delete mode 100644 cli/stepfunctions/jobs/jobs.config delete mode 100755 cli/stepfunctions/package.sh delete mode 100644 cli/stepfunctions/requirements-lambda.txt delete mode 100644 cli/stepfunctions/src/constants.py delete mode 100644 cli/stepfunctions/src/handlers.py delete mode 100644 cli/stepfunctions/src/jobs.py delete mode 100644 cli/stepfunctions/templates/template.yaml diff --git a/cli/cfncluster/cli.py b/cli/cfncluster/cli.py index ef8cc115ef..aadbf944db 100644 --- a/cli/cfncluster/cli.py +++ b/cli/cfncluster/cli.py @@ -20,7 +20,6 @@ from . import cfncluster from . import easyconfig -from stepfunctions import deploy def create(args): cfncluster.create(args) @@ -55,9 +54,6 @@ def start(args): def stop(args): cfncluster.stop(args) -def stepfunctiondeploy(args): - deploy(args) - def config_logger(): logger = logging.getLogger('cfncluster.cfncluster') logger.setLevel(logging.DEBUG) @@ -197,23 +193,6 @@ def main(): pversion = subparsers.add_parser('version', help='display version of cfncluster') pversion.set_defaults(func=version) - default_path = os.path.expanduser(os.path.join('~', '.cfncluster', 'config')) - stepfunctions = subparsers.add_parser('stepfunctions', - description='deploy a cfncluster stepfunction via cloudformation') - stepfunctions.add_argument('--bucket', '-b', dest='bucket_name', - help='Specify s3 bucket to use/create', required=True) - stepfunctions.add_argument('--config', '-c', dest='config_file', - help='Specify cfncluster config file to use', default=default_path) - stepfunctions.add_argument('--jobs', '-j', dest='jobs_config', - help='Specify jobs config file to use', required=True) - stepfunctions.add_argument('--stack-name', '-s', dest='stack_name', - help='Specify the stack name to use', default='CfnClusterStepFunction') - stepfunctions.add_argument('--region', '-r', dest='region', - help='Specify the region to deploy in', default='us-east-1') - stepfunctions.add_argument('--key-name', '-k', dest='key_name', - help='Specify the ec2 key pair', default='cfncluster-stepfunctions') - stepfunctions.set_defaults(func=stepfunctiondeploy) - args, extra_args = parser.parse_known_args() logger.debug(args) if args.func.__name__ == 'command': diff --git a/cli/setup.py b/cli/setup.py index 24d3fd0d0f..1c6475cc0e 100644 --- a/cli/setup.py +++ b/cli/setup.py @@ -21,7 +21,7 @@ def read(fname): console_scripts = ['cfncluster = cfncluster.cli:main'] version = "1.5.3" -requires = ['boto3>=1.7.33', 'awscli>=1.11.175', 'future>=0.16.0', 
'jinja2==2.10'] +requires = ['boto3>=1.7.33', 'awscli>=1.11.175', 'future>=0.16.0'] if sys.version_info[:2] == (2, 6): # For python2.6 we have to require argparse since it @@ -46,7 +46,6 @@ def read(fname): zip_safe = False, package_data = { '' : ['examples/config'], - 'stepfunctions': ['**/*'] }, long_description=read('README'), classifiers=[ diff --git a/cli/stepfunctions/Dockerfile b/cli/stepfunctions/Dockerfile deleted file mode 100644 index 6782ba6e0a..0000000000 --- a/cli/stepfunctions/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM lambci/lambda:build-python2.7 -RUN yum install libffi-devel openssl-devel -RUN mkdir /var/package -ADD requirements-lambda.txt /var/task diff --git a/cli/stepfunctions/__init__.py b/cli/stepfunctions/__init__.py deleted file mode 100644 index 6b89e55b7a..0000000000 --- a/cli/stepfunctions/__init__.py +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env python -from distutils import dir_util as dirutil -from shutil import copy -import argparse -import configparser -import os -import subprocess -import sys -import tempfile - -from botocore.exceptions import ClientError -from jinja2 import Template -import boto3 - -def _check_docker(): - # check that docker is installed - if subprocess.call(['docker', '-v']) != 0: - print('Docker is not installed properly') - sys.exit(1) - -def _resolve_s3(bucket_name, region): - # create s3 bucket if it does not exist - s3 = boto3.resource('s3') - if s3.Bucket(name=bucket_name) in s3.buckets.all(): - # if bucket already exists, check loca - location = s3.meta.client.get_bucket_location( - Bucket=bucket_name).get('LocationConstraint') - location = 'us-east-1' if location == None else location - if location != region: - print('Bucket {} is in {}, should be in {}'.format( - bucket_name, location, region)) - sys.exit(1) - else: - # if 'us-east-1' CreateBucketConfiguration must be omitted - if region == 'us-east-1': - bucket = s3.create_bucket(Bucket=bucket_name) - else: - bucket = s3.create_bucket( - Bucket=bucket_name, - CreateBucketConfiguration={ - 'LocationConstraint': region - } - ) - bucket.wait_until_exists() - -def _copy_source(script_path): - # create temporary directory - tempdir = tempfile.mkdtemp(dir='/tmp') - - # copy source code and cfn template to tempdir - dirutil.copy_tree(os.path.join(script_path, 'src'), tempdir) - dirutil.copy_tree(os.path.join(script_path, 'templates'), tempdir) - - print('Created temporary directory: {}'.format(tempdir)) - return tempdir - -def _edit_key_param(tempdir, config_file, key_name): - # get configuration file - config_path = os.path.realpath(config_file) - config = configparser.ConfigParser() - config.readfp(open(config_path)) - - # set key_name of config - template = config.get('global', 'cluster_template') - config.set('cluster {}'.format(template), 'key_name', key_name) - - # output with new key_name to a copy - os.mkdir(os.path.join(tempdir, 'config')) - new_config = open(os.path.join(tempdir, 'config', 'cfncluster.config'), 'w+') - config.write(new_config) - new_config.close() - print('Copied cfncluster config with key_name={}'.format(key_name)) - - return config - -def _package_jobs(tempdir, jobs_config, config, script_dir): - # setup jobs temp folder and copy jobs config - try: - os.mkdir(os.path.join(tempdir, 'jobs')) - config_path = os.path.realpath(jobs_config) - copy(config_path, os.path.join(tempdir, 'jobs.config')) - config.readfp(open(config_path)) - except IOError: - msg = 'Must specify a real file for the jobs config.\n' \ - 'A working example can be found at {}' - loc 
= os.path.join(script_dir, 'jobs', 'jobs.config') - print(msg.format(loc)) - sys.exit(1) - - # package user specified jobs - job_sections = filter(lambda x: 'job ' in x, config.sections()) - for section in job_sections: - job_name = section[4:] - new_path = os.path.join(tempdir, 'jobs', job_name) - os.mkdir(new_path) - is_s3 = 's3_uri' in config.options(section) - is_local = 'local_path' in config.options(section) - if is_s3 and is_local: - print('Must specify s3_uri or local_path, not both') - sys.exit(1) - elif is_s3: - # if s3 use aws s3 short commands - uri = config.get(section, 's3_uri') - print(subprocess.check_output( - ['aws', 's3', 'cp', uri, new_path] - )) - elif is_local: - # if local path copy file/directory - path = config.get(section, 'local_path') - config_folder = os.path.dirname(config_path) - job_path = os.path.join(config_folder, path) - if os.path.isdir(job_path): - dirutil.copy_tree(job_path, new_path) - else: - copy(job_path, new_path) - else: - print('Need to specify s3_uri or local_path in {} section'.format(section)) - sys.exit(1) - -def _generate_template(script_path, tempdir, config): - # dynamically generate cfn template based on jobs config - template_txt = open(os.path.join(tempdir, 'template.yaml'), 'r').read() - cfn_template = Template(template_txt) - - # handle sequential and parallel job execution types - if 'sequential' in config.options('order'): - job_txt = open(os.path.join(script_path, 'job_sequential.txt'), 'r').read() - sequential = config.get('order', 'sequential') - jobs = sequential.split(',') - jobs = map(lambda x: x.strip(), jobs) - job_list = [] - for job in jobs: - sec = 10 - section = 'job {}'.format(job) - if 'wait_time' in config.options(section): - sec = int(config.get(section, 'wait_time')) - if sec <= 0 or sec > 240: - print('wait_time must be between 1 and 240 seconds inclusive') - sys.exit(1) - index = jobs.index(job) - end = 'Delete_CfnCluster' if index == len(jobs) - 1 else \ - 'Pass_Job_{}'.format(jobs[index + 1]) - handler = config.get(section, 'handler') - job_list.append({ - 'name': job, 'sec': sec, 'end': end, 'handler': handler - }) - sequential_template = Template(job_txt) - job_def = sequential_template.render(job_list=job_list) - entry = 'Pass_Job_{}'.format(jobs[0]) - elif 'parallel' in config.options('order'): - job_txt = open(os.path.join(script_path, 'job_parallel.txt'), 'r').read() - parallel = config.get('order', 'parallel') - jobs = parallel.split(',') - jobs = map(lambda x: x.strip(), jobs) - job_list = [] - for job in jobs: - sec = 10 - section = 'job {}'.format(job) - if 'wait_time' in config.options(section): - sec = int(config.get(section, 'wait_time')) - if sec <= 0 or sec > 240: - print('wait_time must be between 1 and 240 seconds inclusive') - sys.exit(1) - handler = config.get(section, 'handler') - job_list.append({'name': job, 'sec': sec, 'handler': handler}) - parallel_template = Template(job_txt) - job_def = parallel_template.render(job_list=job_list) - entry = 'Parallel_Job_Execution' - - # output dynamically generated template - new_cfn_txt = cfn_template.render(entry=entry, jobs=job_def) - open(os.path.join(tempdir, 'template.yaml'), 'w').write(new_cfn_txt) - -def _package(config_file, key_name, jobs_config): - script_path = os.path.dirname(os.path.realpath(__file__)) - tempdir = _copy_source(script_path) - config = _edit_key_param(tempdir, config_file, key_name) - _package_jobs(tempdir, jobs_config, config, script_path) - _generate_template(script_path, tempdir, config) - return tempdir - -def 
_deeplink_url(region, stack_name): - # get outputs from cfn to use in url - try: - cloudformation = boto3.resource( - 'cloudformation', region_name=region) - stack = cloudformation.Stack(stack_name) - stackId = stack.stack_id - outputs = stack.outputs - machineArn = filter( - lambda op: op['OutputKey'] == 'StateMachineArn', outputs - )[0]['OutputValue'] - except ClientError as e: - print(e.response.get('Error').get('Message')) - sys.exit(1) - - # fill and print url - url_region = '{}.'.format(region) if region != 'us-east-1' else '' - print('URL to Step Function State Machine:') - print('https://{}console.aws.amazon.com/states/home?region={}#/' \ - 'statemachines/view/{}?registered=true&stackId={}'.format( - url_region, region, machineArn, stackId)) - -def deploy(args): - """Deploys the CloudFormation stack based on args - - Args: - args: arguments passed in by argparse library - """ - _check_docker() - _resolve_s3(args.bucket_name, args.region) - tempdir = _package(args.config_file, args.key_name, args.jobs_config) - - print('Packaging up all dependencies, this can take a moment...') - - # package and deploy the cloudformation stack - try: - path_dir = os.path.dirname(os.path.realpath(__file__)) - path = os.path.join(path_dir, 'package.sh') - print(subprocess.check_output([path, tempdir, - args.bucket_name, args.stack_name, args.region, path_dir])) - except subprocess.CalledProcessError as e: - print(e.output) - sys.exit(1) - - _deeplink_url(args.region, args.stack_name) diff --git a/cli/stepfunctions/job_parallel.txt b/cli/stepfunctions/job_parallel.txt deleted file mode 100644 index 7e85a92cdf..0000000000 --- a/cli/stepfunctions/job_parallel.txt +++ /dev/null @@ -1,64 +0,0 @@ - "Parallel_Job_Execution": { - "Type": "Parallel", - "Branches": [ - {% for job in job_list %}{ - "StartAt": "Pass_Job_{{ job.name }}", - "States": { - "Pass_Job_{{ job.name }}": { - "Type": "Pass", - "Result": { - "name": "{{ job.name }}", - "handler": "{{ job.handler }}" - }, - "ResultPath": "$.job_info", - "Next": "Schedule_Job_{{ job.name }}" - }, - "Schedule_Job_{{ job.name }}": { - "Type": "Task", - "Resource": "${runJobArn}", - "Next": "Wait_For_Job_{{ job.name }}" - }, - "Wait_For_Job_{{ job.name }}": { - "Type": "Wait", - "Seconds": {{ job.sec }}, - "Next": "Poll_On_Job_{{ job.name }}" - }, - "Poll_On_Job_{{ job.name }}": { - "Type": "Task", - "Resource": "${pollOnJobArn}", - "Next": "{{ job.name }}_Poll_Choice" - }, - "{{ job.name }}_Poll_Choice": { - "Type": "Choice", - "Choices": [ - { - "Variable": "$.status", - "StringEquals": "idle", - "Next": "Wait_For_Job_{{ job.name }}" - }, - { - "Variable": "$.status", - "StringEquals": "failed", - "Next": "Job_Failed_{{ job.name }}" - }, - { - "Variable": "$.status", - "StringEquals": "complete", - "Next": "Job_Succeeded_{{ job.name }}" - } - ] - }, - "Job_Failed_{{ job.name }}": { - "Type": "Fail", - "Cause": "Exit code not 0" - }, - "Job_Succeeded_{{ job.name }}": { - "Type": "Pass", - "End": true - } - } - }{% if not loop.last %},{% endif %} - {% if not loop.last %} {% endif %}{% endfor -%} - ], - "Next": "Delete_CfnCluster" - } diff --git a/cli/stepfunctions/job_sequential.txt b/cli/stepfunctions/job_sequential.txt deleted file mode 100644 index e1ccbb0944..0000000000 --- a/cli/stepfunctions/job_sequential.txt +++ /dev/null @@ -1,53 +0,0 @@ - {% for job in job_list %}"Pass_Job_{{ job.name }}": { - "Type": "Pass", - "Result": { - "name": "{{ job.name }}", - "handler": "{{ job.handler }}" - }, - "ResultPath": "$.job_info", - "Next": "Schedule_Job_{{ job.name 
}}" - }, - "Schedule_Job_{{ job.name }}": { - "Type": "Task", - "Resource": "${runJobArn}", - "Next": "Wait_For_Job_{{ job.name }}" - }, - "Wait_For_Job_{{ job.name }}": { - "Type": "Wait", - "Seconds": {{ job.sec }}, - "Next": "Poll_On_Job_{{ job.name }}" - }, - "Poll_On_Job_{{ job.name }}": { - "Type": "Task", - "Resource": "${pollOnJobArn}", - "Next": "{{ job.name }}_Poll_Choice" - }, - "{{ job.name }}_Poll_Choice": { - "Type": "Choice", - "Choices": [ - { - "Variable": "$.status", - "StringEquals": "idle", - "Next": "Wait_For_Job_{{ job.name }}" - }, - { - "Variable": "$.status", - "StringEquals": "failed", - "Next": "Job_Failed_{{ job.name }}" - }, - { - "Variable": "$.status", - "StringEquals": "complete", - "Next": "Job_Succeeded_{{ job.name }}" - } - ] - }, - "Job_Failed_{{ job.name }}": { - "Type": "Fail", - "Cause": "Exit code not 0" - }, - "Job_Succeeded_{{ job.name }}": { - "Type": "Pass", - "Next": "{{ job.end }}" - }{% if not loop.last %},{% endif %} - {% endfor -%} diff --git a/cli/stepfunctions/jobs/example/sleep.sh b/cli/stepfunctions/jobs/example/sleep.sh deleted file mode 100755 index ff7c50e747..0000000000 --- a/cli/stepfunctions/jobs/example/sleep.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -sleep 30 -exit 0 diff --git a/cli/stepfunctions/jobs/hello_world.sh b/cli/stepfunctions/jobs/hello_world.sh deleted file mode 100755 index 1e44da15e7..0000000000 --- a/cli/stepfunctions/jobs/hello_world.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -echo 'Hello World' -exit 0 diff --git a/cli/stepfunctions/jobs/jobs.config b/cli/stepfunctions/jobs/jobs.config deleted file mode 100644 index 64d2e3a1f9..0000000000 --- a/cli/stepfunctions/jobs/jobs.config +++ /dev/null @@ -1,11 +0,0 @@ -[order] -sequential = hello, folder - -[job hello] -handler = hello_world.sh -local_path = hello_world.sh -wait_time = 30 - -[job folder] -handler = sleep.sh -local_path = example diff --git a/cli/stepfunctions/package.sh b/cli/stepfunctions/package.sh deleted file mode 100755 index d41fae06e7..0000000000 --- a/cli/stepfunctions/package.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -docker build -t cfncluster-stepfunctions $5 -docker run -v $1:/var/package cfncluster-stepfunctions \ - pip install -r requirements-lambda.txt -t /var/package -aws cloudformation package \ - --template-file $1/template.yaml \ - --output-template-file $1/deploy.yaml \ - --s3-bucket $2 -aws cloudformation deploy \ - --template-file $1/deploy.yaml \ - --capabilities CAPABILITY_IAM \ - --stack-name $3 \ - --region $4 diff --git a/cli/stepfunctions/requirements-lambda.txt b/cli/stepfunctions/requirements-lambda.txt deleted file mode 100644 index 08674a5b20..0000000000 --- a/cli/stepfunctions/requirements-lambda.txt +++ /dev/null @@ -1,3 +0,0 @@ -cfncluster==1.5.2 -setuptools==39.2.0 -paramiko==2.4.1 diff --git a/cli/stepfunctions/src/constants.py b/cli/stepfunctions/src/constants.py deleted file mode 100644 index c2b21b75c2..0000000000 --- a/cli/stepfunctions/src/constants.py +++ /dev/null @@ -1,9 +0,0 @@ -# functions used for func arg in cfncluster cli -def create(): - pass - -def delete(): - pass - -def update(): - pass diff --git a/cli/stepfunctions/src/handlers.py b/cli/stepfunctions/src/handlers.py deleted file mode 100644 index 47bf4aa58d..0000000000 --- a/cli/stepfunctions/src/handlers.py +++ /dev/null @@ -1,231 +0,0 @@ -from StringIO import StringIO -import configparser -import logging -import os -import sys -import traceback - -from botocore.exceptions import ClientError -from cfncluster import cli, cfncluster 
-import boto3 -import paramiko - -import constants - -# set logger and log level -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -idle = ['CREATE_IN_PROGRESS', 'UPDATE_IN_PROGRESS', 'REVIEW_IN_PROGRESS'] -complete = ['CREATE_COMPLETE', 'UPDATE_COMPLETE'] - - -class Args: - """Setup arguments to pass to cfncluster cli - - Initializes with all possible arguments that could be - passed into the cfncluster cli - """ - - config_file = 'config/cfncluster.config' - reset_desired = False - template_url = None - norollback = False - nowait = True - - def __init__(self, cluster_name, region, func): - self.cluster_name = cluster_name - self.region = region - self.func = func - - -class EC2_SSH: - """Creates a paramiko ssh client for EC2 instances - - Attributes: - ip: Master public IP address of EC2 instance - """ - - def __init__(self, ip, username, key): - self.ip = ip - self.username = username - self.key = key - - def __enter__(self): - try: - sm = boto3.client('secretsmanager') - secret = sm.get_secret_value(SecretId=self.key) - key_string = secret['SecretString'] - key_file = StringIO(key_string) - pkey = paramiko.RSAKey.from_private_key(key_file) - self.ssh = paramiko.SSHClient() - self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - self.ssh.connect(self.ip, username=self.username, pkey=pkey) - except ClientError as e: - print(e.response.get('Error').get('Message')) - sys.exit(1) - return self.ssh - - def __exit__(self, exc_type, exc_value, tb): - if exc_type is not None: - traceback.print_exception(exc_type, exc_value, tb) - self.ssh.close() - - -class EC2_SFTP: - """Creates a paramiko sftp client for EC2 instances - - Attributes: - ip: Master public IP address of EC2 instance - """ - - def __init__(self, ip, username, key): - self.ip = ip - self.username = username - self.key = key - - def __enter__(self): - try: - sm = boto3.client('secretsmanager') - secret = sm.get_secret_value(SecretId=self.key) - key_string = secret['SecretString'] - key_file = StringIO(key_string) - pkey = paramiko.RSAKey.from_private_key(key_file) - self.transport = paramiko.Transport(self.ip) - self.transport.connect(username=self.username, pkey=pkey) - self.sftp = paramiko.SFTPClient.from_transport(self.transport) - except ClientError as e: - print(e.response.get('Error').get('Message')) - sys.exit(1) - return self.sftp - - def __exit__(self, exc_type, exc_value, tb): - if exc_type is not None: - traceback.print_exception(exc_type, exc_value, tb) - self.sftp.close() - self.transport.close() - - -def create_cfncluster(event, context): - """Handler for creating cfnclusters - - Args: - event: should contain 'cluster_name' attribute - """ - logging.info('event = {}\ncontext = {}'.format(event, context)) - - # variable check - if event.get('cluster_name') is None: - raise Exception('cluster_name not specified') - - config = configparser.ConfigParser() - config.readfp(open('config/cfncluster.config')) - event['key_name'] = config.get('cluster default', 'key_name') - - region = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') - - # create/get ec2 key pair - try: - ec2 = boto3.client('ec2') - ec2.describe_key_pairs(KeyNames=[event['key_name']]) - sm = boto3.client('secretsmanager') - sm.describe_secret(SecretId=event['key_name']) - except ClientError as e: - if e.response.get('Error').get('Code') == 'InvalidKeyPair.NotFound': - try: - ec2 = boto3.client('ec2') - key = ec2.create_key_pair(KeyName=event['key_name']) - sm = boto3.client('secretsmanager') - sm.create_secret( - Name=event['key_name'], - 
SecretString=key['KeyMaterial'] - ) - except ClientError as e: - print(e.response.get('Error').get('Message')) - sys.exit(1) - else: - print(e.response.get('Error').get('Message')) - sys.exit(1) - - args = Args(event['cluster_name'], region, constants.create) - cli.create(args) - return event - -def is_cluster_ready(event, context): - """Handler for waiting on successful cfncluster deployment - - Args: - event: contains number of executions of this function - """ - logging.debug('event = {}\ncontext = {}'.format(event, context)) - - # variable check - if event.get('execution_count') is None: - event['execution_count'] = 0 - - # poll on cluster creation - stack = 'cfncluster-{}'.format(event['cluster_name']) - try: - cfn = boto3.resource('cloudformation') - stack = cfn.Stack(stack) - status = stack.stack_status - except ClientError as e: - print(e.response.get('Error').get('Message')) - sys.exit(1) - - logger.info('Poll {}: {}'.format(event['execution_count'], status)) - - if status in idle: - event['status'] = 'idle' - elif status in complete: - event['status'] = 'complete' - outputs = stack.outputs - parameters = stack.parameters - event['master_ip'] = filter( - lambda op: op['OutputKey'] == 'MasterPublicIP', outputs - )[0]['OutputValue'] - event['user_name'] = filter( - lambda op: op['OutputKey'] == 'ClusterUser', outputs - )[0]['OutputValue'] - event['scheduler'] = filter( - lambda param: param['ParameterKey'] == 'Scheduler', parameters - )[0]['ParameterValue'] - else: - logging.error(status) - event['status'] = 'failed' - - event['execution_count'] += 1 - - # give timeout if applicable - if event['execution_count'] == 15 and event['status'] == 'idle': - event['status'] = 'timeout' - - # make working temporary directory in master node - if event['status'] == 'complete': - master_ip = event['master_ip'] - user_name = event['user_name'] - key_name = event['key_name'] - with EC2_SSH(master_ip, user_name, key_name) as ssh_client: - command = 'mktemp -d -p /shared' - workdir = ssh_client.exec_command(command)[1].read().strip() - event['workdir'] = workdir - - return event - -def delete_cfncluster(event, context): - """Handler for deleting cfnclusters - - Args: - event: should contain 'cluster_name' attribute - """ - logging.debug('event = {}\ncontext = {}'.format(event, context)) - - # the output of parallel states is a list of outputs of all branches - is_list = isinstance(event, list) - name = event[0]['cluster_name'] if is_list else event['cluster_name'] - - region = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') - args = Args(name, region, constants.delete) - cli.delete(args) - - return event diff --git a/cli/stepfunctions/src/jobs.py b/cli/stepfunctions/src/jobs.py deleted file mode 100644 index 32d36e953d..0000000000 --- a/cli/stepfunctions/src/jobs.py +++ /dev/null @@ -1,139 +0,0 @@ -import configparser -import logging -import os -import time -import zipfile - -from handlers import EC2_SSH, EC2_SFTP - -# set logger and log level -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -class JobSchedulingException(Exception): - pass - -commands = { - 'schedule': { - 'sge': '. /opt/sge/default/common/settings.sh; cd {}; qsub {} |& grep \'submitted\' | awk \'{{print $3}}\'', - 'torque': 'cd {}; qsub {} |& awk -F. \'{{$0=$1}}1\'', - 'slurm': 'cd {}; sbatch {} |& awk \'{{print $4}}\'' - }, - 'poll': { - 'sge': '. /opt/sge/default/common/settings.sh; qstat | awk \'$1 == {} {{print $5}}\'', - 'torque': 'qstat -c | awk -F. 
\'$1 == {} {{print $0}}\' | awk \'{{print $5}}\'', - 'slurm': 'scontrol show job {} | grep JobState | awk \'{{print $1}}\' | awk -F= \'{{print $2}}\'' - }, - 'job_status': { - 'queued': { - 'sge': 'qw', - 'torque': 'Q', - 'slurm': 'PENDING' - }, - 'running': { - 'sge': 'r', - 'torque': 'R', - 'slurm': 'RUNNING' - } - }, - 'exit_code': { - 'sge': '. /opt/sge/default/common/settings.sh; qacct -j {} | grep exit_status | awk \'{{print $2}}\'', - 'torque': 'qstat -f {} | grep exit_status | awk \'{{print $3}}\'', - 'slurm': 'scontrol show job {} | grep ExitCode= | awk \'{{print $5}}\' | awk -F= \'{{print $2}}\' | awk -F: \'{{print $1}}\'' - } -} - -def run_job(event, context): - """Runs an example job - - Args: - event: contains ip for the master node of the cfncluster - """ - logging.debug('event = {}\ncontext = {}'.format(event, context)) - - job_name = event['job_info']['name'] - job_handler = event['job_info']['handler'] - scheduler = event['scheduler'] - - workdir = event['workdir'] - master_ip = event['master_ip'] - user_name = event['user_name'] - key_name = event['key_name'] - - # package job - zip_name = '{}.zip'.format(job_name) - zip_path = os.path.join('/tmp', zip_name) - zip_file = zipfile.ZipFile(zip_path, 'w') - for root, dirs, files in os.walk(os.path.join('jobs', job_name)): - for file in files: - local_path = os.path.join(root, file) - remote_path = os.path.join(root[5:], file) - zip_file.write(local_path, remote_path) - zip_file.close() - - # upload job via sftp - with EC2_SFTP(master_ip, user_name, key_name) as sftp_client: - sftp_client.chdir(workdir) - sftp_client.put(zip_path, zip_name) - - # schedule job - with EC2_SSH(master_ip, user_name, key_name) as ssh_client: - zip_path = os.path.join(workdir, job_name) - command = 'unzip {}.zip -d {}'.format(zip_path, workdir) - output = ssh_client.exec_command(command)[1].read().strip() - - command = commands['schedule'][scheduler] - command = command.format(os.path.join(workdir, job_name), job_handler) - logging.info(command) - schedule = ssh_client.exec_command(command) - job_id = schedule[1].read().strip() - logging.info(schedule[1]) - logging.info(schedule[1].read()) - - # handle errors - if job_id == '': - message = 'Job {} failed to schedule'.format(job_name) - raise JobSchedulingException(message) - - event['job_id'] = job_id - return event - -def is_job_done(event, context): - """Determines whether the job is complete - - Args: - event: contains job id to check whether complete - """ - logging.debug('event = {}\ncontext = {}'.format(event, context)) - - scheduler = event['scheduler'] - master_ip = event['master_ip'] - user_name = event['user_name'] - key_name = event['key_name'] - - # check job status - with EC2_SSH(master_ip, user_name, key_name) as ssh_client: - command = commands['poll'][scheduler].format(event['job_id']) - status = ssh_client.exec_command(command)[1].read().strip() - - queued = commands['job_status']['queued'][scheduler] - running = commands['job_status']['running'][scheduler] - - if status == queued or status == running: - event['status'] = 'idle' - else: - command = commands['exit_code'][scheduler].format(event['job_id']) - - # attempt to wait for job journaling - t_end = time.time() + 30 - while time.time() < t_end: - try: - code = ssh_client.exec_command(command)[1].read().strip() - code = int(code) - break - except ValueError: - time.sleep(1) - - event['status'] = 'complete' if code == 0 else 'failed' - - return event diff --git a/cli/stepfunctions/templates/template.yaml 
b/cli/stepfunctions/templates/template.yaml deleted file mode 100644 index 6518041d8e..0000000000 --- a/cli/stepfunctions/templates/template.yaml +++ /dev/null @@ -1,348 +0,0 @@ -AWSTemplateFormatVersion: '2010-09-09' -Description: 'AWS CloudFormation template for Step Function Integration' -Resources: - CfnClusterLambdaRole: - Type: 'AWS::IAM::Role' - Properties: - AssumeRolePolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: 'Allow' - Action: 'sts:AssumeRole' - Principal: - Service: 'lambda.amazonaws.com' - ManagedPolicyArns: - - 'arn:aws:iam::aws:policy/AWSLambdaFullAccess' - Path: '/' - Policies: - - PolicyName: 'SecretManagerPolicy' - PolicyDocument: - Version: '2012-10-17' - Statement: - - Resource: '*' - Effect: 'Allow' - Action: - - 'ec2:CreateKeyPair' - - 'secretsmanager:CreateSecret' - - 'secretsmanager:DescribeSecret' - - 'secretsmanager:GetSecretValue' - CfnClusterStateMachineRole: - Type: 'AWS::IAM::Role' - Properties: - AssumeRolePolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: 'Allow' - Action: 'sts:AssumeRole' - Principal: - Service: - - !Sub 'states.${AWS::Region}.amazonaws.com' - Path: '/' - Policies: - - PolicyName: 'StateExecutionPolicy' - PolicyDocument: - Version: '2012-10-17' - Statement: - - Resource: '*' - Effect: 'Allow' - Action: 'lambda:InvokeFunction' - CfnClusterUserPolicy: - Type: 'AWS::IAM::Policy' - Properties: - PolicyName: 'CfnClusterUserPolicy' - PolicyDocument: - Version: '2012-10-17' - Statement: - - Sid: 'EC2Describe' - Resource: '*' - Effect: 'Allow' - Action: - - 'ec2:DescribeKeyPairs' - - 'ec2:DescribeVpcs' - - 'ec2:DescribeSubnets' - - 'ec2:DescribeSecurityGroups' - - 'ec2:DescribePlacementGroups' - - 'ec2:DescribeImages' - - 'ec2:DescribeInstances' - - 'ec2:DescribeSnapshots' - - 'ec2:DescribeVolumes' - - 'ec2:DescribeVpcAttribute' - - 'ec2:DescribeAddresses' - - 'ec2:CreateTags' - - 'ec2:DescribeNetworkInterfaces' - - 'ec2:DescribeAvailabilityZones' - - Sid: 'EC2Modify' - Resource: '*' - Effect: 'Allow' - Action: - - 'ec2:CreateVolume' - - 'ec2:RunInstances' - - 'ec2:AllocateAddress' - - 'ec2:AssociateAddress' - - 'ec2:AttachNetworkInterface' - - 'ec2:AuthorizeSecurityGroupEgress' - - 'ec2:AuthorizeSecurityGroupIngress' - - 'ec2:CreateNetworkInterface' - - 'ec2:CreateSecurityGroup' - - 'ec2:ModifyVolumeAttribute' - - 'ec2:ModifyNetworkInterfaceAttribute' - - 'ec2:DeleteNetworkInterface' - - 'ec2:DeleteVolume' - - 'ec2:TerminateInstances' - - 'ec2:DeleteSecurityGroup' - - 'ec2:DisassociateAddress' - - 'ec2:RevokeSecurityGroupIngress' - - 'ec2:ReleaseAddress' - - Sid: 'AutoScalingDescribe' - Resource: '*' - Effect: 'Allow' - Action: - - 'autoscaling:DescribeAutoScalingGroups' - - 'autoscaling:DescribeLaunchConfigurations' - - 'autoscaling:DescribeAutoScalingInstances' - - Sid: 'AutoScalingModify' - Resource: '*' - Effect: 'Allow' - Action: - - 'autoscaling:CreateAutoScalingGroup' - - 'autoscaling:CreateLaunchConfiguration' - - 'autoscaling:PutNotificationConfiguration' - - 'autoscaling:UpdateAutoScalingGroup' - - 'autoscaling:PutScalingPolicy' - - 'autoscaling:DeleteLaunchConfiguration' - - 'autoscaling:DescribeScalingActivities' - - 'autoscaling:DeleteAutoScalingGroup' - - 'autoscaling:DeletePolicy' - - Sid: 'DynamoDBDescribe' - Resource: '*' - Effect: 'Allow' - Action: 'dynamodb:DescribeTable' - - Sid: 'DynamoDBModify' - Resource: '*' - Effect: 'Allow' - Action: - - 'dynamodb:CreateTable' - - 'dynamodb:DeleteTable' - - Sid: 'CloudWatchModify' - Resource: '*' - Effect: 'Allow' - Action: - - 'cloudwatch:PutMetricAlarm' 
- - 'cloudwatch:DeleteAlarms' - - Sid: 'SQSDescribe' - Resource: '*' - Effect: 'Allow' - Action: 'sqs:GetQueueAttributes' - - Sid: 'SQSModify' - Resource: '*' - Effect: 'Allow' - Action: - - 'sqs:CreateQueue' - - 'sqs:SetQueueAttributes' - - 'sqs:DeleteQueue' - - Sid: 'SNSDescribe' - Resource: '*' - Effect: 'Allow' - Action: - - 'sns:ListTopics' - - 'sns:GetTopicAttributes' - - Sid: 'SNSModify' - Resource: '*' - Effect: 'Allow' - Action: - - 'sns:CreateTopic' - - 'sns:Subscribe' - - 'sns:DeleteTopic' - - Sid: 'CloudFormationDescribe' - Resource: '*' - Effect: 'Allow' - Action: - - 'cloudformation:DescribeStackEvents' - - 'cloudformation:DescribeStackResources' - - 'cloudformation:DescribeStacks' - - 'cloudformation:ListStacks' - - Sid: 'CloudFormationModify' - Resource: '*' - Effect: 'Allow' - Action: - - 'cloudformation:CreateStack' - - 'cloudformation:DeleteStack' - - 'cloudformation:UpdateStack' - - Sid: 'S3CfnClusterReadOnly' - Resource: !Sub 'arn:aws:s3:::${AWS::Region}-cfncluster*' - Effect: 'Allow' - Action: - - 's3:Get*' - - 's3:List*' - - Sid: 'IAMModify' - Resource: !Sub 'arn:aws:iam::${AWS::AccountId}:role/*' - Effect: 'Allow' - Action: - - 'iam:PassRole' - - 'iam:CreateRole' - - 'iam:DeleteRole' - - Sid: 'IAMInstanceProfile' - Resource: !Sub 'arn:aws:iam::${AWS::AccountId}:instance-profile/*' - Effect: 'Allow' - Action: - - 'iam:CreateInstanceProfile' - - 'iam:DeleteInstanceProfile' - - Sid: 'IAMAddRoleToProfile' - Resource: '*' - Effect: 'Allow' - Action: - - 'iam:AddRoleToInstanceProfile' - - 'iam:RemoveRoleFromInstanceProfile' - - 'iam:PutRolePolicy' - - 'iam:DeleteRolePolicy' - Roles: - - Ref: 'CfnClusterLambdaRole' - CreateCfnCluster: - Type: 'AWS::Lambda::Function' - Properties: - Description: 'Creates a cluster' - FunctionName: 'CreateCfnCluster' - Handler: 'handlers.create_cfncluster' - MemorySize: 1536 - Role: !GetAtt [ CfnClusterLambdaRole, Arn ] - Runtime: 'python2.7' - Timeout: 300 - PollOnCluster: - Type: 'AWS::Lambda::Function' - Properties: - Description: 'Polls on creation of a cluster' - FunctionName: 'PollOnCluster' - Handler: 'handlers.is_cluster_ready' - MemorySize: 1536 - Role: !GetAtt [ CfnClusterLambdaRole, Arn ] - Runtime: 'python2.7' - Timeout: 300 - ScheduleJob: - Type: 'AWS::Lambda::Function' - Properties: - Description: 'Schedules the next job' - FunctionName: 'ScheduleJob' - Handler: 'jobs.run_job' - MemorySize: 1536 - Role: !GetAtt [ CfnClusterLambdaRole, Arn ] - Runtime: 'python2.7' - Timeout: 300 - PollOnJob: - Type: 'AWS::Lambda::Function' - Properties: - Description: 'Polls on status of jobs' - FunctionName: 'PollOnJob' - Handler: 'jobs.is_job_done' - MemorySize: 1536 - Role: !GetAtt [ CfnClusterLambdaRole, Arn ] - Runtime: 'python2.7' - Timeout: 300 - RunJob: - Type: 'AWS::Lambda::Function' - Properties: - Description: 'Runs the next job' - FunctionName: 'RunJob' - Handler: 'jobs.run_job' - MemorySize: 1536 - Role: !GetAtt [ CfnClusterLambdaRole, Arn ] - Runtime: 'python2.7' - Timeout: 300 - DeleteCluster: - Type: 'AWS::Lambda::Function' - Properties: - Description: 'Deletes a cluster' - FunctionName: 'DeleteCfnCluster' - Handler: 'handlers.delete_cfncluster' - MemorySize: 1536 - Role: !GetAtt [ CfnClusterLambdaRole, Arn ] - Runtime: 'python2.7' - Timeout: 300 - CfnClusterStateMachine: - Type: 'AWS::StepFunctions::StateMachine' - Properties: - StateMachineName: 'CfnClusterStateMachine' - DefinitionString: - !Sub - - |- - { - "Comment": "CfnCluster Step Function", - "StartAt": "Create_CfnCluster", - "States": { - "Create_CfnCluster": { - 
"Type": "Task", - "Resource": "${createClusterArn}", - "Next": "Wait_For_Cluster" - }, - "Wait_For_Cluster": { - "Type": "Wait", - "Seconds": 120, - "Next": "Poll_On_Cluster" - }, - "Poll_On_Cluster": { - "Type": "Task", - "Resource": "${pollOnClusterArn}", - "Next": "Poll_Choice" - }, - "Poll_Choice": { - "Type": "Choice", - "Choices": [ - { - "Variable": "$.status", - "StringEquals": "idle", - "Next": "Wait_For_Cluster" - }, - { - "Variable": "$.status", - "StringEquals": "complete", - "Next": "Creation_Success" - }, - { - "Variable": "$.status", - "StringEquals": "failed", - "Next": "Creation_Failed" - }, - { - "Variable": "$.status", - "StringEquals": "timeout", - "Next": "Cluster_Timeout" - } - ] - }, - "Creation_Failed": { - "Type": "Fail", - "Cause": "Cluster failed to create" - }, - "Cluster_Timeout": { - "Type": "Fail", - "Cause": "Cluster creation timed out" - }, - "Creation_Success": { - "Type": "Pass", - "Next": "{{ entry }}" - }, -{{ jobs }}, - "Delete_CfnCluster": { - "Type": "Task", - "Resource": "${deleteClusterArn}", - "End": true - } - } - } - - { - createClusterArn: !GetAtt [ CreateCfnCluster, Arn ], - pollOnClusterArn: !GetAtt [ PollOnCluster, Arn ], - deleteClusterArn: !GetAtt [ DeleteCluster, Arn ], - pollOnJobArn: !GetAtt [ PollOnJob, Arn ], - runJobArn: !GetAtt [ RunJob, Arn ] - } - RoleArn: !GetAtt [ CfnClusterStateMachineRole, Arn ] -Outputs: - StateMachineArn: - Value: - Ref: CfnClusterStateMachine - ExecutionInput: - Description: 'Sample Input to start execution' - Value: |- - { - "cluster_name": "cfnclusterstepfunction" - } From c9659ecd3845cb09551b4ced72255be1d9cde7a1 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 28 Aug 2018 10:32:43 -0700 Subject: [PATCH 19/31] Revert "Update html documentation to include step functions" This reverts commit 46ccc5b0aacf9f8f1673d6392e24aaa32690fc0e. Signed-off-by: Sean Smith --- cli/stepfunctions/README.md | 24 +++---- docs/source/commands.rst | 33 +--------- docs/source/index.rst | 1 - docs/source/stepfunctions.rst | 118 ---------------------------------- 4 files changed, 14 insertions(+), 162 deletions(-) delete mode 100644 docs/source/stepfunctions.rst diff --git a/cli/stepfunctions/README.md b/cli/stepfunctions/README.md index 1a8166768d..8de2a5c9d0 100644 --- a/cli/stepfunctions/README.md +++ b/cli/stepfunctions/README.md @@ -11,7 +11,8 @@ CfnCluster Step Function is a state management solution for deploying high-perfo * Visit the [AWS Documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html) for more information ``` -$ cfncluster stepfunctions --bucket --region --config --jobs +$ pip install -r requirements.txt +$ ./deploy.py --bucket --region --config --jobs ``` To Run Step Function: @@ -102,7 +103,7 @@ handler = script/path/in/project.sh Optional Parameters: -`wait_time`: Period between polling on the status of the job in seconds; default = 10; range 1-240 due to scheduler limitations +`wait_time`: How long to wait between rechecking the status of the job to see if it's completed; default = 10; range 1-240 due to scheduler limitations ``` [job donut] @@ -113,6 +114,10 @@ wait_time = 240 ## Arguments +### `--config` or `-c` + +Specifies the CfnCluster configuration file to use. This will be utilized by the step function to deploy user defined clusters. For more information on how to configure CfnCluster visit the [CfnCluster Documentation](http://cfncluster.readthedocs.io/en/latest/getting_started.html#configuring-cfncluster). 
+ ### `--bucket` or `-b` Specifies the name of the S3 bucket to be used to store the source code that creates and terminates the CfnClusters. **Important**: if the bucket already exists, it must be in the same region as that given by the --region argument. If it does not exist, it will be made for you in the specified region. @@ -123,10 +128,6 @@ Specifies the job configuration file to use. This will be used to package your j ## Optional Arguments -### `--config` or `-c` - -Specifies the CfnCluster configuration file to use. This will be utilized by the step function to deploy user defined clusters. For more information on how to configure CfnCluster visit the [CfnCluster Documentation](http://cfncluster.readthedocs.io/en/latest/getting_started.html#configuring-cfncluster). - ### `--region` or `-r` Specifies the AWS region to deploy the CloudFormation stack that contains the Step Function and corresponding source code to deploy and terminate CfnClusters. Defaults to us-east-1. @@ -146,19 +147,18 @@ Specifies the name of the EC2 key pair to use for the CfnCluster master node. ** Prints the help menu and usage to standard output. ``` -usage: cfncluster stepfunctions [-h] --bucket BUCKET_NAME - [--config CONFIG_FILE] --jobs JOBS_CONFIG - [--stack-name STACK_NAME] [--region REGION] - [--key-name KEY_NAME] +usage: deploy.py [-h] --bucket BUCKET_NAME --config CONFIG_FILE --jobs + JOBS_CONFIG [--stack-name STACK_NAME] [--region REGION] + [--key-name KEY_NAME] -deploy a cfncluster stepfunction via cloudformation +Deploys CfnCluster Step Function optional arguments: -h, --help show this help message and exit --bucket BUCKET_NAME, -b BUCKET_NAME Specify s3 bucket to use/create --config CONFIG_FILE, -c CONFIG_FILE - Specify cfncluster config file to use + Specify config file to use --jobs JOBS_CONFIG, -j JOBS_CONFIG Specify jobs config file to use --stack-name STACK_NAME, -s STACK_NAME diff --git a/docs/source/commands.rst b/docs/source/commands.rst index 89e48fb46f..6fb32f7f11 100644 --- a/docs/source/commands.rst +++ b/docs/source/commands.rst @@ -178,36 +178,7 @@ optional arguments: :: - $ cfncluster ssh mycluster -i ~/.ssh/id_rsa -v - -stepfunctions -============= - -Creates a step function that automatically creates a cluster, runs user specified jobs, and tears the cluster down. - -For example: - cfncluster stepfunctions -b s3bucket -j path/to/jobs.config - -This uses the bucket name s3bucket, or creates it if it doesn't exist, to store the lambda source code and user specified jobs. To see how jobs in the ``jobs.config`` file should be specified, see `Job Config `_. 
- -arguments: - -h, --help show this help message and exit - --jobs JOBS_CONFIG, -j JOBS_CONFIG - specify jobs config file to use (REQUIRED) - --bucket BUCKET_NAME, -b BUCKET_NAME - specify s3 bucket to use/create (REQUIRED) - --config CONFIG_FILE, -c CONFIG_FILE - specify an alternative config file (default: ~/.cfncluster/config) - --region REGION, -r REGION - specify a region to deploy in (default: us-east-1) - --stack-name STACK_NAME, -s STACK_NAME - specify the stack name to use (default: CfnClusterStepFunction) - --key-name KEY_NAME, -k KEY_NAME - specify the ec2 key pair (default: cfncluster-stepfunctions) - -:: - - $ cfncluster stepfunctions -b s3bucket -j path/to/jobs.config + $cfncluster ssh mycluster -i ~/.ssh/id_rsa -v status ====== @@ -228,7 +199,7 @@ optional arguments: :: - $ cfncluster status mycluster + $cfncluster status mycluster list ==== diff --git a/docs/source/index.rst b/docs/source/index.rst index dd14504331..c5ee3de911 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,7 +18,6 @@ CfnCluster ("cloud formation cluster") is a framework that deploys and maintains functional tutorials development - stepfunctions Getting Started --------------- diff --git a/docs/source/stepfunctions.rst b/docs/source/stepfunctions.rst deleted file mode 100644 index 78ce15bd61..0000000000 --- a/docs/source/stepfunctions.rst +++ /dev/null @@ -1,118 +0,0 @@ -.. _stepfunctions: - -CfnCluster Stepfunctions -######################## - -Why Stepfunctions -================= - -* Allows for complex workflows with CfnCluster -* Handles cluster creation, teardown, and updates -* Useful for conditional automated job execution -* Interfaces with other AWS services - -.. image:: https://s3.amazonaws.com/global-cfncluster/doc-images/parallel_job.gif - -Getting Started -=============== - -.. image:: https://s3.amazonaws.com/global-cfncluster/doc-images/command_start.gif - -1. Configure CfnCluster configuration file with ``cfncluster configure`` or manually -2. Collect jobs that you would like CfnCluster Step Functions to schedule -3. Configure jobs configuration file using the following `guide <#jobs-configuration-guide>`_ -4. Deploy a Step Function using ``cfncluster stepfunctions``; see `here `_ -5. Navigate to the Step Function using the deeplink given from the command -6. Click Start Execution and provide a cluster name via JSON execution input - -:: - - { - "cluster_name": "cfnclusterstepfunctions" - } - - -.. image:: https://s3.amazonaws.com/global-cfncluster/doc-images/command_end.gif -.. 
image:: https://s3.amazonaws.com/global-cfncluster/doc-images/JSON.gif - -Jobs Config -=========== - -:: - - [order] - sequential = job1, banana, job2 - - [job job1] - handler = src/script.sh - s3_uri = s3://bucket-to-use/folder/path/to/project - - [job job2] - handler = is-this-even-a-job - local_path = /path/to/the/job/is-this-even-a-job - - [job banana] - handler = long-running-script.sh - s3_uri = s3://bucket-to-use/folder/path/to/project - wait_time = 240 - -Sections Options: - ``[order]`` required parameters: - * ``sequential``: List of job names to schedule sequentially given in the form of a comma separated list; order matters - - :: - - [order] - sequential = firstjob, secondjob, thirdjob - - OR - - * ``parallel``: List of job names to schedule in parallel given in the form of a comma separated list; order does not matter - - :: - - [order] - parallel = paralleljob1, paralleljob2, otherjob - - **IMPORTANT**: either ``sequential`` or ``parallel`` must be specified; not both - - ``[job ]`` required parameters: - * ``s3_uri``: An S3 URI pointing to the script or folder to pacakge for job scheduling and execution - - :: - - [job apple] - s3_uri = s3://thebucket/thefolder - handler = thescript - - OR - - * ``local_path``: A local path (relative to the jobs config file or absolute) pointing to the script or folder for job scheduling and execution - - :: - - [job banana] - local_path = /path/to/the/script - handler = script - - AND - - * ``handler``: The path and name of the script to run. Since the ``s3_uri`` and ``local_path`` can both be directories, this is to specify which file to send off to the scheduler - - :: - - [job carrot] - local_path = relative/path/project - handler = script/path/in/project.sh - - **IMPORTANT**: either ``s3_uri`` or ``local_path`` must be specified; not both - - ``[job ]`` optional parameters: - * ``wait_time``: Period between polling on the status of the job in seconds; default = 10; range 1-240 due to scheduler limitations - - :: - - [job danish] - s3_uri = s3://bucket/script - handler = script - wait_time = 240 From dbd30fdc4721f6714472d95f34f8a40c057aaff5 Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Tue, 28 Aug 2018 13:32:52 -0700 Subject: [PATCH 20/31] Fix the version of boto3 in order to prevent the unittests from failing As boto3 was upgraded to 1.8.x we ran into the following issue: https://github.com/spulec/moto/issues/1796 (moto is used for unittests) As a fix, we decided to pin the version of boto3 to 1.7.84 which we knew to work. 
Signed-off-by: Balaji Sridharan --- cli/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/requirements.txt b/cli/requirements.txt index 648439aaa2..deae808fcb 100644 --- a/cli/requirements.txt +++ b/cli/requirements.txt @@ -1,2 +1,2 @@ -boto3>=1.7.33 +boto3==1.7.84 moto>=1.3.3 From a7cc019e0aaf570fc44f0373844715df7880d91a Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Tue, 28 Aug 2018 15:18:48 -0700 Subject: [PATCH 21/31] Fix versions of boto3 and awscli in setup.py Signed-off-by: Balaji Sridharan --- .travis.yml | 6 ++++-- cli/requirements.txt | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8d6c95434a..65733f6d34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,9 @@ install: sudo: false script: - - if [[ $TRAVIS_PYTHON_VERSION != 2.6 ]]; - then python cli/tests/cfncluster-unittest.py; + - if [[ $TRAVIS_PYTHON_VERSION != 2.6 ]]; then + pip install 'boto3==1.7.84' --force-reinstall; + python cli/tests/cfncluster-unittest.py; + pip install boto3 --upgrade fi - sh tests/test.sh diff --git a/cli/requirements.txt b/cli/requirements.txt index deae808fcb..648439aaa2 100644 --- a/cli/requirements.txt +++ b/cli/requirements.txt @@ -1,2 +1,2 @@ -boto3==1.7.84 +boto3>=1.7.33 moto>=1.3.3 From 08fbab580d3dd2a945879fce02fe336e22140c1f Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Tue, 28 Aug 2018 16:40:21 -0700 Subject: [PATCH 22/31] Modify travis.yml file to force install a version of boto3 before the tests and upgrade the version after Instead of pinning the boto3 version for the entire test suite and the version of awscli for cfncluster, just install the version of boto3 that works before the unit test and upgrade it later. Signed-off-by: Balaji Sridharan --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 65733f6d34..0b052315f7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,6 @@ script: - if [[ $TRAVIS_PYTHON_VERSION != 2.6 ]]; then pip install 'boto3==1.7.84' --force-reinstall; python cli/tests/cfncluster-unittest.py; - pip install boto3 --upgrade + pip install boto3 --upgrade; fi - sh tests/test.sh From 369e86c6394cae389c3a5743c5baba0497a581c4 Mon Sep 17 00:00:00 2001 From: Balaji Sridharan Date: Wed, 29 Aug 2018 10:44:10 -0700 Subject: [PATCH 23/31] Make changes to requirements file and the unit tests Remove moto from requirements and install them only before tests in travis yml Remove moto from requirements26 Signed-off-by: Balaji Sridharan --- .travis.yml | 1 + cli/requirements.txt | 1 - cli/requirements26.txt | 3 +-- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0b052315f7..37e0c314ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,6 +26,7 @@ sudo: false script: - if [[ $TRAVIS_PYTHON_VERSION != 2.6 ]]; then pip install 'boto3==1.7.84' --force-reinstall; + pip install moto python cli/tests/cfncluster-unittest.py; pip install boto3 --upgrade; fi diff --git a/cli/requirements.txt b/cli/requirements.txt index 648439aaa2..57d3af4c34 100644 --- a/cli/requirements.txt +++ b/cli/requirements.txt @@ -1,2 +1 @@ boto3>=1.7.33 -moto>=1.3.3 diff --git a/cli/requirements26.txt b/cli/requirements26.txt index c4b36252af..155bd149c6 100644 --- a/cli/requirements26.txt +++ b/cli/requirements26.txt @@ -1,3 +1,2 @@ boto3>=1.7.33 -argparse>=1.1 -moto>=1.3.3 \ No newline at end of file +argparse>=1.1 \ No newline at end of file From 7dfd1e1c82e3010e25948d7d39e403fd83ffc7a7 Mon Sep 17 
00:00:00 2001 From: CfnCluster AMI bot Date: Tue, 28 Aug 2018 22:28:26 +0000 Subject: [PATCH 24/31] Update AMI list Signed-off-by: CfnCluster AMI bot --- amis.txt | 163 ++++++++++++++-------------- cloudformation/cfncluster.cfn.json | 165 ++++++++++++++--------------- 2 files changed, 160 insertions(+), 168 deletions(-) diff --git a/amis.txt b/amis.txt index a54288c050..6645ff1fd1 100644 --- a/amis.txt +++ b/amis.txt @@ -1,88 +1,85 @@ # alinux -ap-northeast-1: ami-05468a90639fdc483 -ap-northeast-2: ami-0b4d5a4a999581db1 -ap-northeast-3: ami-0594d5d373771f10a -ap-south-1: ami-0ec7be770210af7c0 -ap-southeast-1: ami-02181eb4bbe285f28 -ap-southeast-2: ami-03199596cd474bb05 -ca-central-1: ami-099a5f4b65d56215e -eu-central-1: ami-00e39ce170dbd6e25 -eu-west-1: ami-059b3db381c18305a -eu-west-2: ami-0df738bfb4cf98f4e -eu-west-3: ami-0b1472bf5fed029bd -sa-east-1: ami-0a390e1998fb6b22f -us-east-1: ami-0fd6854902340b407 -us-east-2: ami-04fc90a3f60cb3dc7 -us-gov-west-1: ami-af2cbece -us-west-1: ami-0054acc8361046ccc -us-west-2: ami-01382f63e2667a7d1 +ap-northeast-1: ami-00047986f877e6760 +ap-northeast-2: ami-0ca5afbc9d0b20f3d +ap-northeast-3: ami-0b3af1c2fe35b1ae4 +ap-south-1: ami-0250a8315b099785d +ap-southeast-1: ami-038312aa862774599 +ap-southeast-2: ami-0dce078cd2c343a5c +ca-central-1: ami-04e27bd54e171fbad +eu-central-1: ami-075bf1c36cf6b855f +eu-west-1: ami-08d507a759b9ee0e4 +eu-west-2: ami-03ddfb1462e2c92fd +eu-west-3: ami-0e72015cf2b21159f +sa-east-1: ami-02bd755bfe8704cef +us-east-1: ami-07f61c3e18538c70c +us-east-2: ami-096b7925b1f9c8cbb +us-west-1: ami-04794da830cb9dc86 +us-west-2: ami-03f86a2920360dfbe # centos6 -ap-northeast-1: ami-0603daa665c23cf74 -ap-northeast-2: ami-06ddec14c10c7de14 -ap-northeast-3: ami-0e718e0794e781ead -ap-south-1: ami-043871a624a63c4b3 -ap-southeast-1: ami-07df16800a8f39379 -ap-southeast-2: ami-09b23d993fbc703d9 -ca-central-1: ami-0af06655a7341c2b5 -eu-central-1: ami-0362d5eb8ec477a30 -eu-west-1: ami-04c68c9303ee0165c -eu-west-2: ami-0d3a080fd047d6f03 -eu-west-3: ami-0ce8fcafcd4cb66ba -sa-east-1: ami-010873231de6443db -us-east-1: ami-0051f4828527649d0 -us-east-2: ami-0a585da16a01304bb -us-west-1: ami-0fcbc3afb66cfcffd -us-west-2: ami-0430c74956ce8e41a +ap-northeast-1: ami-00c9ad360d255b462 +ap-northeast-2: ami-08e84ffe9aead7873 +ap-northeast-3: ami-056531169587ffc23 +ap-south-1: ami-03a57d339b58b2f23 +ap-southeast-1: ami-00063e5635939c9d1 +ap-southeast-2: ami-015e730525001bf53 +ca-central-1: ami-0fea713db092ac77f +eu-central-1: ami-0f6b91d011f44aaf6 +eu-west-1: ami-029accac823a2a844 +eu-west-2: ami-032b6b895dab544c6 +eu-west-3: ami-039be4bd46c878e2c +sa-east-1: ami-0e507bdec09b8a091 +us-east-1: ami-091bbc83b27f92634 +us-east-2: ami-0fa89e68b0222f50b +us-west-1: ami-06f3909be633646b9 +us-west-2: ami-087d4f29315fd19d2 # centos7 -ap-northeast-1: ami-03e99430742e7d834 -ap-northeast-2: ami-0a3ad3da5f45318a4 -ap-northeast-3: ami-0804a0d2dcf1e675f -ap-south-1: ami-06b1a64ab30690802 -ap-southeast-1: ami-06eec4db7a3965f04 -ap-southeast-2: ami-0ab1def1c27a54ada -ca-central-1: ami-089b3d6d4ef8c5772 -eu-central-1: ami-0842b79de396801b5 -eu-west-1: ami-019ecc5629a990951 -eu-west-2: ami-03e914c8e1b836bcf -eu-west-3: ami-099b3883ce5e219b3 -sa-east-1: ami-0edbfab1cbd73ac82 -us-east-1: ami-00976a3c38aaac1a2 -us-east-2: ami-0f7dbb8d94e96b0ab -us-west-1: ami-0fae379328b9061bc -us-west-2: ami-0df85f5bc2d980e57 +ap-northeast-1: ami-09dc3b175ed2905f8 +ap-northeast-2: ami-03f494fed9f0f9c98 +ap-northeast-3: ami-04094dd3b4013b75f +ap-south-1: ami-0c20297a2c00b0528 +ap-southeast-1: 
ami-0f610508513f2012f +ap-southeast-2: ami-09d0b89439c0c1b65 +ca-central-1: ami-0362a3ef26ab5dfce +eu-central-1: ami-03d5519f5f4e7619f +eu-west-1: ami-0575304add4986551 +eu-west-2: ami-08263362b21b98916 +eu-west-3: ami-0befb803ea7ce1b83 +sa-east-1: ami-0860c11596f954bbc +us-east-1: ami-06c93be112f880acd +us-east-2: ami-0493261a1b2adfd27 +us-west-1: ami-0af95249ebb8de7a5 +us-west-2: ami-0a259cf8220fabdcb # ubuntu1404 -ap-northeast-1: ami-068214c5cfb518070 -ap-northeast-2: ami-0d55e5524b8536518 -ap-northeast-3: ami-0ad3400658fbc51fa -ap-south-1: ami-0b54a1bd3d82929e6 -ap-southeast-1: ami-0d3382bea35764ad2 -ap-southeast-2: ami-0a2a8aa487ce39e8c -ca-central-1: ami-0bc510154aa82211d -eu-central-1: ami-0174c3f5501fc441b -eu-west-1: ami-01848c6ae1a9c6460 -eu-west-2: ami-0b867b5579d0f7a08 -eu-west-3: ami-0e8f93d19635c5d49 -sa-east-1: ami-048422e42509a1596 -us-east-1: ami-0ba2fcbf9fa378762 -us-east-2: ami-05aea9ea89d577194 -us-gov-west-1: ami-4aca572b -us-west-1: ami-0089ace30f2612d13 -us-west-2: ami-06142a55749b7912b +ap-northeast-1: ami-09dc9e9da730c0248 +ap-northeast-2: ami-06346bcc8b825a458 +ap-northeast-3: ami-0ae6e02016436404a +ap-south-1: ami-0e4294a5b9d5de425 +ap-southeast-1: ami-01e6359021e3bac23 +ap-southeast-2: ami-0cfebd51a3470f1d4 +ca-central-1: ami-0097472502a04776d +eu-central-1: ami-0a82ac2872821deef +eu-west-1: ami-0d6c90f092266b787 +eu-west-2: ami-0f23dffc4db122b53 +eu-west-3: ami-02b6ca8fd14f58265 +sa-east-1: ami-04e943b561cd32659 +us-east-1: ami-0920fa86d5a85a07a +us-east-2: ami-0d63a602f26709e70 +us-west-1: ami-06fe4f837d8715dbb +us-west-2: ami-01170828a21d86a72 # ubuntu1604 -ap-northeast-1: ami-03f6879ede5f51a6f -ap-northeast-2: ami-0036ef722f2d0a331 -ap-northeast-3: ami-0a8db47e6bc6f585b -ap-south-1: ami-08870f6ce8227e90f -ap-southeast-1: ami-04950204e84f8bf64 -ap-southeast-2: ami-02b4175018a85c03d -ca-central-1: ami-04a4a63323b55af6e -eu-central-1: ami-089e9903d9c3bee1b -eu-west-1: ami-033b9190bab9d42aa -eu-west-2: ami-049d2247a1c3d582c -eu-west-3: ami-09c4322a65746f77b -sa-east-1: ami-0740b75b4caaa3b77 -us-east-1: ami-05790a6a634720a0d -us-east-2: ami-0a0fcd1219e40a8f5 -us-gov-west-1: ami-3dc8555c -us-west-1: ami-056472d51cf2fed45 -us-west-2: ami-0702d59e57ada76b8 +ap-northeast-1: ami-0b0bf645cde38fe52 +ap-northeast-2: ami-0b2c6c9e988689d7c +ap-northeast-3: ami-0dae0e941e3bce10b +ap-south-1: ami-011d655005fe854e9 +ap-southeast-1: ami-03428422e2cb20c9f +ap-southeast-2: ami-0bbef9e2b1170757f +ca-central-1: ami-0916119fde83c29b7 +eu-central-1: ami-003273c4072efbe43 +eu-west-1: ami-009af0d819973e1d6 +eu-west-2: ami-029f01bbfec52a10e +eu-west-3: ami-0ae405adcb3d1477c +sa-east-1: ami-0b0057ecd5c465d09 +us-east-1: ami-0dfb1662fcc845c55 +us-east-2: ami-01bd1361428c384c3 +us-west-1: ami-0e144dd24634766a2 +us-west-2: ami-02087bf0ff99cc127 diff --git a/cloudformation/cfncluster.cfn.json b/cloudformation/cfncluster.cfn.json index 58506c9ade..f66614388f 100644 --- a/cloudformation/cfncluster.cfn.json +++ b/cloudformation/cfncluster.cfn.json @@ -1631,121 +1631,116 @@ "Mappings": { "AWSRegionOS2AMI": { "ap-northeast-1": { - "alinux": "ami-05468a90639fdc483", - "centos6": "ami-0603daa665c23cf74", - "centos7": "ami-03e99430742e7d834", - "ubuntu1404": "ami-068214c5cfb518070", - "ubuntu1604": "ami-03f6879ede5f51a6f" + "alinux": "ami-00047986f877e6760", + "centos6": "ami-00c9ad360d255b462", + "centos7": "ami-09dc3b175ed2905f8", + "ubuntu1404": "ami-09dc9e9da730c0248", + "ubuntu1604": "ami-0b0bf645cde38fe52" }, "ap-northeast-2": { - "alinux": "ami-0b4d5a4a999581db1", - "centos6": 
"ami-06ddec14c10c7de14", - "centos7": "ami-0a3ad3da5f45318a4", - "ubuntu1404": "ami-0d55e5524b8536518", - "ubuntu1604": "ami-0036ef722f2d0a331" + "alinux": "ami-0ca5afbc9d0b20f3d", + "centos6": "ami-08e84ffe9aead7873", + "centos7": "ami-03f494fed9f0f9c98", + "ubuntu1404": "ami-06346bcc8b825a458", + "ubuntu1604": "ami-0b2c6c9e988689d7c" }, "ap-northeast-3": { - "alinux": "ami-0594d5d373771f10a", - "centos6": "ami-0e718e0794e781ead", - "centos7": "ami-0804a0d2dcf1e675f", - "ubuntu1404": "ami-0ad3400658fbc51fa", - "ubuntu1604": "ami-0a8db47e6bc6f585b" + "alinux": "ami-0b3af1c2fe35b1ae4", + "centos6": "ami-056531169587ffc23", + "centos7": "ami-04094dd3b4013b75f", + "ubuntu1404": "ami-0ae6e02016436404a", + "ubuntu1604": "ami-0dae0e941e3bce10b" }, "ap-south-1": { - "alinux": "ami-0ec7be770210af7c0", - "centos6": "ami-043871a624a63c4b3", - "centos7": "ami-06b1a64ab30690802", - "ubuntu1404": "ami-0b54a1bd3d82929e6", - "ubuntu1604": "ami-08870f6ce8227e90f" + "alinux": "ami-0250a8315b099785d", + "centos6": "ami-03a57d339b58b2f23", + "centos7": "ami-0c20297a2c00b0528", + "ubuntu1404": "ami-0e4294a5b9d5de425", + "ubuntu1604": "ami-011d655005fe854e9" }, "ap-southeast-1": { - "alinux": "ami-02181eb4bbe285f28", - "centos6": "ami-07df16800a8f39379", - "centos7": "ami-06eec4db7a3965f04", - "ubuntu1404": "ami-0d3382bea35764ad2", - "ubuntu1604": "ami-04950204e84f8bf64" + "alinux": "ami-038312aa862774599", + "centos6": "ami-00063e5635939c9d1", + "centos7": "ami-0f610508513f2012f", + "ubuntu1404": "ami-01e6359021e3bac23", + "ubuntu1604": "ami-03428422e2cb20c9f" }, "ap-southeast-2": { - "alinux": "ami-03199596cd474bb05", - "centos6": "ami-09b23d993fbc703d9", - "centos7": "ami-0ab1def1c27a54ada", - "ubuntu1404": "ami-0a2a8aa487ce39e8c", - "ubuntu1604": "ami-02b4175018a85c03d" + "alinux": "ami-0dce078cd2c343a5c", + "centos6": "ami-015e730525001bf53", + "centos7": "ami-09d0b89439c0c1b65", + "ubuntu1404": "ami-0cfebd51a3470f1d4", + "ubuntu1604": "ami-0bbef9e2b1170757f" }, "ca-central-1": { - "alinux": "ami-099a5f4b65d56215e", - "centos6": "ami-0af06655a7341c2b5", - "centos7": "ami-089b3d6d4ef8c5772", - "ubuntu1404": "ami-0bc510154aa82211d", - "ubuntu1604": "ami-04a4a63323b55af6e" + "alinux": "ami-04e27bd54e171fbad", + "centos6": "ami-0fea713db092ac77f", + "centos7": "ami-0362a3ef26ab5dfce", + "ubuntu1404": "ami-0097472502a04776d", + "ubuntu1604": "ami-0916119fde83c29b7" }, "eu-central-1": { - "alinux": "ami-00e39ce170dbd6e25", - "centos6": "ami-0362d5eb8ec477a30", - "centos7": "ami-0842b79de396801b5", - "ubuntu1404": "ami-0174c3f5501fc441b", - "ubuntu1604": "ami-089e9903d9c3bee1b" + "alinux": "ami-075bf1c36cf6b855f", + "centos6": "ami-0f6b91d011f44aaf6", + "centos7": "ami-03d5519f5f4e7619f", + "ubuntu1404": "ami-0a82ac2872821deef", + "ubuntu1604": "ami-003273c4072efbe43" }, "eu-west-1": { - "alinux": "ami-059b3db381c18305a", - "centos6": "ami-04c68c9303ee0165c", - "centos7": "ami-019ecc5629a990951", - "ubuntu1404": "ami-01848c6ae1a9c6460", - "ubuntu1604": "ami-033b9190bab9d42aa" + "alinux": "ami-08d507a759b9ee0e4", + "centos6": "ami-029accac823a2a844", + "centos7": "ami-0575304add4986551", + "ubuntu1404": "ami-0d6c90f092266b787", + "ubuntu1604": "ami-009af0d819973e1d6" }, "eu-west-2": { - "alinux": "ami-0df738bfb4cf98f4e", - "centos6": "ami-0d3a080fd047d6f03", - "centos7": "ami-03e914c8e1b836bcf", - "ubuntu1404": "ami-0b867b5579d0f7a08", - "ubuntu1604": "ami-049d2247a1c3d582c" + "alinux": "ami-03ddfb1462e2c92fd", + "centos6": "ami-032b6b895dab544c6", + "centos7": "ami-08263362b21b98916", + "ubuntu1404": 
"ami-0f23dffc4db122b53", + "ubuntu1604": "ami-029f01bbfec52a10e" }, "eu-west-3": { - "alinux": "ami-0b1472bf5fed029bd", - "centos6": "ami-0ce8fcafcd4cb66ba", - "centos7": "ami-099b3883ce5e219b3", - "ubuntu1404": "ami-0e8f93d19635c5d49", - "ubuntu1604": "ami-09c4322a65746f77b" + "alinux": "ami-0e72015cf2b21159f", + "centos6": "ami-039be4bd46c878e2c", + "centos7": "ami-0befb803ea7ce1b83", + "ubuntu1404": "ami-02b6ca8fd14f58265", + "ubuntu1604": "ami-0ae405adcb3d1477c" }, "sa-east-1": { - "alinux": "ami-0a390e1998fb6b22f", - "centos6": "ami-010873231de6443db", - "centos7": "ami-0edbfab1cbd73ac82", - "ubuntu1404": "ami-048422e42509a1596", - "ubuntu1604": "ami-0740b75b4caaa3b77" + "alinux": "ami-02bd755bfe8704cef", + "centos6": "ami-0e507bdec09b8a091", + "centos7": "ami-0860c11596f954bbc", + "ubuntu1404": "ami-04e943b561cd32659", + "ubuntu1604": "ami-0b0057ecd5c465d09" }, "us-east-1": { - "alinux": "ami-0fd6854902340b407", - "centos6": "ami-0051f4828527649d0", - "centos7": "ami-00976a3c38aaac1a2", - "ubuntu1404": "ami-0ba2fcbf9fa378762", - "ubuntu1604": "ami-05790a6a634720a0d" + "alinux": "ami-07f61c3e18538c70c", + "centos6": "ami-091bbc83b27f92634", + "centos7": "ami-06c93be112f880acd", + "ubuntu1404": "ami-0920fa86d5a85a07a", + "ubuntu1604": "ami-0dfb1662fcc845c55" }, "us-east-2": { - "alinux": "ami-04fc90a3f60cb3dc7", - "centos6": "ami-0a585da16a01304bb", - "centos7": "ami-0f7dbb8d94e96b0ab", - "ubuntu1404": "ami-05aea9ea89d577194", - "ubuntu1604": "ami-0a0fcd1219e40a8f5" + "alinux": "ami-096b7925b1f9c8cbb", + "centos6": "ami-0fa89e68b0222f50b", + "centos7": "ami-0493261a1b2adfd27", + "ubuntu1404": "ami-0d63a602f26709e70", + "ubuntu1604": "ami-01bd1361428c384c3" }, "us-west-1": { - "alinux": "ami-0054acc8361046ccc", - "centos6": "ami-0fcbc3afb66cfcffd", - "centos7": "ami-0fae379328b9061bc", - "ubuntu1404": "ami-0089ace30f2612d13", - "ubuntu1604": "ami-056472d51cf2fed45" + "alinux": "ami-04794da830cb9dc86", + "centos6": "ami-06f3909be633646b9", + "centos7": "ami-0af95249ebb8de7a5", + "ubuntu1404": "ami-06fe4f837d8715dbb", + "ubuntu1604": "ami-0e144dd24634766a2" }, "us-west-2": { - "alinux": "ami-01382f63e2667a7d1", - "centos6": "ami-0430c74956ce8e41a", - "centos7": "ami-0df85f5bc2d980e57", - "ubuntu1404": "ami-06142a55749b7912b", - "ubuntu1604": "ami-0702d59e57ada76b8" - }, - "us-gov-west-1": { - "alinux": "ami-af2cbece", - "ubuntu1404": "ami-4aca572b", - "ubuntu1604": "ami-3dc8555c" + "alinux": "ami-03f86a2920360dfbe", + "centos6": "ami-087d4f29315fd19d2", + "centos7": "ami-0a259cf8220fabdcb", + "ubuntu1404": "ami-01170828a21d86a72", + "ubuntu1604": "ami-02087bf0ff99cc127" } }, "OSFeatures": { From 6c27f3056eb1b5bdb97a80381670e85f99cebe0a Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Aug 2018 18:30:34 +0000 Subject: [PATCH 25/31] Update AMI list Signed-off-by: CfnCluster AMI bot --- amis.txt | 3 +++ cloudformation/cfncluster.cfn.json | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/amis.txt b/amis.txt index 6645ff1fd1..48ff4df4db 100644 --- a/amis.txt +++ b/amis.txt @@ -13,6 +13,7 @@ eu-west-3: ami-0e72015cf2b21159f sa-east-1: ami-02bd755bfe8704cef us-east-1: ami-07f61c3e18538c70c us-east-2: ami-096b7925b1f9c8cbb +us-gov-west-1: ami-cfd34dae us-west-1: ami-04794da830cb9dc86 us-west-2: ami-03f86a2920360dfbe # centos6 @@ -64,6 +65,7 @@ eu-west-3: ami-02b6ca8fd14f58265 sa-east-1: ami-04e943b561cd32659 us-east-1: ami-0920fa86d5a85a07a us-east-2: ami-0d63a602f26709e70 +us-gov-west-1: ami-c2d34da3 us-west-1: ami-06fe4f837d8715dbb us-west-2: ami-01170828a21d86a72 # ubuntu1604 @@ -81,5 
+83,6 @@ eu-west-3: ami-0ae405adcb3d1477c sa-east-1: ami-0b0057ecd5c465d09 us-east-1: ami-0dfb1662fcc845c55 us-east-2: ami-01bd1361428c384c3 +us-gov-west-1: ami-0fd6486e us-west-1: ami-0e144dd24634766a2 us-west-2: ami-02087bf0ff99cc127 diff --git a/cloudformation/cfncluster.cfn.json b/cloudformation/cfncluster.cfn.json index f66614388f..040e7460a6 100644 --- a/cloudformation/cfncluster.cfn.json +++ b/cloudformation/cfncluster.cfn.json @@ -1741,6 +1741,11 @@ "centos7": "ami-0a259cf8220fabdcb", "ubuntu1404": "ami-01170828a21d86a72", "ubuntu1604": "ami-02087bf0ff99cc127" + }, + "us-gov-west-1": { + "alinux": "ami-cfd34dae", + "ubuntu1404": "ami-c2d34da3", + "ubuntu1604": "ami-0fd6486e" } }, "OSFeatures": { From 1e33c7f14300e8d6fdbe9c5a6dad916efabd0ffa Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Wed, 29 Aug 2018 13:53:36 -0700 Subject: [PATCH 26/31] Release 1.5.4 Signed-off-by: Sean Smith --- CHANGELOG.rst | 7 +++++++ cli/setup.py | 2 +- cloudformation/cfncluster.cfn.json | 4 ++-- docs/source/conf.py | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 22b96cad72..cc02b77a24 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,13 @@ CHANGELOG ========= +1.5.4 +===== +* Add option to disable ganglia `extra_json = { "cfncluster" : { "ganglia_enabled" : "no" } }`` +* Fix `cfncluster update` bug +* Set SGE Accounting summary to be true, this reports a single accounting record for a mpi job +* Upgrade cfncluster-node to Boto3 + 1.5.3 ===== * Add support for GovCloud, us-gov-west-1 region diff --git a/cli/setup.py b/cli/setup.py index 1c6475cc0e..2bfa4a297d 100644 --- a/cli/setup.py +++ b/cli/setup.py @@ -20,7 +20,7 @@ def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() console_scripts = ['cfncluster = cfncluster.cli:main'] -version = "1.5.3" +version = "1.5.4" requires = ['boto3>=1.7.33', 'awscli>=1.11.175', 'future>=0.16.0'] if sys.version_info[:2] == (2, 6): diff --git a/cloudformation/cfncluster.cfn.json b/cloudformation/cfncluster.cfn.json index 040e7460a6..25a74eda8c 100644 --- a/cloudformation/cfncluster.cfn.json +++ b/cloudformation/cfncluster.cfn.json @@ -1772,8 +1772,8 @@ }, "CfnClusterVersions": { "default": { - "cfncluster": "cfncluster-1.5.3", - "cookbook": "cfncluster-cookbook-1.5.2", + "cfncluster": "cfncluster-1.5.4", + "cookbook": "cfncluster-cookbook-1.5.4", "chef": "14.2.0", "ridley": "5.1.1", "berkshelf": "7.0.4", diff --git a/docs/source/conf.py b/docs/source/conf.py index 99d5f2b405..621d23a311 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,7 +52,7 @@ # The short X.Y version. version = '1.5' # The full version, including alpha/beta/rc tags. -release = '1.5.3' +release = '1.5.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 25b0ba2a55e5efdcd2801ae25affd0f64f90146d Mon Sep 17 00:00:00 2001 From: Enrico Usai Date: Wed, 13 Jun 2018 16:00:25 +0200 Subject: [PATCH 27/31] Add option validation in cfncluster configure cli I'm adding a flag to the prompt function of th easyconfig.py script to check if the inserted value is one of the acceptable options. It is important for region, key pair, vpc and subnet ids. 
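For illustration, the validation behavior this commit describes boils down to the following simplified, hypothetical sketch; the actual change to `prompt()` in easyconfig.py is in the diff below, and the function name used here is made up for the example.

```python
import sys

def checked_prompt(message, options, default_value=None):
    # Simplified sketch of the validated prompt: show the acceptable
    # values, then reject any answer that is not one of them.
    print('Acceptable Values for %s: ' % message)
    for option in options:
        print('    %s' % option)
    value = input('%s [%s]: ' % (message, default_value or '')).strip()
    if value == '':
        return default_value
    if value not in options:
        print('ERROR: The value (%s) is not valid' % value)
        print('Please select one of the Acceptable Values listed above.')
        sys.exit(1)
    return value
```

A call along the lines of `checked_prompt('AWS Region ID', regions)` would then mirror how the patched code validates region, key pair, VPC and subnet answers against the lists retrieved from AWS.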
Signed-off-by: Enrico Usai --- cli/cfncluster/easyconfig.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cli/cfncluster/easyconfig.py b/cli/cfncluster/easyconfig.py index 61edbc6989..49f42716ed 100644 --- a/cli/cfncluster/easyconfig.py +++ b/cli/cfncluster/easyconfig.py @@ -27,7 +27,7 @@ logger = logging.getLogger('cfncluster.cfncluster') unsupported_regions = ['ap-northeast-3', 'cn-north-1', 'cn-northwest-1'] -def prompt(prompt, default_value=None, hidden=False, options=None): +def prompt(prompt, default_value=None, hidden=False, options=None, check_validity=False): if hidden and default_value is not None: user_prompt = prompt + ' [*******' + default_value[-4:] + ']: ' else: @@ -42,12 +42,17 @@ def prompt(prompt, default_value=None, hidden=False, options=None): for o in options: print(' %s' % o) - var = input(user_prompt) + var = input(user_prompt).strip() if var == '': return default_value else: - return var.strip() + if check_validity and options is not None and var not in options: + print('ERROR: The value (%s) is not valid ' % var) + print('Please select one of the Acceptable Values listed above.') + sys.exit(1) + else: + return var def get_regions(): ec2 = boto3.client('ec2') @@ -130,13 +135,13 @@ def configure(args): aws_secret_access_key = prompt('AWS Secret Access Key ID', config.get('aws', 'aws_secret_access_key') if config.has_option('aws', 'aws_secret_access_key') else None, True) # Use built in boto regions as an available option - aws_region_name = prompt('AWS Region ID', config.get('aws', 'aws_region_name') if config.has_option('aws', 'aws_region_name') else None, options=get_regions()) + aws_region_name = prompt('AWS Region ID', config.get('aws', 'aws_region_name') if config.has_option('aws', 'aws_region_name') else None, options=get_regions(), check_validity=True) vpcname = prompt('VPC Name', config.get('cluster ' + cluster_template, 'vpc_settings') if config.has_option('cluster ' + cluster_template, 'vpc_settings') else 'public') # Query EC2 for available keys as options - key_name = prompt('Key Name', config.get('cluster ' + cluster_template, 'key_name') if config.has_option('cluster ' + cluster_template, 'key_name') else None, options=list_keys(aws_access_key_id, aws_secret_access_key, aws_region_name)) - vpc_id = prompt('VPC ID', config.get('vpc ' + vpcname, 'vpc_id') if config.has_option('vpc ' + vpcname, 'vpc_id') else None, options=list_vpcs(aws_access_key_id, aws_secret_access_key, aws_region_name)) - master_subnet_id = prompt('Master Subnet ID', config.get('vpc ' + vpcname, 'master_subnet_id') if config.has_option('vpc ' + vpcname, 'master_subnet_id') else None, options=list_subnets(aws_access_key_id, aws_secret_access_key, aws_region_name, vpc_id)) + key_name = prompt('Key Name', config.get('cluster ' + cluster_template, 'key_name') if config.has_option('cluster ' + cluster_template, 'key_name') else None, options=list_keys(aws_access_key_id, aws_secret_access_key, aws_region_name), check_validity=True) + vpc_id = prompt('VPC ID', config.get('vpc ' + vpcname, 'vpc_id') if config.has_option('vpc ' + vpcname, 'vpc_id') else None, options=list_vpcs(aws_access_key_id, aws_secret_access_key, aws_region_name), check_validity=True) + master_subnet_id = prompt('Master Subnet ID', config.get('vpc ' + vpcname, 'master_subnet_id') if config.has_option('vpc ' + vpcname, 'master_subnet_id') else None, options=list_subnets(aws_access_key_id, aws_secret_access_key, aws_region_name, vpc_id), check_validity=True) # Dictionary of values we 
want to set s_global = { '__name__': 'global', 'cluster_template': cluster_template, 'update_check': 'true', 'sanity_check': 'true' } From 49f3ad40e7476cbcb87c4a6d9c679e9f5873fd5d Mon Sep 17 00:00:00 2001 From: Luca Carrogu Date: Thu, 30 Aug 2018 11:55:22 +0200 Subject: [PATCH 28/31] Update README for cli Signed-off-by: Luca Carrogu --- cli/README | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cli/README b/cli/README index 532284b6ab..39045ca349 100644 --- a/cli/README +++ b/cli/README @@ -1,26 +1,27 @@ cfncluster is an example framework for launching clusters. - usage: cfncluster [-h] [--config CONFIG_FILE] [--region REGION] [--nowait] - {create,update,delete,status,list,instances,version} + usage: cfncluster [-h] + {create,update,delete,start,stop,status,list,instances,ssh,configure,version} ... - cfncluster is the a tool to launch and manage cluster. + cfncluster is a tool to launch and manage a cluster. positional arguments: - {create,update,stop,delete,status,list,instances,sshmaster} + {create,update,delete,start,stop,status,list,instances,ssh,configure,version} create creates a cluster update update a running cluster delete delete a cluster + start start the compute fleet that has been stopped + stop stop the compute fleet, but leave the master server + running for debugging/development status pull the current status of the cluster list display a list of stacks associated with cfncluster instances display a list of all instances in a cluster - version display the version of cfncluster cli + ssh connect to the master server using SSH + configure creating initial cfncluster configuration + version display version of cfncluster optional arguments: -h, --help show this help message and exit - --config CONFIG_FILE, -c CONFIG_FILE - specify a alternative config file - --region REGION, -r REGION - specify a specific region to connect to - --nowait, -nw do not wait for stack events, after executing stack - command + + For command specific flags run cfncluster [command] --help From 7bc355ff00196650c2f404bdeedd146d67405d8c Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 11 Apr 2018 20:31:13 -0600 Subject: [PATCH 29/31] doc: Update credentials section Add note saying not to store creds in the CfnCluster config. Refs #345 Signed-off-by: Brian Barrett --- docs/source/configuration.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 1e4d954cc9..86e5d1b6c6 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -50,9 +50,9 @@ Attempts to validate that resources defined in parameters actually exist. :: aws ^^^ -This is the AWS credentials section (required). These settings apply to all clusters. +This is the AWS credentials/region section (required). These settings apply to all clusters. -If not defined, boto will attempt to use a) environment or b) EC2 IAM role. :: +We highly recommend use of the environment, EC2 IAM Roles, or storing credentials using the `AWS CLI `_ to store credentials, rather than storing them in the CfnCluster config file. 
:: [aws] aws_access_key_id = #your_aws_access_key_id From 4cdb211b3e36adf1965583c3a3b9b703668d39df Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Thu, 30 Aug 2018 10:16:38 -0700 Subject: [PATCH 30/31] Remove StepFunctions README.md Signed-off-by: Sean Smith --- cli/stepfunctions/README.md | 170 ------------------------------------ 1 file changed, 170 deletions(-) delete mode 100644 cli/stepfunctions/README.md diff --git a/cli/stepfunctions/README.md b/cli/stepfunctions/README.md deleted file mode 100644 index 8de2a5c9d0..0000000000 --- a/cli/stepfunctions/README.md +++ /dev/null @@ -1,170 +0,0 @@ -# CfnCluster Step Functions - -CfnCluster Step Function is a state management solution for deploying high-performance computing (HPC) CfnClusters in an environment with a configurable state machine. This allows our customers to not only run jobs based on particular state of previous job executions, but it also provides real-time visualizations through AWS Step Functions. Additionally, the Step Function state machine handles the setup and teardown process during execution so that customers can focus on their workloads instead of the compute infastructure. - -## Usage - -* Dependencies: - * `docker` installed - * `aws-cli` installed -* Ensure that your AWS credentials are properly configured - * Visit the [AWS Documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html) for more information - -``` -$ pip install -r requirements.txt -$ ./deploy.py --bucket --region --config --jobs -``` - -To Run Step Function: - -1. Wait for CloudFormation stack to deploy -2. Click the link generated by the deploy.py script which links to the [AWS Step Functions Console](https://console.aws.amazon.com/states) -3. Input format: -``` -{ - "cluster_name": "" -} -``` -4. Click `Start Execution` - -## How to Specify Jobs - -Jobs are specified in a configuration file whose path is passed to the `--jobs` or `-j` parameter. An example of a given configuration file can be seen below: - -``` -[order] -sequential = job1, job2 - -[job job1] -name = thejobtouse.sh -s3_uri = s3://job-bucket/thejobtouse.sh - -[job job2] -handler = a_real_job.sh -local_path = /path/where/job/lives -wait_time = 30 -``` - -### Order Section [order] - -Required Parameters: - -`sequential`: List of job names to schedule sequentially given in the form of a comma separated list; order matters - -``` -[order] -sequential = goodjob, badjob, otherjob -``` - -OR - -`parallel`: List of job names to schedule in parallel given in the form of a comma separated list; order does not matter - -``` -[order] -parallel = goodjob, badjob, otherjob -``` - -**Important**: either `sequential` or `parallel` must be specified; not both - -### Job Section [job ] - -Required Parameters: - -`s3_uri`: An S3 URI pointing to the script or folder to package for job scheduling or execution - -``` -[job apple] -s3_uri = s3://thebucket/thefolder -handler = thescript -``` - -OR - -`local_path`: A local path (relative to the jobs config file or absolute) pointing to the script or folder to package for job scheduling and execution - -``` -[job banana] -local_path = /path/to/the/script -handler = script -``` - -AND - -`handler`: The path and name of the script to run. 
Since the `s3_uri` and `local_path` can both be directories, this is to specify which file to send off to the scheduler - -``` -[job carrot] -local_path = relative/path/project -handler = script/path/in/project.sh -``` - -**Important**: either `s3_uri` or `local_path` must be specified; not both - -Optional Parameters: - -`wait_time`: How long to wait between rechecking the status of the job to see if it's completed; default = 10; range 1-240 due to scheduler limitations - -``` -[job donut] -s3_uri = s3://bucket/script -handler = script -wait_time = 240 -``` - -## Arguments - -### `--config` or `-c` - -Specifies the CfnCluster configuration file to use. This will be utilized by the step function to deploy user defined clusters. For more information on how to configure CfnCluster visit the [CfnCluster Documentation](http://cfncluster.readthedocs.io/en/latest/getting_started.html#configuring-cfncluster). - -### `--bucket` or `-b` - -Specifies the name of the S3 bucket to be used to store the source code that creates and terminates the CfnClusters. **Important**: if the bucket already exists, it must be in the same region as that given by the --region argument. If it does not exist, it will be made for you in the specified region. - -### `--jobs` or `-j` - -Specifies the job configuration file to use. This will be used to package your jobs for use in the Step Function. - -## Optional Arguments - -### `--region` or `-r` - -Specifies the AWS region to deploy the CloudFormation stack that contains the Step Function and corresponding source code to deploy and terminate CfnClusters. Defaults to us-east-1. - -### `--stack-name` or `-s` - -Specifies the name that should be given to the CloudFormation stack that the script deploys. - -### `--key-name` or `-k` - -Specifies the name of the EC2 key pair to use for the CfnCluster master node. **Important**: the `key_name` parameter is optional but if you choose to specify it, the [EC2 key pair](https://console.aws.amazon.com/ec2#KeyPairs) with this name must exist and a secret in [AWS Secrets Manager](https://console.aws.amazon.com/secretsmanager) must exist with the same name and a secret value set to the private key. If `key_name` is omitted, it is defaulted to `cfncluster-stepfunctions`. - -## Flags - -### `--help` or `-h` - -Prints the help menu and usage to standard output. 
- -``` -usage: deploy.py [-h] --bucket BUCKET_NAME --config CONFIG_FILE --jobs - JOBS_CONFIG [--stack-name STACK_NAME] [--region REGION] - [--key-name KEY_NAME] - -Deploys CfnCluster Step Function - -optional arguments: - -h, --help show this help message and exit - --bucket BUCKET_NAME, -b BUCKET_NAME - Specify s3 bucket to use/create - --config CONFIG_FILE, -c CONFIG_FILE - Specify config file to use - --jobs JOBS_CONFIG, -j JOBS_CONFIG - Specify jobs config file to use - --stack-name STACK_NAME, -s STACK_NAME - Specify the stack name to use - --region REGION, -r REGION - Specify the region to deploy in - --key-name KEY_NAME, -k KEY_NAME - Specify the ec2 key pair -``` From 8e9ac161ee7217ba03632df565198223d5ee9b41 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Thu, 30 Aug 2018 10:48:40 -0700 Subject: [PATCH 31/31] Update CHANGELOG.rst --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cc02b77a24..e36318f05d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,7 @@ CHANGELOG 1.5.4 ===== -* Add option to disable ganglia `extra_json = { "cfncluster" : { "ganglia_enabled" : "no" } }`` +* Add option to disable ganglia `extra_json = { "cfncluster" : { "ganglia_enabled" : "no" } }` * Fix `cfncluster update` bug * Set SGE Accounting summary to be true, this reports a single accounting record for a mpi job * Upgrade cfncluster-node to Boto3