Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(stepfunctions-tasks): FastFile mode for SageMaker Training Job #26675

Merged
merged 12 commits into from
Aug 23, 2023
Merged
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version":"30.0.0"}
{"version":"33.0.0"}
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"version": "30.0.0",
"version": "33.0.0",
"files": {
"81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26": {
"11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a": {
"source": {
"path": "integ-stepfunctions-sagemaker.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json",
"objectKey": "11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,6 @@
"StateMachine2E01A3A5": {
"Type": "AWS::StepFunctions::StateMachine",
"Properties": {
"RoleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
},
"DefinitionString": {
"Fn::Join": [
"",
Expand All @@ -297,7 +291,7 @@
"Arn"
]
},
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
{
"Ref": "AWS::Region"
},
Expand All @@ -324,6 +318,12 @@
"/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}"
]
]
},
"RoleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
}
},
"DependsOn": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "30.0.0",
"version": "33.0.0",
"testCases": {
"integ.create-training-job": {
"stacks": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "30.0.0",
"version": "33.0.0",
"artifacts": {
"integ-stepfunctions-sagemaker.assets": {
"type": "cdk:asset-manifest",
Expand All @@ -17,7 +17,7 @@
"validateOnSynth": false,
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}",
"cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json",
"requiresBootstrapStackVersion": 6,
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version",
"additionalDependencies": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-kms.CfnKey",
"fqn": "aws-cdk-lib.aws_kms.CfnKey",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-kms.Key",
"fqn": "aws-cdk-lib.aws_kms.Key",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -87,13 +87,13 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-s3.CfnBucket",
"fqn": "aws-cdk-lib.aws_s3.CfnBucket",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-s3.Bucket",
"fqn": "aws-cdk-lib.aws_s3.Bucket",
"version": "0.0.0"
}
},
Expand All @@ -109,7 +109,7 @@
"id": "ImportSagemakerRole",
"path": "integ-stepfunctions-sagemaker/TrainTask/SagemakerRole/ImportSagemakerRole",
"constructInfo": {
"fqn": "@aws-cdk/core.Resource",
"fqn": "aws-cdk-lib.Resource",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -156,7 +156,7 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnRole",
"fqn": "aws-cdk-lib.aws_iam.CfnRole",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -264,25 +264,25 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnPolicy",
"fqn": "aws-cdk-lib.aws_iam.CfnPolicy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Policy",
"fqn": "aws-cdk-lib.aws_iam.Policy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Role",
"fqn": "aws-cdk-lib.aws_iam.Role",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions-tasks.SageMakerCreateTrainingJob",
"fqn": "aws-cdk-lib.aws_stepfunctions_tasks.SageMakerCreateTrainingJob",
"version": "0.0.0"
}
},
Expand All @@ -298,7 +298,7 @@
"id": "ImportRole",
"path": "integ-stepfunctions-sagemaker/StateMachine/Role/ImportRole",
"constructInfo": {
"fqn": "@aws-cdk/core.Resource",
"fqn": "aws-cdk-lib.Resource",
"version": "0.0.0"
}
},
Expand All @@ -323,7 +323,7 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnRole",
"fqn": "aws-cdk-lib.aws_iam.CfnRole",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -399,19 +399,19 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnPolicy",
"fqn": "aws-cdk-lib.aws_iam.CfnPolicy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Policy",
"fqn": "aws-cdk-lib.aws_iam.Policy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Role",
"fqn": "aws-cdk-lib.aws_iam.Role",
"version": "0.0.0"
}
},
Expand All @@ -421,12 +421,6 @@
"attributes": {
"aws:cdk:cloudformation:type": "AWS::StepFunctions::StateMachine",
"aws:cdk:cloudformation:props": {
"roleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
},
"definitionString": {
"Fn::Join": [
"",
Expand All @@ -442,7 +436,7 @@
"Arn"
]
},
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
{
"Ref": "AWS::Region"
},
Expand All @@ -469,47 +463,53 @@
"/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}"
]
]
},
"roleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions.CfnStateMachine",
"fqn": "aws-cdk-lib.aws_stepfunctions.CfnStateMachine",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions.StateMachine",
"fqn": "aws-cdk-lib.aws_stepfunctions.StateMachine",
"version": "0.0.0"
}
},
"stateMachineArn": {
"id": "stateMachineArn",
"path": "integ-stepfunctions-sagemaker/stateMachineArn",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnOutput",
"fqn": "aws-cdk-lib.CfnOutput",
"version": "0.0.0"
}
},
"BootstrapVersion": {
"id": "BootstrapVersion",
"path": "integ-stepfunctions-sagemaker/BootstrapVersion",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnParameter",
"fqn": "aws-cdk-lib.CfnParameter",
"version": "0.0.0"
}
},
"CheckBootstrapVersion": {
"id": "CheckBootstrapVersion",
"path": "integ-stepfunctions-sagemaker/CheckBootstrapVersion",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnRule",
"fqn": "aws-cdk-lib.CfnRule",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/core.Stack",
"fqn": "aws-cdk-lib.Stack",
"version": "0.0.0"
}
},
Expand All @@ -518,12 +518,12 @@
"path": "Tree",
"constructInfo": {
"fqn": "constructs.Construct",
"version": "10.1.237"
"version": "10.2.69"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/core.App",
"fqn": "aws-cdk-lib.App",
"version": "0.0.0"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { Key } from 'aws-cdk-lib/aws-kms';
import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3';
import { StateMachine } from 'aws-cdk-lib/aws-stepfunctions';
import { App, CfnOutput, RemovalPolicy, Stack } from 'aws-cdk-lib';
import { S3Location, SageMakerCreateTrainingJob } from 'aws-cdk-lib/aws-stepfunctions-tasks';
import { S3Location, SageMakerCreateTrainingJob, InputMode } from 'aws-cdk-lib/aws-stepfunctions-tasks';

/*
* Creates a state machine with a task state to create a training job in AWS SageMaker
Expand Down Expand Up @@ -35,7 +35,8 @@ const trainingData = new Bucket(stack, 'TrainingData', {
const sm = new StateMachine(stack, 'StateMachine', {
definition: new SageMakerCreateTrainingJob(stack, 'TrainTask', {
algorithmSpecification: {
algorithmName: 'arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f',
algorithmName: 'BlazingText',
trainingInputMode: InputMode.FASTFILE,
},
inputDataConfig: [{
channelName: 'InputData',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,12 @@ export enum InputMode {
/**
* File mode.
*/
FILE = 'File'
FILE = 'File',

/**
* FastFile mode.
*/
FASTFILE = 'FastFile'
tmyoda marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,14 @@ export class SageMakerCreateTrainingJob extends sfn.TaskStateBase implements iam
throw new Error('Must define either an algorithm name or training image URI in the algorithm specification');
}

// check that both algorithm name and image are not defined
if (props.algorithmSpecification.algorithmName && props.algorithmSpecification.trainingImage) {
throw new Error('Cannot define both an algorithm name and training image URI in the algorithm specification');
}

// validate algorithm name
this.validateAlgorithmName(props.algorithmSpecification.algorithmName);

// set the input mode to 'File' if not defined
this.algorithmSpecification = props.algorithmSpecification.trainingInputMode
? props.algorithmSpecification
Expand Down Expand Up @@ -324,6 +332,21 @@ export class SageMakerCreateTrainingJob extends sfn.TaskStateBase implements iam
: {};
}

/**
 * Validates the algorithm name supplied in the algorithm specification.
 *
 * Does nothing when no algorithm name is given. Otherwise the name must:
 * - be between 1 and 170 characters long (inclusive), and
 * - consist of an optional SageMaker ARN prefix (partition starting with
 *   "arn:aws", the "sagemaker" service, a region, a 12-digit account id and
 *   a resource type followed by "/"), followed by a name of 1 to 63
 *   characters made of ASCII letters, digits and hyphens that starts with a
 *   letter or digit and does not end with a hyphen (enforced by the trailing
 *   negative lookbehind).
 *
 * NOTE(review): the checks run against the raw string value; presumably an
 * unresolved CDK token passed as the algorithm name would be rejected here.
 * Confirm whether tokens should bypass this validation.
 *
 * @param algorithmName the algorithm name or algorithm ARN to check; may be undefined
 * @throws Error if the length is out of range or the pattern does not match
 */
private validateAlgorithmName(algorithmName?: string): void {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn't have anything to do with FastFile mode, right? Can it be included in a separate PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, will create another PR for the algorithm name validation

if (algorithmName === undefined) {
return;
}

if (algorithmName.length < 1 || 170 < algorithmName.length) {
throw new Error(`Algorithm name length must be between 1 and 170, but got ${algorithmName.length}`);
}

const regex = /^(arn:aws[a-z\-]*:sagemaker:[a-z0-9\-]*:[0-9]{12}:[a-z\-]*\/)?([a-zA-Z0-9]([a-zA-Z0-9-]){0,62})(?<!-)$/;
if (!regex.test(algorithmName)) {
throw new Error(`Expected algorithm name to match pattern ${regex.source}, but got ${algorithmName}`);
}
}

private makePolicyStatements(): iam.PolicyStatement[] {
// set the sagemaker role or create new one
this._grantPrincipal = this._role =
Expand Down
Loading