Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(stepfunctions-tasks): FastFile mode for SageMaker Training Job #26675

Merged
merged 12 commits into from
Aug 23, 2023
Merged
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version":"30.0.0"}
{"version":"33.0.0"}
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"version": "30.0.0",
"version": "33.0.0",
"files": {
"81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26": {
"11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a": {
"source": {
"path": "integ-stepfunctions-sagemaker.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json",
"objectKey": "11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,6 @@
"StateMachine2E01A3A5": {
"Type": "AWS::StepFunctions::StateMachine",
"Properties": {
"RoleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
},
"DefinitionString": {
"Fn::Join": [
"",
Expand All @@ -297,7 +291,7 @@
"Arn"
]
},
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
{
"Ref": "AWS::Region"
},
Expand All @@ -324,6 +318,12 @@
"/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}"
]
]
},
"RoleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
}
},
"DependsOn": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "30.0.0",
"version": "33.0.0",
"testCases": {
"integ.create-training-job": {
"stacks": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "30.0.0",
"version": "33.0.0",
"artifacts": {
"integ-stepfunctions-sagemaker.assets": {
"type": "cdk:asset-manifest",
Expand All @@ -17,7 +17,7 @@
"validateOnSynth": false,
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}",
"cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/81a9a9ef07dfc045ad84514bf0771e76c20386202c7c26130f4f954b8411de26.json",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/11bc70dd89e4afb387b1b639ea823eb492c0e7a7dd664e50aa483f10ef3e204a.json",
"requiresBootstrapStackVersion": 6,
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version",
"additionalDependencies": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-kms.CfnKey",
"fqn": "aws-cdk-lib.aws_kms.CfnKey",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-kms.Key",
"fqn": "aws-cdk-lib.aws_kms.Key",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -87,13 +87,13 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-s3.CfnBucket",
"fqn": "aws-cdk-lib.aws_s3.CfnBucket",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-s3.Bucket",
"fqn": "aws-cdk-lib.aws_s3.Bucket",
"version": "0.0.0"
}
},
Expand All @@ -109,7 +109,7 @@
"id": "ImportSagemakerRole",
"path": "integ-stepfunctions-sagemaker/TrainTask/SagemakerRole/ImportSagemakerRole",
"constructInfo": {
"fqn": "@aws-cdk/core.Resource",
"fqn": "aws-cdk-lib.Resource",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -156,7 +156,7 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnRole",
"fqn": "aws-cdk-lib.aws_iam.CfnRole",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -264,25 +264,25 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnPolicy",
"fqn": "aws-cdk-lib.aws_iam.CfnPolicy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Policy",
"fqn": "aws-cdk-lib.aws_iam.Policy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Role",
"fqn": "aws-cdk-lib.aws_iam.Role",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions-tasks.SageMakerCreateTrainingJob",
"fqn": "aws-cdk-lib.aws_stepfunctions_tasks.SageMakerCreateTrainingJob",
"version": "0.0.0"
}
},
Expand All @@ -298,7 +298,7 @@
"id": "ImportRole",
"path": "integ-stepfunctions-sagemaker/StateMachine/Role/ImportRole",
"constructInfo": {
"fqn": "@aws-cdk/core.Resource",
"fqn": "aws-cdk-lib.Resource",
"version": "0.0.0"
}
},
Expand All @@ -323,7 +323,7 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnRole",
"fqn": "aws-cdk-lib.aws_iam.CfnRole",
"version": "0.0.0"
}
},
Expand Down Expand Up @@ -399,19 +399,19 @@
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.CfnPolicy",
"fqn": "aws-cdk-lib.aws_iam.CfnPolicy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Policy",
"fqn": "aws-cdk-lib.aws_iam.Policy",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-iam.Role",
"fqn": "aws-cdk-lib.aws_iam.Role",
"version": "0.0.0"
}
},
Expand All @@ -421,12 +421,6 @@
"attributes": {
"aws:cdk:cloudformation:type": "AWS::StepFunctions::StateMachine",
"aws:cdk:cloudformation:props": {
"roleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
},
"definitionString": {
"Fn::Join": [
"",
Expand All @@ -442,7 +436,7 @@
"Arn"
]
},
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"File\",\"AlgorithmName\":\"arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
"\",\"AlgorithmSpecification\":{\"TrainingInputMode\":\"FastFile\",\"AlgorithmName\":\"BlazingText\"},\"InputDataConfig\":[{\"ChannelName\":\"InputData\",\"DataSource\":{\"S3DataSource\":{\"S3Uri\":\"https://s3.",
{
"Ref": "AWS::Region"
},
Expand All @@ -469,47 +463,53 @@
"/result/\"},\"ResourceConfig\":{\"InstanceCount\":1,\"InstanceType\":\"ml.m4.xlarge\",\"VolumeSizeInGB\":10},\"StoppingCondition\":{\"MaxRuntimeInSeconds\":3600}}}}}"
]
]
},
"roleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions.CfnStateMachine",
"fqn": "aws-cdk-lib.aws_stepfunctions.CfnStateMachine",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/aws-stepfunctions.StateMachine",
"fqn": "aws-cdk-lib.aws_stepfunctions.StateMachine",
"version": "0.0.0"
}
},
"stateMachineArn": {
"id": "stateMachineArn",
"path": "integ-stepfunctions-sagemaker/stateMachineArn",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnOutput",
"fqn": "aws-cdk-lib.CfnOutput",
"version": "0.0.0"
}
},
"BootstrapVersion": {
"id": "BootstrapVersion",
"path": "integ-stepfunctions-sagemaker/BootstrapVersion",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnParameter",
"fqn": "aws-cdk-lib.CfnParameter",
"version": "0.0.0"
}
},
"CheckBootstrapVersion": {
"id": "CheckBootstrapVersion",
"path": "integ-stepfunctions-sagemaker/CheckBootstrapVersion",
"constructInfo": {
"fqn": "@aws-cdk/core.CfnRule",
"fqn": "aws-cdk-lib.CfnRule",
"version": "0.0.0"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/core.Stack",
"fqn": "aws-cdk-lib.Stack",
"version": "0.0.0"
}
},
Expand All @@ -518,12 +518,12 @@
"path": "Tree",
"constructInfo": {
"fqn": "constructs.Construct",
"version": "10.1.237"
"version": "10.2.69"
}
}
},
"constructInfo": {
"fqn": "@aws-cdk/core.App",
"fqn": "aws-cdk-lib.App",
"version": "0.0.0"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { Key } from 'aws-cdk-lib/aws-kms';
import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3';
import { StateMachine } from 'aws-cdk-lib/aws-stepfunctions';
import { App, CfnOutput, RemovalPolicy, Stack } from 'aws-cdk-lib';
import { S3Location, SageMakerCreateTrainingJob } from 'aws-cdk-lib/aws-stepfunctions-tasks';
import { S3Location, SageMakerCreateTrainingJob, InputMode } from 'aws-cdk-lib/aws-stepfunctions-tasks';

/*
* Creates a state machine with a task state to create a training job in AWS SageMaker
Expand Down Expand Up @@ -35,7 +35,8 @@ const trainingData = new Bucket(stack, 'TrainingData', {
const sm = new StateMachine(stack, 'StateMachine', {
definition: new SageMakerCreateTrainingJob(stack, 'TrainTask', {
algorithmSpecification: {
algorithmName: 'arn:aws:sagemaker:us-east-1:865070037744:algorithm/scikit-decision-trees-15423055-57b73412d2e93e9239e4e16f83298b8f',
algorithmName: 'BlazingText',
trainingInputMode: InputMode.FAST_FILE,
},
inputDataConfig: [{
channelName: 'InputData',
Expand Down
6 changes: 6 additions & 0 deletions packages/aws-cdk-lib/aws-stepfunctions-tasks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,12 @@ new tasks.SageMakerCreateTrainingJob(this, 'TrainSagemaker', {
});
```

You can specify the [TrainingInputMode](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#API_AlgorithmSpecification_Contents) via the `trainingInputMode` property of `algorithmSpecification`.

- To download the data from Amazon Simple Storage Service (Amazon S3) to the provisioned ML storage volume and mount the directory to a Docker volume, choose `InputMode.FILE` if the algorithm supports it.
- To stream data directly from Amazon S3 to the container, choose `InputMode.PIPE` if the algorithm supports it.
- To stream data directly from Amazon S3 to the container with no code changes and to provide file system access to the data, choose `InputMode.FAST_FILE` if the algorithm supports it.

### Create Transform Job

You can call the [`CreateTransformJob`](https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html) API from a `Task` state.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,12 @@ export enum InputMode {
/**
* File mode.
*/
FILE = 'File'
FILE = 'File',

/**
* FastFile mode.
*/
FAST_FILE = 'FastFile'
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ test('create complex training job', () => {
role,
algorithmSpecification: {
algorithmName: 'BlazingText',
trainingInputMode: tasks.InputMode.FILE,
trainingInputMode: tasks.InputMode.FAST_FILE,
metricDefinitions: [
{
name: 'mymetric', regex: 'regex_pattern',
Expand Down Expand Up @@ -218,7 +218,7 @@ test('create complex training job', () => {
TrainingJobName: 'MyTrainJob',
RoleArn: { 'Fn::GetAtt': ['Role1ABCC5F0', 'Arn'] },
AlgorithmSpecification: {
TrainingInputMode: 'File',
TrainingInputMode: 'FastFile',
AlgorithmName: 'BlazingText',
MetricDefinitions: [
{ Name: 'mymetric', Regex: 'regex_pattern' },
Expand Down