Skip to content

Commit

Permalink
Fix for #15709 race when creating log groups
Browse files Browse the repository at this point in the history
Fixes #15709

Include OperationAbortedException into retryableErrors to handle race of "another create operation is in progress" when the target lambda creates the log group and this function also tries to create the same log group at the same time.
  • Loading branch information
ddl-denis-parnovskiy committed Sep 8, 2021
1 parent e70f491 commit adb5e13
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 14 deletions.
34 changes: 22 additions & 12 deletions packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,25 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
await cloudwatchlogs.createLogGroup({ logGroupName }).promise();
return;
} catch (error) {
if (error.code == 'ResourceAlreadyExistsException') {
// Yes, the log group is already created by the lambda execution
if (error.code === 'ResourceAlreadyExistsException') {
// The log group is already created by the lambda execution
return;
}
if (error.code != 'OperationAbortedException') {
throw error;
}
if (retryCount == 0) {
console.error(error);
throw error;
if (error.code === 'OperationAbortedException') {
if (retryCount > 0) {
retryCount--;
await new Promise(resolve => setTimeout(resolve, delay));
continue;
} else {
// The log group is still being created by another execution but we are out of retries
throw new Error('Out of attempts to create a logGroup');
}
}
// Any other error
console.error(error);
throw error;
}
await new Promise(resolve => setTimeout(resolve, delay));
} while (retryCount > 0);
throw new Error('Out of attempts to create logGroup');
} while (true); // exit happens on retry count check
}

/**
Expand Down Expand Up @@ -85,9 +89,15 @@ export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent
await setRetentionPolicy(logGroupName, logGroupRegion, retryOptions, parseInt(event.ResourceProperties.RetentionInDays, 10));

if (event.RequestType === 'Create') {
// Set a retention policy of 1 day on the logs of this function.
// Set a retention policy of 1 day on the logs of this very function.
// Due to the async nature of the log group creation, the log group for this function might
// still be not created yet at this point. Therefore we attempt to create it.
// In case it is being created, createLogGroupSafe will handle the conflic.
const region = process.env.AWS_REGION;
await createLogGroupSafe(`/aws/lambda/${context.functionName}`, region, retryOptions);
// If createLogGroupSafe fails, the log group is not created even after multiple attempts
// In this case we have nothing to set the retention policy on but an exception will skip
// the next line.
await setRetentionPolicy(`/aws/lambda/${context.functionName}`, region, retryOptions, 1);
}
}
Expand Down
40 changes: 38 additions & 2 deletions packages/@aws-cdk/aws-logs/test/test.log-retention-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ export = {
test.done();
},

async 'does not fail when operations on provider log group fail'(test: Test) {
async 'does not if when operations on provider log group fails'(test: Test) {
let attempt = 2;
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === '/aws/lambda/provider') {
Expand Down Expand Up @@ -281,7 +281,7 @@ export = {
test.done();
},

async 'does not fail when operations on CDK lambda log group fail'(test: Test) {
async 'does not fail if operations on CDK lambda log group fails twice'(test: Test) {
let attempt = 2;
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
Expand Down Expand Up @@ -323,6 +323,42 @@ export = {
test.done();
},

async 'does fail if operations on CDK lambda log group fails indefinitely'(test: Test) {
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
return Promise.reject(new MyError(
'A conflicting operation is currently in progress against this resource. Please try again.',
'OperationAbortedException'));
}
return Promise.resolve({});
};

const putRetentionPolicyFake = sinon.fake.resolves({});
const deleteRetentionPolicyFake = sinon.fake.resolves({});

AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

const event = {
...eventCommon,
RequestType: 'Create',
ResourceProperties: {
ServiceToken: 'token',
RetentionInDays: '30',
LogGroupName: 'group',
},
};

const request = createRequest('FAILED');

await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

test.equal(request.isDone(), true);

test.done();
},

async 'response data contains the log group name'(test: Test) {
AWS.mock('CloudWatchLogs', 'createLogGroup', sinon.fake.resolves({}));
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', sinon.fake.resolves({}));
Expand Down

0 comments on commit adb5e13

Please sign in to comment.