Skip to content

Commit

Permalink
Redriven Step Functions Trace Merging (#598)
Browse files Browse the repository at this point in the history
* use redrive count to generate step functions parent ID

* dont use redrive_count when 0

* fix tests

* fix redrive count type

* added link to matching snapshot in logs-backend
  • Loading branch information
avedmala authored Dec 17, 2024
1 parent 4061917 commit 1a75734
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 22 deletions.
8 changes: 7 additions & 1 deletion src/trace/context/extractor.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ describe("TraceContextExtractor", () => {
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn:
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand Down Expand Up @@ -883,6 +884,7 @@ describe("TraceContextExtractor", () => {
},
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand Down Expand Up @@ -919,6 +921,7 @@ describe("TraceContextExtractor", () => {
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn:
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand Down Expand Up @@ -959,6 +962,7 @@ describe("TraceContextExtractor", () => {
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn:
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand Down Expand Up @@ -999,6 +1003,7 @@ describe("TraceContextExtractor", () => {
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn:
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand Down Expand Up @@ -1049,6 +1054,7 @@ describe("TraceContextExtractor", () => {
},
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand Down Expand Up @@ -1086,7 +1092,7 @@ describe("TraceContextExtractor", () => {

const sentMessage = sentSegment.toString();
expect(sentMessage).toEqual(
'{"format": "json", "version": 1}\n{"id":"11111","trace_id":"1-5e272390-8c398be037738dc042009320","parent_id":"94ae789b969f1cc5","name":"datadog-metadata","start_time":1487076708,"end_time":1487076708,"type":"subsegment","metadata":{"datadog":{"root_span_metadata":{"execution_id":"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf","state_entered_time":"2022-12-08T21:08:19.224Z","state_name":"step-one"}}}}',
'{"format": "json", "version": 1}\n{"id":"11111","trace_id":"1-5e272390-8c398be037738dc042009320","parent_id":"94ae789b969f1cc5","name":"datadog-metadata","start_time":1487076708,"end_time":1487076708,"type":"subsegment","metadata":{"datadog":{"root_span_metadata":{"execution_id":"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf","redrive_count":"0","state_entered_time":"2022-12-08T21:08:19.224Z","state_name":"step-one"}}}}',
);
});

Expand Down
72 changes: 54 additions & 18 deletions src/trace/context/extractors/step-function.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,41 @@ describe("StepFunctionEventTraceExtractor", () => {
describe("extract", () => {
const payload = {
Execution: {
Id: "arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
Input: {
MyInput: "MyValue",
},
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
StartTime: "2022-12-08T21:08:17.924Z",
Id: "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316",
Name: "72a7ca3e-901c-41bb-b5a3-5f279b92a316",
RoleArn:
"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j",
StartTime: "2024-12-04T19:38:04.069Z",
RedriveCount: 0,
},
State: {
Name: "step-one",
EnteredTime: "2022-12-08T21:08:19.224Z",
RetryCount: 2,
Name: "Lambda Invoke",
EnteredTime: "2024-12-04T19:38:04.118Z",
RetryCount: 0,
},
StateMachine: {
Id: "arn:aws:states:sa-east-1:425362996713:stateMachine:logs-to-traces-sequential",
Name: "my-state-machine",
Id: "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine",
Name: "abhinav-activity-state-machine",
},
};

const redrivePayload = {
Execution: {
Id: "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316",
Name: "72a7ca3e-901c-41bb-b5a3-5f279b92a316",
RoleArn:
"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j",
StartTime: "2024-12-04T19:38:04.069Z",
RedriveCount: 1,
},
State: {
Name: "Lambda Invoke",
EnteredTime: "2024-12-04T19:38:04.118Z",
RetryCount: 0,
},
StateMachine: {
Id: "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine",
Name: "abhinav-activity-state-machine",
},
};
it("extracts trace context with valid payload", () => {
Expand All @@ -36,8 +55,25 @@ describe("StepFunctionEventTraceExtractor", () => {
const traceContext = extractor.extract(payload);
expect(traceContext).not.toBeNull();

expect(traceContext?.toTraceId()).toBe("1139193989631387307");
expect(traceContext?.toSpanId()).toBe("5892738536804826142");
expect(traceContext?.toTraceId()).toBe("435175499815315247");
expect(traceContext?.toSpanId()).toBe("3929055471293792800");
expect(traceContext?.sampleMode()).toBe("1");
expect(traceContext?.source).toBe("event");
});

// https://github.com/DataDog/logs-backend/blob/c17618cb552fc369ca40282bae0a65803f82f694/domains/serverless/apps/logs-to-traces-reducer/src/test/resources/test-json-files/stepfunctions/RedriveTest/snapshots/RedriveLambdaSuccessTraceMerging.json#L46
it("extracts trace context with valid redriven payload", () => {
// Mimick TraceContextService.extract initialization
StepFunctionContextService.instance(redrivePayload);

const extractor = new StepFunctionEventTraceExtractor();

// Payload is sent again for safety in case the instance wasn't previously initialized
const traceContext = extractor.extract(redrivePayload);
expect(traceContext).not.toBeNull();

expect(traceContext?.toTraceId()).toBe("435175499815315247");
expect(traceContext?.toSpanId()).toBe("5063839446130725204");
expect(traceContext?.sampleMode()).toBe("1");
expect(traceContext?.source).toBe("event");
});
Expand All @@ -49,8 +85,8 @@ describe("StepFunctionEventTraceExtractor", () => {
const traceContext = extractor.extract(payload);
expect(traceContext).not.toBeNull();

expect(traceContext?.toTraceId()).toBe("1139193989631387307");
expect(traceContext?.toSpanId()).toBe("5892738536804826142");
expect(traceContext?.toTraceId()).toBe("435175499815315247");
expect(traceContext?.toSpanId()).toBe("3929055471293792800");
expect(traceContext?.sampleMode()).toBe("1");
expect(traceContext?.source).toBe("event");
});
Expand All @@ -65,8 +101,8 @@ describe("StepFunctionEventTraceExtractor", () => {
const traceContext = extractor.extract({ Payload: payload });
expect(traceContext).not.toBeNull();

expect(traceContext?.toTraceId()).toBe("1139193989631387307");
expect(traceContext?.toSpanId()).toBe("5892738536804826142");
expect(traceContext?.toTraceId()).toBe("435175499815315247");
expect(traceContext?.toSpanId()).toBe("3929055471293792800");
expect(traceContext?.sampleMode()).toBe("1");
expect(traceContext?.source).toBe("event");
});
Expand Down
16 changes: 16 additions & 0 deletions src/trace/step-function-service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ describe("StepFunctionContextService", () => {
},
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand All @@ -30,6 +31,7 @@ describe("StepFunctionContextService", () => {
},
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand All @@ -55,6 +57,7 @@ describe("StepFunctionContextService", () => {
},
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
RedriveCount: 0,
StartTime: "2022-12-08T21:08:17.924Z",
},
State: {
Expand Down Expand Up @@ -106,6 +109,16 @@ describe("StepFunctionContextService", () => {
},
},
],
[
"Execution RedriveCount is not a number",
{
...legacyStepFunctionEvent,
Execution: {
...legacyStepFunctionEvent.Execution,
RedriveCount: "0",
},
},
],
[
"State is not defined",
{
Expand Down Expand Up @@ -146,6 +159,7 @@ describe("StepFunctionContextService", () => {
expect(instance.context).toEqual({
execution_id:
"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
redrive_count: "0",
state_entered_time: "2022-12-08T21:08:19.224Z",
state_name: "step-one",
});
Expand All @@ -158,6 +172,7 @@ describe("StepFunctionContextService", () => {
expect(instance.context).toEqual({
execution_id:
"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
redrive_count: "0",
state_entered_time: "2022-12-08T21:08:19.224Z",
state_name: "step-one",
root_execution_id:
Expand All @@ -173,6 +188,7 @@ describe("StepFunctionContextService", () => {
expect(instance.context).toEqual({
execution_id:
"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
redrive_count: "0",
state_entered_time: "2022-12-08T21:08:19.224Z",
state_name: "step-one",
trace_id: "10593586103637578129",
Expand Down
21 changes: 18 additions & 3 deletions src/trace/step-function-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { Sha256 } from "@aws-crypto/sha256-js";

interface NestedStepFunctionContext {
execution_id: string;
redrive_count: string;
state_entered_time: string;
state_name: string;
root_execution_id: string;
Expand All @@ -13,6 +14,7 @@ interface NestedStepFunctionContext {

interface LambdaRootStepFunctionContext {
execution_id: string;
redrive_count: string;
state_entered_time: string;
state_name: string;
trace_id: string;
Expand All @@ -22,6 +24,7 @@ interface LambdaRootStepFunctionContext {

interface LegacyStepFunctionContext {
execution_id: string;
redrive_count: string;
state_entered_time: string;
state_name: string;
}
Expand Down Expand Up @@ -88,12 +91,13 @@ export class StepFunctionContextService {
// Extract the common context variables
const stateMachineContext = this.extractStateMachineContext(event);
if (stateMachineContext === null) return;
const { execution_id, state_entered_time, state_name } = stateMachineContext;
const { execution_id, redrive_count, state_entered_time, state_name } = stateMachineContext;

if (typeof event["serverless-version"] === "string" && event["serverless-version"] === "v1") {
if (typeof event.RootExecutionId === "string") {
this.context = {
execution_id,
redrive_count,
state_entered_time,
state_name,
root_execution_id: event.RootExecutionId,
Expand All @@ -102,6 +106,7 @@ export class StepFunctionContextService {
} else if (typeof event["x-datadog-trace-id"] === "string" && typeof event["x-datadog-tags"] === "string") {
this.context = {
execution_id,
redrive_count,
state_entered_time,
state_name,
trace_id: event["x-datadog-trace-id"],
Expand All @@ -110,7 +115,7 @@ export class StepFunctionContextService {
} as LambdaRootStepFunctionContext;
}
} else {
this.context = { execution_id, state_entered_time, state_name } as LegacyStepFunctionContext;
this.context = { execution_id, redrive_count, state_entered_time, state_name } as LegacyStepFunctionContext;
}
}

Expand All @@ -134,8 +139,15 @@ export class StepFunctionContextService {
return null;
}

const redrivePostfix = this.context.redrive_count === "0" ? "" : `#${this.context.redrive_count}`;

const parentId = this.deterministicSha256HashToBigIntString(
this.context.execution_id + "#" + this.context.state_name + "#" + this.context.state_entered_time,
this.context.execution_id +
"#" +
this.context.state_name +
"#" +
this.context.state_entered_time +
redrivePostfix,
PARENT_ID,
);
const sampleMode = SampleMode.AUTO_KEEP;
Expand Down Expand Up @@ -196,12 +208,14 @@ export class StepFunctionContextService {

private extractStateMachineContext(event: any): {
execution_id: string;
redrive_count: string;
state_entered_time: string;
state_name: string;
} | null {
if (this.isValidContextObject(event)) {
return {
execution_id: event.Execution.Id,
redrive_count: event.Execution.RedriveCount.toString(),
state_entered_time: event.State.EnteredTime,
state_name: event.State.Name,
};
Expand All @@ -214,6 +228,7 @@ export class StepFunctionContextService {
private isValidContextObject(context: any): boolean {
return (
typeof context?.Execution?.Id === "string" &&
typeof context?.Execution?.RedriveCount === "number" &&
typeof context?.State?.EnteredTime === "string" &&
typeof context?.State?.Name === "string"
);
Expand Down

0 comments on commit 1a75734

Please sign in to comment.