Skip to content

Commit

Permalink
[Port v2int 2.1] Add threshold for non-runtime ops triggering summary…
Browse files Browse the repository at this point in the history
… heuristics (#13026) (#13032)

#13026

## Description

We are summarizing too frequently when the summary only contains a few
non-system ops. There needs to be a threshold to control how many ops
since last summary are required before a non-system op can trigger a
summary.

https://portal.microsofticm.com/imp/v3/incidents/details/349754396/home

A threshold of 20 was chosen based on looking at telemetry. For every
summary with 100 ops or less, the average number of non-runtime ops is
below 20.
  • Loading branch information
kian-thompson authored Nov 19, 2022
1 parent fdd4e13 commit 746e973
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 3 deletions.
1 change: 1 addition & 0 deletions api-report/container-runtime.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ export interface ISummaryConfigurationHeuristics extends ISummaryBaseConfigurati
maxTime: number;
minIdleTime: number;
minOpsForLastSummaryAttempt: number;
nonRuntimeHeuristicThreshold?: number;
nonRuntimeOpWeight: number;
runtimeOpWeight: number;
// (undocumented)
Expand Down
13 changes: 13 additions & 0 deletions packages/runtime/container-runtime/src/containerRuntime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,17 @@ export interface ISummaryConfigurationHeuristics extends ISummaryBaseConfigurati
* For example: (multiplier) * (number of non-runtime ops) = weighted number of non-runtime ops
*/
nonRuntimeOpWeight: number;

/**
* Number of ops since last summary needed before a non-runtime op can trigger running summary heuristics.
*
* Note: Any runtime ops sent before the threshold is reached will trigger heuristics normally.
* This threshold ONLY applies to non-runtime ops triggering summaries.
*
* For example: Say the threshold is 20. Sending 19 non-runtime ops will not trigger any heuristic checks.
* Sending the 20th non-runtime op will trigger the heuristic checks for summarizing.
*/
nonRuntimeHeuristicThreshold?: number;
}

export interface ISummaryConfigurationDisableSummarizer {
Expand Down Expand Up @@ -317,6 +328,8 @@ export const DefaultSummaryConfiguration: ISummaryConfiguration = {
nonRuntimeOpWeight: 0.1,

runtimeOpWeight: 1.0,

nonRuntimeHeuristicThreshold: 20,
};

export interface IGCRuntimeOptions {
Expand Down
14 changes: 11 additions & 3 deletions packages/runtime/container-runtime/src/runningSummarizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,9 @@ export class RunningSummarizer implements IDisposable {

/**
* Can the given op trigger a summary?
* # Currently only prevents summaries for Summarize and SummaryAck ops
* # Currently always prevents summaries for Summarize and SummaryAck/Nack ops
* @param op - op to check
* @returns true if this type of op can trigger a summary
* @returns true if this op can trigger a summary
*/
private opCanTriggerSummary(op: ISequencedDocumentMessage): boolean {
switch (op.type) {
Expand All @@ -274,10 +274,18 @@ export class RunningSummarizer implements IDisposable {
case MessageType.SummaryNack:
return false;
default:
return true;
return isRuntimeMessage(op) || this.nonRuntimeOpCanTriggerSummary();
}
}

private nonRuntimeOpCanTriggerSummary(): boolean {
// eslint-disable-next-line max-len
const opsSinceLastAck = this.heuristicData.lastOpSequenceNumber - this.heuristicData.lastSuccessfulSummary.refSequenceNumber;
return this.configuration.state === "enabled"
&& (this.configuration.nonRuntimeHeuristicThreshold === undefined
|| this.configuration.nonRuntimeHeuristicThreshold <= opsSinceLastAck);
}

public async waitStop(allowLastSummary: boolean): Promise<void> {
if (this.stopping) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ describe("Runtime", () => {
maxIdleTime: 5000, // This must remain the same as minIdleTime for tests to pass nicely
nonRuntimeOpWeight: 0.1,
runtimeOpWeight: 1.0,
nonRuntimeHeuristicThreshold: 20,
...summaryCommon,
};
const summaryConfigDisableHeuristics: ISummaryConfiguration = {
Expand Down Expand Up @@ -93,6 +94,20 @@ describe("Runtime", () => {
await flushPromises();
}

async function emitNoOp(
increment: number = 1,
) {
heuristicData.numNonRuntimeOps += increment - 1; // -1 because we emit an op below
lastRefSeq += increment;
const op: Partial<ISequencedDocumentMessage> = {
sequenceNumber: lastRefSeq,
timestamp: Date.now(),
type: MessageType.NoOp,
};
mockDeltaManager.emit("op", op);
await flushPromises();
}

function emitBroadcast(timestamp = Date.now()) {
mockDeltaManager.emit("op", {
type: MessageType.Summarize,
Expand Down Expand Up @@ -454,6 +469,29 @@ describe("Runtime", () => {
assert.strictEqual(heuristicData.numRuntimeOps, 0);
assert.strictEqual(heuristicData.numNonRuntimeOps, 1);
});

it("Should not summarize on non-runtime op before threshold is reached", async () => {
// Creating RunningSummarizer starts heuristics automatically
await emitNoOp(1);
await tickAndFlushPromises(summaryConfig.minIdleTime);
assertRunCounts(1, 0, 0, "should perform summary");
await emitAck();

assert(summaryConfig.nonRuntimeHeuristicThreshold !== undefined,
"Expect nonRuntimeHeuristicThreshold to be provided");

await emitNoOp(summaryConfig.nonRuntimeHeuristicThreshold - 2); // SummaryAck is included
await tickAndFlushPromises(summaryConfig.minIdleTime);

assertRunCounts(1, 0, 0, "should not perform summary");
assert.strictEqual(heuristicData.numRuntimeOps, 0);
assert.strictEqual(heuristicData.numNonRuntimeOps, summaryConfig.nonRuntimeHeuristicThreshold - 1);

await emitNoOp(1);
await tickAndFlushPromises(summaryConfig.minIdleTime);

assertRunCounts(2, 0, 0, "should perform summary");
});
});

describe("Safe Retries", () => {
Expand Down

0 comments on commit 746e973

Please sign in to comment.