Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

services, xds, orca: use application_utilization and fallback to cpu_utilization if unset in WRR, import cncf/xds #10256

Merged
merged 3 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ def grpc_java_repositories():
if not native.existing_rule("com_github_cncf_xds"):
http_archive(
name = "com_github_cncf_xds",
strip_prefix = "xds-32f1caf87195bf3390061c29f18987e51ca56a88",
sha256 = "fcd0b50c013452fda9c5e28c131c287b655ebb361271a76ad3bffc08b3ecd82e",
strip_prefix = "xds-e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7",
sha256 = "0d33b83f8c6368954e72e7785539f0d272a8aba2f6e2e336ed15fd1514bc9899",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: I obtained this sha256 value by running `wget https://github.com/cncf/xds/archive/e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz` and then `sha256sum e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz`.

urls = [
"https://github.com/cncf/xds/archive/32f1caf87195bf3390061c29f18987e51ca56a88.tar.gz",
"https://github.com/cncf/xds/archive/e9ce68804cb4e64cab5a52e3c8baf840d4ff87b7.tar.gz",
],
)
if not native.existing_rule("com_github_grpc_grpc"):
Expand Down
25 changes: 21 additions & 4 deletions services/src/main/java/io/grpc/services/CallMetricRecorder.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public final class CallMetricRecorder {
private final AtomicReference<ConcurrentHashMap<String, Double>> requestCostMetrics =
new AtomicReference<>();
private double cpuUtilizationMetric = 0;
private double applicationUtilizationMetric = 0;
private double memoryUtilizationMetric = 0;
private double qps = 0;
private double eps = 0;
Expand Down Expand Up @@ -127,7 +128,7 @@ public CallMetricRecorder recordRequestCostMetric(String name, double value) {
}

/**
* Records a call metric measurement for CPU utilization in the range [0, 1]. Values outside the
* Records a call metric measurement for CPU utilization in the range [0, inf). Values outside the
* valid range are ignored. If RPC has already finished, this method is no-op.
*
* <p>A later record will overwrite its former namesakes.
Expand All @@ -136,13 +137,29 @@ public CallMetricRecorder recordRequestCostMetric(String name, double value) {
* @since 1.47.0
*/
public CallMetricRecorder recordCpuUtilizationMetric(double value) {
if (disabled || !MetricRecorderHelper.isCpuUtilizationValid(value)) {
if (disabled || !MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)) {
return this;
}
cpuUtilizationMetric = value;
return this;
}

/**
 * Stores the application specific utilization measurement for this call. Accepted values lie in
 * the range [0, inf); a value outside that range is ignored. If RPC has already finished, this
 * method is no-op.
 *
 * <p>A later record overwrites the previously recorded value.
 *
 * @param value the application specific utilization to record
 * @return this recorder object
 */
public CallMetricRecorder recordApplicationUtilizationMetric(double value) {
  // Short-circuit keeps the validity check from running once the recorder is disabled.
  boolean accepted =
      !disabled && MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value);
  if (accepted) {
    applicationUtilizationMetric = value;
  }
  return this;
}

/**
* Records a call metric measurement for memory utilization in the range [0, 1]. Values outside
* the valid range are ignored. If RPC has already finished, this method is no-op.
Expand Down Expand Up @@ -221,8 +238,8 @@ MetricReport finalizeAndDump2() {
if (savedUtilizationMetrics == null) {
savedUtilizationMetrics = Collections.emptyMap();
}
return new MetricReport(cpuUtilizationMetric, memoryUtilizationMetric, qps, eps,
Collections.unmodifiableMap(savedRequestCostMetrics),
return new MetricReport(cpuUtilizationMetric, applicationUtilizationMetric,
memoryUtilizationMetric, qps, eps, Collections.unmodifiableMap(savedRequestCostMetrics),
Collections.unmodifiableMap(savedUtilizationMetrics)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ public static MetricReport finalizeAndDump2(CallMetricRecorder recorder) {
return recorder.finalizeAndDump2();
}

public static MetricReport createMetricReport(double cpuUtilization, double memoryUtilization,
double qps, double eps, Map<String, Double> requestCostMetrics,
Map<String, Double> utilizationMetrics) {
return new MetricReport(cpuUtilization, memoryUtilization, qps, eps, requestCostMetrics,
utilizationMetrics);
public static MetricReport createMetricReport(double cpuUtilization,
double applicationUtilization, double memoryUtilization, double qps, double eps,
Map<String, Double> requestCostMetrics, Map<String, Double> utilizationMetrics) {
return new MetricReport(cpuUtilization, applicationUtilization, memoryUtilization, qps, eps,
requestCostMetrics, utilizationMetrics);
}
}
23 changes: 21 additions & 2 deletions services/src/main/java/io/grpc/services/MetricRecorder.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
public final class MetricRecorder {
private volatile ConcurrentHashMap<String, Double> metricsData = new ConcurrentHashMap<>();
private volatile double cpuUtilization;
private volatile double applicationUtilization;
private volatile double memoryUtilization;
private volatile double qps;
private volatile double eps;
Expand Down Expand Up @@ -69,7 +70,7 @@ public void removeUtilizationMetric(String key) {
* are ignored.
*/
public void setCpuUtilizationMetric(double value) {
if (!MetricRecorderHelper.isCpuUtilizationValid(value)) {
if (!MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)) {
return;
}
cpuUtilization = value;
Expand All @@ -82,6 +83,24 @@ public void clearCpuUtilizationMetric() {
cpuUtilization = 0;
}

/**
 * Replaces the stored application specific utilization with {@code value}, provided it lies in
 * the range [0, inf). A value outside that range leaves the stored value untouched.
 *
 * @param value the application specific utilization to store
 */
public void setApplicationUtilizationMetric(double value) {
  if (MetricRecorderHelper.isCpuOrApplicationUtilizationValid(value)) {
    applicationUtilization = value;
  }
}

/**
 * Clear the application specific utilization metrics data by resetting the stored value to 0.
 */
public void clearApplicationUtilizationMetric() {
applicationUtilization = 0;
}

/**
* Update the memory utilization metrics data in the range [0, 1]. Values outside the valid range
* are ignored.
Expand Down Expand Up @@ -135,7 +154,7 @@ public void clearEpsMetric() {
}

MetricReport getMetricReport() {
return new MetricReport(cpuUtilization, memoryUtilization, qps, eps,
return new MetricReport(cpuUtilization, applicationUtilization, memoryUtilization, qps, eps,
Collections.emptyMap(), Collections.unmodifiableMap(metricsData));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ static boolean isUtilizationValid(double utilization) {
}

/**
* Return true if the cpu utilization value is in the range [0, inf) and false otherwise.
* Occasionally users have over 100% cpu utilization and get a runaway effect where the backend
* with highest qps gets more and more qps sent to it. So we allow cpu utilization > 1.0.
* Return true if the cpu utilization or application specific utilization value is in the range
* [0, inf) and false otherwise. Occasionally users have over 100% cpu utilization and get a
* runaway effect where the backend with highest qps gets more and more qps sent to it. So we
* allow cpu utilization > 1.0, similarly for application specific utilization.
*/
static boolean isCpuUtilizationValid(double utilization) {
static boolean isCpuOrApplicationUtilizationValid(double utilization) {
return utilization >= 0.0;
}

Expand Down
13 changes: 10 additions & 3 deletions services/src/main/java/io/grpc/services/MetricReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@
@ExperimentalApi("https://github.com/grpc/grpc-java/issues/9381")
public final class MetricReport {
private double cpuUtilization;
private double applicationUtilization;
private double memoryUtilization;
private double qps;
private double eps;
private Map<String, Double> requestCostMetrics;
private Map<String, Double> utilizationMetrics;

MetricReport(double cpuUtilization, double memoryUtilization, double qps, double eps,
Map<String, Double> requestCostMetrics,
Map<String, Double> utilizationMetrics) {
MetricReport(double cpuUtilization, double applicationUtilization, double memoryUtilization,
double qps, double eps, Map<String, Double> requestCostMetrics,
Map<String, Double> utilizationMetrics) {
this.cpuUtilization = cpuUtilization;
this.applicationUtilization = applicationUtilization;
this.memoryUtilization = memoryUtilization;
this.qps = qps;
this.eps = eps;
Expand All @@ -50,6 +52,10 @@ public double getCpuUtilization() {
return cpuUtilization;
}

/**
 * Returns the application specific utilization value held by this report.
 */
public double getApplicationUtilization() {
return applicationUtilization;
}

public double getMemoryUtilization() {
return memoryUtilization;
}
Expand All @@ -74,6 +80,7 @@ public double getEps() {
public String toString() {
return MoreObjects.toStringHelper(this)
.add("cpuUtilization", cpuUtilization)
.add("applicationUtilization", applicationUtilization)
.add("memoryUtilization", memoryUtilization)
.add("requestCost", requestCostMetrics)
.add("utilization", utilizationMetrics)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public void dumpDumpsAllSavedMetricValues() {
recorder.recordRequestCostMetric("cost2", 10293.0);
recorder.recordRequestCostMetric("cost3", 1.0);
recorder.recordCpuUtilizationMetric(0.1928);
recorder.recordApplicationUtilizationMetric(0.9987);
recorder.recordMemoryUtilizationMetric(0.474);
recorder.recordQpsMetric(2522.54);
recorder.recordEpsMetric(1.618);
Expand All @@ -55,15 +56,18 @@ public void dumpDumpsAllSavedMetricValues() {
Truth.assertThat(dump.getRequestCostMetrics())
.containsExactly("cost1", 37465.12, "cost2", 10293.0, "cost3", 1.0);
Truth.assertThat(dump.getCpuUtilization()).isEqualTo(0.1928);
Truth.assertThat(dump.getApplicationUtilization()).isEqualTo(0.9987);
Truth.assertThat(dump.getMemoryUtilization()).isEqualTo(0.474);
Truth.assertThat(dump.getQps()).isEqualTo(2522.54);
Truth.assertThat(dump.getEps()).isEqualTo(1.618);
Truth.assertThat(dump.toString()).contains("eps=1.618");
Truth.assertThat(dump.toString()).contains("applicationUtilization=0.9987");
}

@Test
public void noMetricsRecordedAfterSnapshot() {
Map<String, Double> initDump = recorder.finalizeAndDump();
recorder.recordApplicationUtilizationMetric(0.01);
recorder.recordUtilizationMetric("cost", 0.154353423);
recorder.recordQpsMetric(3.14159);
recorder.recordEpsMetric(1.618);
Expand All @@ -87,13 +91,15 @@ public void noMetricsRecordedIfUtilizationIsGreaterThanUpperBound() {
@Test
public void noMetricsRecordedIfUtilizationAndQpsAreLessThanLowerBound() {
recorder.recordCpuUtilizationMetric(-0.001);
recorder.recordApplicationUtilizationMetric(-0.001);
recorder.recordMemoryUtilizationMetric(-0.001);
recorder.recordQpsMetric(-0.001);
recorder.recordEpsMetric(-0.001);
recorder.recordUtilizationMetric("util1", -0.001);

MetricReport dump = recorder.finalizeAndDump2();
Truth.assertThat(dump.getCpuUtilization()).isEqualTo(0);
Truth.assertThat(dump.getApplicationUtilization()).isEqualTo(0);
Truth.assertThat(dump.getMemoryUtilization()).isEqualTo(0);
Truth.assertThat(dump.getQps()).isEqualTo(0);
Truth.assertThat(dump.getEps()).isEqualTo(0);
Expand All @@ -108,6 +114,8 @@ public void lastValueWinForMetricsWithSameName() {
recorder.recordRequestCostMetric("cost1", 6441.341);
recorder.recordRequestCostMetric("cost1", 4654.67);
recorder.recordRequestCostMetric("cost2", 75.83);
recorder.recordApplicationUtilizationMetric(0.92);
recorder.recordApplicationUtilizationMetric(1.78);
recorder.recordMemoryUtilizationMetric(0.13);
recorder.recordMemoryUtilizationMetric(0.31);
recorder.recordUtilizationMetric("util1", 0.2837421);
Expand All @@ -121,6 +129,7 @@ public void lastValueWinForMetricsWithSameName() {
MetricReport dump = recorder.finalizeAndDump2();
Truth.assertThat(dump.getRequestCostMetrics())
.containsExactly("cost1", 4654.67, "cost2", 75.83);
Truth.assertThat(dump.getApplicationUtilization()).isEqualTo(1.78);
Truth.assertThat(dump.getMemoryUtilization()).isEqualTo(0.93840);
Truth.assertThat(dump.getUtilizationMetrics())
.containsExactly("util1", 0.843233);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,16 @@ final class OrcaReportListener implements OrcaPerRequestReportListener, OrcaOobR
@Override
public void onLoadReport(MetricReport report) {
double newWeight = 0;
if (report.getCpuUtilization() > 0 && report.getQps() > 0) {
// Prefer application utilization and fallback to CPU utilization if unset.
double utilization =
report.getApplicationUtilization() > 0 ? report.getApplicationUtilization()
: report.getCpuUtilization();
if (utilization > 0 && report.getQps() > 0) {
double penalty = 0;
if (report.getEps() > 0 && errorUtilizationPenalty > 0) {
penalty = report.getEps() / report.getQps() * errorUtilizationPenalty;
}
newWeight = report.getQps() / (report.getCpuUtilization() + penalty);
newWeight = report.getQps() / (utilization + penalty);
}
if (newWeight == 0) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ public void close(Status status, Metadata trailers) {
private static OrcaLoadReport.Builder fromInternalReport(MetricReport internalReport) {
return OrcaLoadReport.newBuilder()
.setCpuUtilization(internalReport.getCpuUtilization())
.setApplicationUtilization(internalReport.getApplicationUtilization())
.setMemUtilization(internalReport.getMemoryUtilization())
.setRpsFractional(internalReport.getQps())
.setEps(internalReport.getEps())
Expand All @@ -138,6 +139,10 @@ private static void mergeMetrics(
if (isReportValueSet(cpu)) {
metricRecorderReportBuilder.setCpuUtilization(cpu);
}
double applicationUtilization = callMetricRecorderReport.getApplicationUtilization();
if (isReportValueSet(applicationUtilization)) {
metricRecorderReportBuilder.setApplicationUtilization(applicationUtilization);
}
double mem = callMetricRecorderReport.getMemoryUtilization();
if (isReportValueSet(mem)) {
metricRecorderReportBuilder.setMemUtilization(mem);
Expand Down
5 changes: 3 additions & 2 deletions xds/src/main/java/io/grpc/xds/orca/OrcaPerRequestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,9 @@ public void inboundTrailers(Metadata trailers) {

static MetricReport fromOrcaLoadReport(OrcaLoadReport loadReport) {
return InternalCallMetricRecorder.createMetricReport(loadReport.getCpuUtilization(),
loadReport.getMemUtilization(), loadReport.getRpsFractional(), loadReport.getEps(),
loadReport.getRequestCostMap(), loadReport.getUtilizationMap());
loadReport.getApplicationUtilization(), loadReport.getMemUtilization(),
loadReport.getRpsFractional(), loadReport.getEps(), loadReport.getRequestCostMap(),
loadReport.getUtilizationMap());
}

/**
Expand Down
1 change: 1 addition & 0 deletions xds/src/main/java/io/grpc/xds/orca/OrcaServiceImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ private OrcaLoadReport generateMetricsReport() {
MetricReport internalReport =
InternalMetricRecorder.getMetricReport(metricRecorder);
return OrcaLoadReport.newBuilder().setCpuUtilization(internalReport.getCpuUtilization())
.setApplicationUtilization(internalReport.getApplicationUtilization())
.setMemUtilization(internalReport.getMemoryUtilization())
.setRpsFractional(internalReport.getQps())
.setEps(internalReport.getEps())
Expand Down
Loading