Skip to content

Commit

Permalink
feat: improve batching summary errors (#2509)
Browse files Browse the repository at this point in the history
To avoid data loss, the Batching api throws an exception when the
batcher is closed when at least 1 entry failed. To help debugging, the
BatchingException tries to be helpful by giving some details about the
errors. Since the Batcher lifetime can extend indefinitely, the Batcher
implementation tries to keep a bound on the amount of resources it uses
to track the errors. Previously it would only track exception types and
counts. The idea being that if the customer has the need for fine
grained details, the customer can retrieve the details from the
ApiFuture that was returned when an entry was added. However this hasn't
panned out and creates confusion.

This PR stores a sample of the error messages. The sample is by default
capped to 50 entry and 50 rpc error messages. This can be adjusted by
setting system properties

Thank you for opening a Pull Request! For general contributing
guidelines, please refer to [contributing
guide](https://github.com/googleapis/gapic-generator-java/blob/main/CONTRIBUTING.md)

Before submitting your PR, there are a few things you can do to make
sure it goes smoothly:

- [ ] Make sure to open an issue as a
[bug/issue](https://github.com/googleapis/gapic-generator-java/issues/new/choose)
before writing your code! That way we can discuss the change, evaluate
designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> ☕️
  • Loading branch information
igorbernstein2 authored Feb 29, 2024
1 parent aab08b4 commit d6964a4
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@

import com.google.api.gax.rpc.ApiException;
import com.google.api.gax.rpc.StatusCode.Code;
import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import com.google.common.collect.EvictingQueue;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
Expand All @@ -52,6 +54,21 @@ class BatcherStats {
private final Map<Class, Integer> entryExceptionCounts = new HashMap<>();
private final Map<Code, Integer> entryStatusCounts = new HashMap<>();

/**
* The maximum number of error messages that a Batcher instance will retain. By default, a Batcher
* instance will retain 50 entry error messages and 50 RPC error messages. This limit can be
* temporarily increased by setting the {@code com.google.api.gax.batching.errors.max-samples}
* system property. This should only be needed in very rare situations and should not be
* considered part of the public api.
*/
private final int MAX_ERROR_MSG_SAMPLES =
Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50);

private final EvictingQueue<String> sampleOfRpcErrors =
EvictingQueue.create(MAX_ERROR_MSG_SAMPLES);
private final EvictingQueue<String> sampleOfEntryErrors =
EvictingQueue.create(MAX_ERROR_MSG_SAMPLES);

/**
* Records the count of the exception and it's type when a complete batch failed to apply.
*
Expand All @@ -69,6 +86,8 @@ synchronized void recordBatchFailure(Throwable throwable) {
requestStatusCounts.put(code, oldStatusCount + 1);
}

sampleOfRpcErrors.add(throwable.toString());

int oldExceptionCount = MoreObjects.firstNonNull(requestExceptionCounts.get(exceptionClass), 0);
requestExceptionCounts.put(exceptionClass, oldExceptionCount + 1);
}
Expand Down Expand Up @@ -96,6 +115,8 @@ synchronized <T extends BatchEntry> void recordBatchElementsCompletion(
Throwable actualCause = throwable.getCause();
Class exceptionClass = actualCause.getClass();

sampleOfEntryErrors.add(actualCause.toString());

if (actualCause instanceof ApiException) {
Code code = ((ApiException) actualCause).getStatusCode().getCode();
exceptionClass = ApiException.class;
Expand Down Expand Up @@ -144,6 +165,17 @@ synchronized BatchingException asException() {
.append(buildExceptionList(entryExceptionCounts, entryStatusCounts))
.append(".");
}

if (!sampleOfRpcErrors.isEmpty()) {
messageBuilder.append(" Sample of RPC errors: ");
messageBuilder.append(Joiner.on(", ").join(sampleOfRpcErrors));
messageBuilder.append(".");
}
if (!sampleOfEntryErrors.isEmpty()) {
messageBuilder.append(" Sample of entry errors: ");
messageBuilder.append(Joiner.on(", ").join(sampleOfEntryErrors));
messageBuilder.append(".");
}
return new BatchingException(messageBuilder.toString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ public void testRequestFailuresOnly() {

batcherStats.recordBatchFailure(
ApiExceptionFactory.createException(
new RuntimeException(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false));
"fake api error",
new RuntimeException(),
FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT),
false));

batcherStats.recordBatchFailure(new RuntimeException("Request failed"));

Expand All @@ -65,6 +68,10 @@ public void testRequestFailuresOnly() {
assertThat(exception).hasMessageThat().contains("1 RuntimeException");
assertThat(exception).hasMessageThat().contains("1 ApiException(1 INVALID_ARGUMENT)");
assertThat(exception).hasMessageThat().contains("and 0 partial failures.");
assertThat(exception)
.hasMessageThat()
.contains(
"com.google.api.gax.rpc.InvalidArgumentException: fake api error, java.lang.RuntimeException: Request failed.");
}

@Test
Expand All @@ -79,7 +86,10 @@ public void testEntryFailureOnly() {
SettableApiFuture<Integer> batchTwoResult = SettableApiFuture.create();
batchTwoResult.setException(
ApiExceptionFactory.createException(
new RuntimeException(), FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), false));
"fake entry error",
new RuntimeException(),
FakeStatusCode.of(StatusCode.Code.UNAVAILABLE),
false));
batcherStats.recordBatchElementsCompletion(
ImmutableList.of(BatchEntry.create(2, batchTwoResult)));

Expand All @@ -89,6 +99,10 @@ public void testEntryFailureOnly() {
.contains("The 2 partial failures contained 2 entries that failed with:");
assertThat(ex).hasMessageThat().contains("1 ApiException(1 UNAVAILABLE)");
assertThat(ex).hasMessageThat().contains("1 IllegalStateException");
assertThat(ex)
.hasMessageThat()
.contains(
"Sample of entry errors: java.lang.IllegalStateException: local element failure, com.google.api.gax.rpc.UnavailableException: fake entry error.");
}

@Test
Expand All @@ -110,6 +124,8 @@ public void testRequestAndEntryFailures() {
.contains(
"Batching finished with 1 batches failed to apply due to: 1 RuntimeException and 1 "
+ "partial failures. The 1 partial failures contained 1 entries that failed with:"
+ " 1 ApiException(1 ALREADY_EXISTS).");
+ " 1 ApiException(1 ALREADY_EXISTS)."
+ " Sample of RPC errors: java.lang.RuntimeException: Batch failure."
+ " Sample of entry errors: com.google.api.gax.rpc.AlreadyExistsException: java.lang.RuntimeException.");
}
}

0 comments on commit d6964a4

Please sign in to comment.