Skip to content

Commit

Permalink
Automatically retry the build if it encounters a remote cache eviction error
Browse files Browse the repository at this point in the history
  • Loading branch information
coeuvre committed Mar 13, 2023
1 parent f9008f6 commit f997f5b
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,7 @@ protected Completable onErrorResumeNext(Throwable error) {
new EnvironmentalExecException(
(BulkTransferException) error,
FailureDetail.newBuilder()
.setMessage(
"Failed to fetch blobs because they do not exist remotely."
+ " Build without the Bytes does not work if your remote"
+ " cache evicts blobs during builds")
.setMessage("Failed to fetch blobs because they do not exist remotely")
.setSpawn(FailureDetails.Spawn.newBuilder().setCode(code))
.build());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import com.google.devtools.build.lib.util.AnsiStrippingOutputStream;
import com.google.devtools.build.lib.util.DebugLoggerConfigurator;
import com.google.devtools.build.lib.util.DetailedExitCode;
import com.google.devtools.build.lib.util.ExitCode;
import com.google.devtools.build.lib.util.InterruptedFailureDetails;
import com.google.devtools.build.lib.util.LoggingUtil;
import com.google.devtools.build.lib.util.Pair;
Expand Down Expand Up @@ -148,6 +149,54 @@ public BlazeCommandResult exec(
Optional<List<Pair<String, String>>> startupOptionsTaggedWithBazelRc,
List<Any> commandExtensions)
throws InterruptedException {
var remoteCacheEvictionRetries = parseRemoteCacheEvictionRetries(args, outErr);
while (true) {
var result =
execOnce(
invocationPolicy,
args,
outErr,
lockingMode,
clientDescription,
firstContactTimeMillis,
startupOptionsTaggedWithBazelRc,
commandExtensions);
if (result.getExitCode() == ExitCode.REMOTE_CACHE_EVICTED && remoteCacheEvictionRetries > 0) {
--remoteCacheEvictionRetries;
outErr.printErrLn("Found remote cache eviction error, retrying the build...");
continue;
}
return result;
}
}

/**
 * Extracts the retry budget from a raw {@code --experimental_remote_cache_eviction_retries=N}
 * argument.
 *
 * <p>The arguments are walked from last to first so that, when the flag is repeated, the final
 * occurrence decides the result. This keeps the value used by the retry loop in line with the
 * "last value wins" rule applied to single-valued flags; a front-to-back scan would instead pick
 * the first occurrence.
 *
 * @param args the raw command-line arguments to scan
 * @param outErr receives an error line when the flag's value is not a valid integer
 * @return the parsed retry count, or 0 when the flag is absent or its value cannot be parsed
 */
private int parseRemoteCacheEvictionRetries(List<String> args, OutErr outErr) {
  var retryFlagPrefix = "--experimental_remote_cache_eviction_retries=";
  // Iterate backwards: the first match found this way is the last occurrence on the command line.
  for (var it = args.listIterator(args.size()); it.hasPrevious(); ) {
    var arg = it.previous();
    if (!arg.startsWith(retryFlagPrefix)) {
      continue;
    }
    try {
      return Integer.parseInt(arg.substring(retryFlagPrefix.length()));
    } catch (NumberFormatException e) {
      // A malformed value disables retries rather than aborting the command here.
      outErr.printErrLn(
          String.format(
              "Failed to parse retry times: %s, remote cache eviction retry is disabled", e));
      return 0;
    }
  }
  return 0;
}

public BlazeCommandResult execOnce(
InvocationPolicy invocationPolicy,
List<String> args,
OutErr outErr,
LockingMode lockingMode,
String clientDescription,
long firstContactTimeMillis,
Optional<List<Pair<String, String>>> startupOptionsTaggedWithBazelRc,
List<Any> commandExtensions)
throws InterruptedException {
OriginalUnstructuredCommandLineEvent originalCommandLine =
new OriginalUnstructuredCommandLineEvent(args);
Preconditions.checkNotNull(clientDescription);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,15 @@ public String getTypeDescription() {
+ " them.")
public boolean heuristicallyDropNodes;

/**
 * Backing field for {@code --experimental_remote_cache_eviction_retries}: the maximum number of
 * times the build is retried after it hits a remote cache eviction error. The declared default is
 * 0.
 *
 * <p>NOTE(review): the command dispatcher appears to scan the raw arguments for this same flag
 * name as a string prefix rather than reading this field, so the flag name here and the prefix
 * there presumably have to be kept in sync; confirm before renaming.
 */
@Option(
name = "experimental_remote_cache_eviction_retries",
defaultValue = "0",
documentationCategory = OptionDocumentationCategory.REMOTE,
effectTags = {OptionEffectTag.EXECUTION},
help =
"The maximum number of attempts to retry if the build encountered remote cache eviction error.")
public int remoteRetryOnCacheEviction;

/** The option converter to check that the user can only specify legal profiler tasks. */
public static class ProfilerTaskConverter extends EnumConverter<ProfilerTask> {
public ProfilerTaskConverter() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -474,9 +474,7 @@ public void remoteCacheEvictBlobs_whenPrefetchingInput_exitWithCode39() throws E
// Assert: Exit code is 39
assertThat(error)
.hasMessageThat()
.contains(
"Build without the Bytes does not work if your remote cache evicts blobs"
+ " during builds");
.contains("Failed to fetch blobs because they do not exist remotely");
assertThat(error).hasMessageThat().contains(String.format("%s/%s", hashCode, bytes.length));
assertThat(error.getDetailedExitCode().getExitCode().getNumericExitCode()).isEqualTo(39);
}
Expand Down
58 changes: 58 additions & 0 deletions src/test/shell/bazel/remote/build_without_the_bytes_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1685,4 +1685,62 @@ end_of_record"
expect_log "$expected_result"
}

# Checks that a build which loses its remote blobs mid-way recovers on its own
# when --experimental_remote_cache_eviction_retries is set: the build is
# expected to succeed and to log the retry notice.
function test_remote_cache_eviction_when_prefetching_input() {
  # Flags shared by every build invocation in this test.
  local -a common_flags=(
    "--remote_executor=grpc://localhost:${worker_port}"
    --remote_download_minimal
  )

  # Workspace layout: bar consumes foo's output and is pinned to local
  # execution, so foo.out has to be fetched as an input for bar.
  mkdir -p a
  echo foo > a/foo.in
  echo bar > a/bar.in
  cat > a/BUILD <<'EOF'
genrule(
name = 'foo',
srcs = ['foo.in'],
outs = ['foo.out'],
cmd = 'cat $(SRCS) > $@',
)
genrule(
name = 'bar',
srcs = ['foo.out', 'bar.in'],
outs = ['bar.out'],
cmd = 'cat $(SRCS) > $@',
tags = ['no-remote-exec'],
)
EOF

  # Step 1: warm up the remote cache.
  bazel build "${common_flags[@]}" //a:bar >& $TEST_log \
    || fail "Failed to build"

  bazel clean

  # Step 2: rebuild from scratch; the intermediate foo.out must stay remote.
  bazel build "${common_flags[@]}" //a:bar >& $TEST_log \
    || fail "Failed to build"

  if [[ -f bazel-bin/a/foo.out ]]; then
    fail "Expected intermediate output bazel-bin/a/foo.out to not be downloaded"
  fi

  # Step 3: restart the worker so that the previously stored blobs are gone.
  stop_worker
  start_worker

  echo "updated bar" > a/bar.in

  # Step 4: the incremental build runs into the eviction error; with retries
  # enabled Bazel should restart the build and regenerate the missing blobs.
  bazel build \
    "${common_flags[@]}" \
    --experimental_remote_cache_eviction_retries=5 \
    //a:bar >& $TEST_log || fail "Failed to build"

  expect_log "Found remote cache eviction error, retrying the build..."
}

# Entry point of this test file: invokes run_suite with the suite title.
# NOTE(review): run_suite is defined outside this view; presumably it discovers
# and runs the test_* functions declared above — confirm against the test
# framework.
run_suite "Build without the Bytes tests"

0 comments on commit f997f5b

Please sign in to comment.