From c506a8f30f67ff2823bbcb862821aa9f712746a8 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 21 Aug 2024 00:44:04 -0700 Subject: [PATCH] No longer eagerly fetch labels in repo rule attributes Eager fetching of all labels listed in repo rule attributes was introduced as a performance optimization to avoid costly restarts. Now that restarts are gone by default, this is no longer a benefit as it can cause unnecessary fetches and also create cycles where there wouldn't be any without this behavior (e.g. when two repos write each others labels into a file without resolving them). Work towards #19055 Closes #23371. PiperOrigin-RevId: 665744319 Change-Id: Ia27f207793a2da3fb8e37743b328483f9d45192c --- .../starlark/StarlarkRepositoryContext.java | 63 ---- .../starlark/StarlarkRepositoryFunction.java | 9 - src/test/shell/bazel/BUILD | 7 - .../shell/bazel/external_integration_test.sh | 4 +- .../shell/bazel/starlark_prefetching_test.sh | 281 ------------------ 5 files changed, 2 insertions(+), 362 deletions(-) delete mode 100755 src/test/shell/bazel/starlark_prefetching_test.sh diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 028f5e2ae34d56..4c5ed3d3a6f040 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -58,7 +58,6 @@ import net.starlark.java.annot.StarlarkMethod; import net.starlark.java.eval.Dict; import net.starlark.java.eval.EvalException; -import net.starlark.java.eval.Sequence; import net.starlark.java.eval.Starlark; import net.starlark.java.eval.StarlarkInt; import net.starlark.java.eval.StarlarkSemantics; @@ -516,66 +515,4 @@ public void watchTree(Object path) public String toString() { return "repository_ctx[" + rule.getLabel() + "]"; } - - /** - * Try to compute the paths of all attributes that are labels, including labels in list and dict - * arguments. - * - *

The value is ignored, but any missing information from the environment is detected (and an - * exception thrown). In this way, we can enforce that all arguments are evaluated before we start - * potentially more expensive operations. - */ - // TODO(wyv): somehow migrate this to the base context too. - public void enforceLabelAttributes() - throws EvalException, InterruptedException, NeedsSkyframeRestartException { - // TODO: If a labels fails to resolve to an existing regular file, we do not add a dependency on - // that fact - if the file is created later or changes its type, it will not trigger a rerun of - // the repository function. - StructImpl attr = getAttr(); - // Batch restarts as they are expensive - boolean needsRestart = false; - for (String name : attr.getFieldNames()) { - Object value = attr.getValue(name); - if (value instanceof Label) { - if (dependOnLabelIgnoringErrors((Label) value)) { - needsRestart = true; - } - } - if (value instanceof Sequence) { - for (Object entry : (Sequence) value) { - if (entry instanceof Label) { - if (dependOnLabelIgnoringErrors((Label) entry)) { - needsRestart = true; - } - } - } - } - if (value instanceof Dict) { - for (Object entry : ((Dict) value).keySet()) { - if (entry instanceof Label) { - if (dependOnLabelIgnoringErrors((Label) entry)) { - needsRestart = true; - } - } - } - } - } - - if (needsRestart) { - throw new NeedsSkyframeRestartException(); - } - } - - private boolean dependOnLabelIgnoringErrors(Label label) throws InterruptedException { - try { - getPathFromLabel(label); - } catch (NeedsSkyframeRestartException e) { - return true; - } catch (EvalException e) { - // EvalExceptions indicate labels not referring to existing files. This is fine, - // as long as they are never resolved to files in the execution of the rule; we allow - // non-strict rules. - } - return false; - } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java index b546dabc2f79ab..0f71e8a9fef4e4 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java @@ -302,15 +302,6 @@ private RepositoryDirectoryValue.Builder fetchInternal( PrecomputedValue.REMOTE_EXECUTION_ENABLED.get(env); } - // Since restarting a repository function can be really expensive, we first ensure that - // all label-arguments can be resolved to paths. - try { - starlarkRepositoryContext.enforceLabelAttributes(); - } catch (NeedsSkyframeRestartException e) { - // Missing values are expected; just restart before we actually start the rule - return null; - } - // This rule is mainly executed for its side effect. Nevertheless, the return value is // of importance, as it provides information on how the call has to be modified to be a // reproducible rule. diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD index 22025b222c56db..fdfbc8cbfe2a97 100644 --- a/src/test/shell/bazel/BUILD +++ b/src/test/shell/bazel/BUILD @@ -844,13 +844,6 @@ sh_test( tags = ["no_windows"], ) -sh_test( - name = "starlark_prefetching_test", - srcs = ["starlark_prefetching_test.sh"], - data = [":test-deps"], - tags = ["no_windows"], -) - sh_test( name = "external_starlark_load_test", size = "large", diff --git a/src/test/shell/bazel/external_integration_test.sh b/src/test/shell/bazel/external_integration_test.sh index e3682c19c0c562..4efa706a937fc2 100755 --- a/src/test/shell/bazel/external_integration_test.sh +++ b/src/test/shell/bazel/external_integration_test.sh @@ -2665,7 +2665,7 @@ EOF mkdir main cd main - cat > foo.bzl <<'EOF' + cat > foo.bzl < bar.bzl <<'EOF' + cat > bar.bzl <&2; exit 1; } - -test_label_arg() { - # Verify that a repository rule does not get restarted, if accessing - # one of its label arguments as file. - WRKDIR=`pwd` - rm -rf repo - rm -rf log - mkdir repo - cd repo - touch BUILD - cat > rule.bzl <> ${WRKDIR}/log"]) - ctx.file("WORKSPACE", "workspace(name = \"%s\")\n" % ctx.name) - ctx.symlink(ctx.attr.build_file, "BUILD") - -myrule=repository_rule(implementation=_impl, - attrs={ - "build_file" : attr.label(), - }) -EOF - cat > ext.BUILD <<'EOF' -genrule( - name="foo", - outs=["foo.txt"], - cmd = "echo foo > $@", -) -EOF - cat > WORKSPACE <<'EOF' -load("//:rule.bzl", "myrule") -myrule(name="ext", build_file="//:ext.BUILD") -EOF - write_default_lockfile "MODULE.bazel.lock" - bazel build @ext//:foo || fail "expected success" - [ `cat "${WRKDIR}/log" | wc -l` -eq 1 ] \ - || fail "did not find precisely one invocation of the action" -} - -test_unused_invalid_label_arg() { - # Verify that we preserve the behavior of allowing to pass labels that - # do referring to an non-existing path, if they are never used. - WRKDIR=`pwd` - rm -rf repo - mkdir repo - cd repo - touch BUILD - cat > rule.bzl <<'EOF' -def _impl(ctx): - ctx.file("WORKSPACE", "workspace(name = \"%s\")\n" % ctx.name) - ctx.file("BUILD", - "genrule(name=\"foo\", outs=[\"foo.txt\"], cmd = \"echo foo > $@\")") - -myrule=repository_rule(implementation=_impl, - attrs={ - "unused" : attr.label(), - }) -EOF - cat > WORKSPACE <<'EOF' -load("//:rule.bzl", "myrule") -myrule(name="ext", unused="//does/not/exist:file") -EOF - write_default_lockfile "MODULE.bazel.lock" - bazel build @ext//:foo || fail "expected success" -} - - -test_label_list_arg() { - # Verify that a repository rule does not get restarted, if accessing - # the entries of a label list as files. - WRKDIR=`pwd` - rm -rf repo - rm -rf log - mkdir repo - cd repo - touch BUILD - cat > rule.bzl <> ${WRKDIR}/log"]) - ctx.file("WORKSPACE", "workspace(name = \"%s\")\n" % ctx.name) - ctx.file("BUILD", """ -genrule( - name="foo", - srcs= ["src.txt"], - outs=["foo.txt"], - cmd = "cp \$< \$@", -) -""") - for f in ctx.attr.data: - ctx.execute(["/bin/sh", "-c", "cat %s >> src.txt" % ctx.path(f)]) - -myrule=repository_rule(implementation=_impl, - attrs={ - "data" : attr.label_list(), - }) -EOF - cat > WORKSPACE <<'EOF' -load("//:rule.bzl", "myrule") -myrule(name="ext", data = ["//:a.txt", "//:b.txt"]) -EOF - write_default_lockfile "MODULE.bazel.lock" - echo Hello > a.txt - echo World > b.txt - bazel build @ext//:foo || fail "expected success" - [ `cat "${WRKDIR}/log" | wc -l` -eq 1 ] \ - || fail "did not find precisely one invocation of the action" -} - -test_unused_invalid_label_list_arg() { - # Verify that we preserve the behavior of allowing to pass labels that - # do referring to an non-existing path, if they are never used. - # Here, test it if such labels are passed in a label list. - WRKDIR=`pwd` - rm -rf repo - mkdir repo - cd repo - touch BUILD - cat > rule.bzl <<'EOF' -def _impl(ctx): - ctx.file("WORKSPACE", "workspace(name = \"%s\")\n" % ctx.name) - ctx.file("BUILD", - "genrule(name=\"foo\", outs=[\"foo.txt\"], cmd = \"echo foo > $@\")") - -myrule=repository_rule(implementation=_impl, - attrs={ - "unused_list" : attr.label_list(), - }) -EOF - cat > WORKSPACE <<'EOF' -load("//:rule.bzl", "myrule") -myrule(name="ext", unused_list=["//does/not/exist:file1", - "//does/not/exists:file2"]) -EOF - write_default_lockfile "MODULE.bazel.lock" - bazel build @ext//:foo || fail "expected success" -} - -# Regression test for https://github.com/bazelbuild/bazel/issues/10515 -test_label_keyed_string_dict_arg() { - # Verify that Bazel preloads Labels from label_keyed_string_dict, and as a - # result, it runs the repository's implementation only once (i.e. it won't - # restart the corresponding SkyFunction). - WRKDIR=`pwd` - rm -rf repo - rm -rf log - mkdir repo - cd repo - touch BUILD - cat > rule.bzl <> ${WRKDIR}/log"]) - ctx.file("WORKSPACE", "workspace(name = \"%s\")\n" % ctx.name) - ctx.file("BUILD", """ -genrule( - name = "foo", - srcs = ["src.txt"], - outs = ["foo.txt"], - cmd = "cp \$< \$@", -) -""") - for f in ctx.attr.data: - # ctx.path(f) shouldn't trigger a restart since we've prefetched the value. - ctx.execute(["/bin/sh", "-c", "cat %s >> src.txt" % ctx.path(f)]) - -myrule = repository_rule( - implementation = _impl, - attrs = { - "data": attr.label_keyed_string_dict(), - }, -) -EOF - cat > WORKSPACE <<'EOF' -load("//:rule.bzl", "myrule") -myrule(name="ext", data = {"//:a.txt": "a", "//:b.txt": "b"}) -EOF - write_default_lockfile "MODULE.bazel.lock" - echo Hello > a.txt - echo World > b.txt - bazel build @ext//:foo || fail "expected success" - [ `cat "${WRKDIR}/log" | wc -l` -eq 1 ] \ - || fail "did not find precisely one invocation of the action" -} - -test_unused_invalid_label_keyed_string_dict_arg() { - # Verify that we preserve the behavior of allowing to pass labels that - # do referring to an non-existing path, if they are never used. - # Here, test it if such labels are passed in a label_keyed_string_dict. - WRKDIR=`pwd` - rm -rf repo - mkdir repo - cd repo - touch BUILD - cat > rule.bzl <<'EOF' -def _impl(ctx): - ctx.file("WORKSPACE", "workspace(name = \"%s\")\n" % ctx.name) - ctx.file("BUILD", - "genrule(name=\"foo\", outs=[\"foo.txt\"], cmd = \"echo foo > $@\")") - -myrule=repository_rule(implementation=_impl, - attrs={ - "unused_dict" : attr.label_keyed_string_dict(), - }) -EOF - cat > WORKSPACE <<'EOF' -load("//:rule.bzl", "myrule") -myrule(name="ext", unused_dict={"//does/not/exist:file1": "file1", - "//does/not/exists:file2": "file2"}) -EOF - write_default_lockfile "MODULE.bazel.lock" - bazel build @ext//:foo || fail "expected success" -} - -# Regression test for https://github.com/bazelbuild/bazel/issues/13441 -function test_files_tracked_with_non_existing_files() { - cat > rules.bzl <<'EOF' -def _repo_impl(ctx): - ctx.symlink(ctx.path(Label("@//:WORKSPACE")).dirname, "link") - print("b.txt: " + ctx.read("link/b.txt")) - print("c.txt: " + ctx.read("link/c.txt")) - - ctx.file("BUILD") - ctx.file("WORKSPACE") - -repo = repository_rule( - _repo_impl, - attrs = {"_files": attr.label_list( - default = [ - Label("@//:a.txt"), - Label("@//:b.txt"), - Label("@//:c.txt"), - ], - )}, -) -EOF - - cat > WORKSPACE <<'EOF' -load(":rules.bzl", "repo") -repo(name = "ext") -EOF - write_default_lockfile "MODULE.bazel.lock" - touch BUILD - - # a.txt is intentionally not created - echo "bbbb" > b.txt - echo "cccc" > c.txt - - # The missing file dependency is tolerated. - bazel build @ext//:all &> "$TEST_log" || fail "Expected repository rule to build" - expect_log "b.txt: bbbb" - expect_log "c.txt: cccc" - - echo "not_cccc" > c.txt - bazel build @ext//:all &> "$TEST_log" || fail "Expected repository rule to build" - expect_log "b.txt: bbbb" - expect_log "c.txt: not_cccc" -} - -run_suite "Starlark repo prefetching tests"