Skip to content

Commit

Permalink
adhoc/shell: allow cache scope to be configured for process executions (
Browse files Browse the repository at this point in the history
#21245)

Expose a subset of the `ProcessCacheScope` capability to users of the
`adhoc_tool` / `shell_command` target types. The new `cache_scope` field
on both target types configures the following cache scope behaviors:

- `from_environment`: Use the default cache scope for the applicable
environment in which the process will execute (generally `success`,
except `session` for `experimental_workspace_environment`). This is the
default.

- `success`: Cache successful executions of the process.

- `success_per_pantsd_restart`: Cache successful executions of the
process for the life of the applicable pantsd process.

- `session`: Only cache the result for a single Pants session. This will
usually be a single invocation of the pants tool.

The intent is to support users who wish to re-execute certain processes
more frequently (typically because their process has non-idempotent
and/or non-pure semantics).
  • Loading branch information
tdyas authored Sep 15, 2024
1 parent 01137e5 commit 9dc2fb1
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/notes/2.24.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ The `pants.backend.url_handlers.s3` backend now correctly passes along query par

Pants will now warn if any errors are encountered while fingerprinting candidate binaries for the `system_binary` target type. The warnings may be disabled by setting the new `log_fingerprinting_errors` field on `system_binary` to `False`.

#### Shell

Added a new `cache_scope` field to `adhoc_tool` and `shell_command` targets to allow configuration of the "cache scope" of the invoked process. The cache scope determines how long Pants will cache the result of the invoked process absent any other invalidation of the result via source or dependency changes.

### Plugin API changes

Expand Down
4 changes: 4 additions & 0 deletions src/python/pants/backend/adhoc/adhoc_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from pants.backend.adhoc.target_types import (
AdhocToolArgumentsField,
AdhocToolCacheScopeField,
AdhocToolExecutionDependenciesField,
AdhocToolExtraEnvVarsField,
AdhocToolLogOutputField,
Expand Down Expand Up @@ -87,6 +88,9 @@ async def run_in_sandbox_request(
output_directories = target.get(AdhocToolOutputDirectoriesField).value or ()

cache_scope = environment_target.default_cache_scope
maybe_override_cache_scope = target.get(AdhocToolCacheScopeField).enum_value
if maybe_override_cache_scope is not None:
cache_scope = maybe_override_cache_scope

workspace_invalidation_globs: PathGlobs | None = None
workspace_invalidation_sources = (
Expand Down
33 changes: 33 additions & 0 deletions src/python/pants/backend/adhoc/adhoc_tool_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,39 @@ def run(target_name: str) -> str:
assert path_off == expected_path


def test_adhoc_tool_cache_scope_session(rule_runner: PythonRuleRunner) -> None:
    """Check that `cache_scope="session"` caches within a session but not across sessions.

    The tool writes `$RANDOM` to its output file, so two snapshots are equal only
    when the second one came from the cache rather than from a fresh execution.
    """
    build_file = dedent(
        """\
        system_binary(name="bash", binary_name="bash")
        adhoc_tool(
        name="cmd",
        runnable=":bash",
        # Use a random value so we can detect when re-execution occurs.
        args=["-c", "echo $RANDOM > out.log"],
        output_files=["out.log"],
        cache_scope="session",
        )
        """
    )
    rule_runner.write_files({"src/BUILD": build_file, "src/a-file": ""})
    tool_address = Address("src", target_name="cmd")

    # Same session: the second request must be served from the cache, so the
    # random output is unchanged.
    first_run = execute_adhoc_tool(rule_runner, tool_address)
    cached_run = execute_adhoc_tool(rule_runner, tool_address)
    assert first_run.snapshot == cached_run.snapshot

    # New session: the session-scoped cache entry no longer applies, so the
    # process runs again and produces a different random value.
    rule_runner.new_session("second-session")
    rule_runner.set_options([])
    fresh_run = execute_adhoc_tool(rule_runner, tool_address)
    assert cached_run.snapshot != fresh_run.snapshot


def test_adhoc_tool_check_outputs(rule_runner: PythonRuleRunner) -> None:
rule_runner.write_files(
{
Expand Down
43 changes: 43 additions & 0 deletions src/python/pants/backend/adhoc/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pants.core.util_rules.adhoc_process_support import PathEnvModifyMode
from pants.core.util_rules.environments import EnvironmentField
from pants.engine.fs import GlobExpansionConjunction
from pants.engine.process import ProcessCacheScope
from pants.engine.target import (
COMMON_TARGET_FIELDS,
BoolField,
Expand All @@ -23,6 +24,7 @@
Target,
ValidNumbers,
)
from pants.util.docutil import bin_name
from pants.util.strutil import help_text


Expand Down Expand Up @@ -356,6 +358,46 @@ def enum_value(self) -> OutputsMatchMode:
return OutputsMatchMode(self.value)


class AdhocToolCacheScopeField(StringField):
    # User-facing `cache_scope` field. The string chosen by the user is translated
    # by `enum_value` into a `ProcessCacheScope`, or into `None` when the default
    # (`from_environment`) is in effect and the caller should keep its own default.
    alias = "cache_scope"
    default = "from_environment"
    valid_choices = ("from_environment", "success", "success_per_pantsd_restart", "session")
    help = help_text(
        "\n"
        'Set the "cache scope" of the executed process to provided value. The cache scope determines for how long\n'
        "Pants will cache the result of the process execution (assuming no changes to files or dependencies\n"
        "invalidate the result in the meantime).\n"
        "The valid values are:\n"
        "- `from_environment`: Use the default cache scope for the applicable environment in which the process will execute.\n"
        "This is `success` for all environments except for `experimental_workspace_environment`, in which case `session`\n"
        "cache scope will be used.\n"
        "- `success`: Cache successful executions of the process.\n"
        "- `success_per_pantsd_restart`: Cache successful executions of the process for the life of the\n"
        "applicable pantsd process.\n"
        "- `session`: Only cache the result for a single Pants session. This will usually be a single invocation of the\n"
        f"`{bin_name()}` tool.\n"
    )

    @property
    def enum_value(self) -> ProcessCacheScope | None:
        """Return the explicitly requested `ProcessCacheScope`, or `None` if none was requested.

        `None` is returned for `from_environment` (the default) and for any other
        value without an explicit mapping.
        """
        explicit_scopes = {
            "success": ProcessCacheScope.SUCCESSFUL,
            "success_per_pantsd_restart": ProcessCacheScope.PER_RESTART_SUCCESSFUL,
            "session": ProcessCacheScope.PER_SESSION,
        }
        # A missing key means "no override": default case `from_environment`.
        return explicit_scopes.get(self.value)


class AdhocToolTarget(Target):
alias: ClassVar[str] = "adhoc_tool"
core_fields = (
Expand All @@ -378,6 +420,7 @@ class AdhocToolTarget(Target):
AdhocToolWorkspaceInvalidationSourcesField,
AdhocToolPathEnvModifyModeField,
AdhocToolOutputsMatchMode,
AdhocToolCacheScopeField,
EnvironmentField,
)
help = help_text(
Expand Down
6 changes: 6 additions & 0 deletions src/python/pants/backend/shell/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from enum import Enum

from pants.backend.adhoc.target_types import (
AdhocToolCacheScopeField,
AdhocToolDependenciesField,
AdhocToolExecutionDependenciesField,
AdhocToolExtraEnvVarsField,
Expand Down Expand Up @@ -394,6 +395,10 @@ class ShellCommandOutputsMatchMode(AdhocToolOutputsMatchMode):
pass


class ShellCommandCacheScopeField(AdhocToolCacheScopeField):
    # `shell_command` counterpart of `AdhocToolCacheScopeField`: it inherits
    # everything unchanged and adds no behavior of its own here.
    pass


class SkipShellCommandTestsField(BoolField):
alias = "skip_tests"
default = False
Expand Down Expand Up @@ -421,6 +426,7 @@ class ShellCommandTarget(Target):
ShellCommandWorkspaceInvalidationSourcesField,
ShellCommandPathEnvModifyModeField,
ShellCommandOutputsMatchMode,
ShellCommandCacheScopeField,
EnvironmentField,
)
help = help_text(
Expand Down
4 changes: 4 additions & 0 deletions src/python/pants/backend/shell/util_rules/shell_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pants.backend.shell.subsystems.shell_setup import ShellSetup
from pants.backend.shell.target_types import (
RunShellCommandWorkdirField,
ShellCommandCacheScopeField,
ShellCommandCommandField,
ShellCommandExecutionDependenciesField,
ShellCommandExtraEnvVarsField,
Expand Down Expand Up @@ -159,6 +160,9 @@ async def _prepare_process_request_from_target(
}

cache_scope = env_target.default_cache_scope
maybe_override_cache_scope = shell_command.get(ShellCommandCacheScopeField).enum_value
if maybe_override_cache_scope is not None:
cache_scope = maybe_override_cache_scope

workspace_invalidation_globs: PathGlobs | None = None
workspace_invalidation_sources = (
Expand Down

0 comments on commit 9dc2fb1

Please sign in to comment.