From cb1bdfbbbf0a9338b0904c845f105878b6f6aa06 Mon Sep 17 00:00:00 2001 From: Daniel Goldman Date: Tue, 31 Dec 2024 23:39:26 -0500 Subject: [PATCH] extra_env_vars support fnmatch globs (#21781) It first checks for exact matches, then tries an fnmatch. closes #20291 --- docs/docs/jvm/java-and-scala.mdx | 4 +-- docs/docs/jvm/kotlin.mdx | 4 +-- docs/docs/python/goals/test.mdx | 4 +-- docs/docs/shell/index.mdx | 4 +-- docs/notes/2.25.x.md | 3 +- .../pants/backend/adhoc/target_types.py | 6 ++-- .../pants/backend/helm/subsystems/helm.py | 5 +++- .../pants/backend/javascript/package_json.py | 6 ++-- .../backend/python/providers/pyenv/rules.py | 7 ++--- src/python/pants/backend/terraform/tool.py | 6 ++-- src/python/pants/core/goals/test.py | 17 +++++------ src/python/pants/engine/env_vars.py | 29 +++++++++++++++++-- src/python/pants/engine/environment_test.py | 29 +++++++++++++++++++ 13 files changed, 91 insertions(+), 33 deletions(-) diff --git a/docs/docs/jvm/java-and-scala.mdx b/docs/docs/jvm/java-and-scala.mdx index 613e3911fef..40dda18069e 100644 --- a/docs/docs/jvm/java-and-scala.mdx +++ b/docs/docs/jvm/java-and-scala.mdx @@ -330,11 +330,11 @@ Test runs are _hermetic_, meaning that they are stripped of the parent `pants` p To add any arbitrary environment variable back to the process, you can either add the environment variable to the specific tests with the `extra_env_vars` field on `junit_test` / `junit_tests` / `scala_junit_test` / `scala_junit_tests` / `scalatest_test` / `scalatest_tests` targets or to all your tests with the `[test].extra_env_vars` option. Generally, prefer the field `extra_env_vars` field so that more of your tests are hermetic. -With both `[test].extra_env_vars` and the `extra_env_vars` field, you can either hardcode a value or leave off a value to "allowlist" it and read from the parent `pants` process's environment. +With both `[test].extra_env_vars` and the `extra_env_vars` field, you can specify a value with the form `"VAR1=hardcoded_value"` or read it from the parent `pants` process's environment with the form `VAR2`. `fnmatch` globs like `"VAR_PREFIXED_*"` can be used to read multiple environment variables. ```toml tab={"label":"pants.toml"} [test] -extra_env_vars = ["VAR1", "VAR2=hardcoded_value"] +extra_env_vars = ["VAR1=hardcoded_value", "VAR2", "VAR_PREFIXED_*"] ``` ```python tab={"label":"project/BUILD"} diff --git a/docs/docs/jvm/kotlin.mdx b/docs/docs/jvm/kotlin.mdx index 8363b1fd4ea..7b028ea1292 100644 --- a/docs/docs/jvm/kotlin.mdx +++ b/docs/docs/jvm/kotlin.mdx @@ -205,11 +205,11 @@ Test runs are _hermetic_, meaning that they are stripped of the parent `pants` p To add any arbitrary environment variable back to the process, you can either add the environment variable to the specific tests with the `extra_env_vars` field on `kotlin_junit_test` / `kotlin_junit_tests` targets or to all your tests with the `[test].extra_env_vars` option. Generally, prefer the field `extra_env_vars` field so that more of your tests are hermetic. -With both `[test].extra_env_vars` and the `extra_env_vars` field, you can either hardcode a value or leave off a value to "allowlist" it and read from the parent `pants` process's environment. +With both `[test].extra_env_vars` and the `extra_env_vars` field, you can specify a value with the form `"VAR1=hardcoded_value"` or read it from the parent `pants` process's environment with the form `VAR2`. `fnmatch` globs like `"VAR_PREFIXED_*"` can be used to read multiple environment variables. ```toml tab={"label":"pants.toml"} [test] -extra_env_vars = ["VAR1", "VAR2=hardcoded_value"] +extra_env_vars = ["VAR1=hardcoded_value", "VAR2", "VARS_PREFIXED_*"] ``` ```python tab={"label":"project/BUILD"} diff --git a/docs/docs/python/goals/test.mdx b/docs/docs/python/goals/test.mdx index 0a15dd65b29..6d01a2e3dbd 100644 --- a/docs/docs/python/goals/test.mdx +++ b/docs/docs/python/goals/test.mdx @@ -156,11 +156,11 @@ Test runs are _hermetic_, meaning that they are stripped of the parent `pants` p To add any arbitrary environment variable back to the process, you can either add the environment variable to the specific tests with the `extra_env_vars` field on `python_test` / `python_tests` targets or to all your tests with the `[test].extra_env_vars` option. Generally, prefer the field `extra_env_vars` field so that more of your tests are hermetic. -With both `[test].extra_env_vars` and the `extra_env_vars` field, you can either hardcode a value or leave off a value to "allowlist" it and read from the parent `pants` process's environment. +With both `[test].extra_env_vars` and the `extra_env_vars` field, you can specify a value with the form `"VAR1=hardcoded_value"` or read it from the parent `pants` process's environment with the form `VAR2`. `fnmatch` globs like `"VAR_PREFIXED_*"` can be used to read multiple environment variables. ```toml tab={"label":"pants.toml"} [test] -extra_env_vars = ["VAR1", "VAR2=hardcoded_value"] +extra_env_vars = ["VAR1=hardcoded_value", "VAR2", "VAR_PREFIXED_*"] ``` ```python tab={"label":"project/BUILD"} diff --git a/docs/docs/shell/index.mdx b/docs/docs/shell/index.mdx index c059e7bdb74..d9362a3312b 100644 --- a/docs/docs/shell/index.mdx +++ b/docs/docs/shell/index.mdx @@ -302,11 +302,11 @@ To force your tests to run again, rather than reading from the cache, run `pants Test runs are _hermetic_, meaning that they are stripped of the parent `pants` process's environment variables. This is important for reproducibility, and it also increases cache hits. -To add any arbitrary environment variable back to the process, use the option `extra_env_vars` in the `[test]` options scope. You can hardcode a value for the option, or leave off a value to "allowlist" it and read from the parent `pants` process's environment. +To add any arbitrary environment variable back to the process, use the option `extra_env_vars` in the `[test]` options scope. You can specify a value with the form `"VAR1=hardcoded_value"` or read it from the parent `pants` process's environment with the form `VAR2`. `fnmatch` globs like `"VAR_PREFIXED_*"` can be used to read multiple environment variables. ```toml title="pants.toml" [test] -extra_env_vars = ["VAR1", "VAR2=hardcoded_value"] +extra_env_vars = ["VAR1=hardcoded_value", "VAR2", "VAR_PREFIXED_*"] ``` Use `[bash-setup].executable_search_paths` to change the `$PATH` env var used during test runs. You can use the special string `""` to read the value from the parent `pants` process's environment. diff --git a/docs/notes/2.25.x.md b/docs/notes/2.25.x.md index 73b25b23ace..f893dcda712 100644 --- a/docs/notes/2.25.x.md +++ b/docs/notes/2.25.x.md @@ -21,6 +21,7 @@ Thank you to [Klayvio](https://www.klaviyo.com/) and [Normal Computing](https:// - Fixed a longstanding bug in the processing of [synthetic targets](https://www.pantsbuild.org/2.24/docs/writing-plugins/the-target-api/concepts#synthetic-targets-api). This fix has the side-effect of requiring immutability and hashability of scalar values in BUILD files, which was always assumed but not enforced. This may cause BUILD file parsing errors, if you have custom field types involving custom mutable data structures. See ([#21725](https://github.com/pantsbuild/pants/pull/21725)) for more. - [Fixed](https://github.com/pantsbuild/pants/pull/21665) bug where `pants --export-resolve= --export-py-generated-sources-in-resolve=` fails (see [#21659](https://github.com/pantsbuild/pants/issues/21659) for more info). - [Fixed](https://github.com/pantsbuild/pants/pull/21694) bug where an `archive` target is unable to produce a ZIP file with no extension (see [#21693](https://github.com/pantsbuild/pants/issues/21693) for more info). +- `[subprocess-environment].env_vars` and `extra_env_vars` (on many subsystems and targets) now supports a generalised glob syntax using Python [fnmatch](https://docs.python.org/3/library/fnmatch.html) to construct patterns like `AWS_*`, `TF_*`, and `S2TESTS_*`. #### Remote Caching/Execution @@ -58,7 +59,7 @@ Fixed an error which was caused when the same tool appeaed in both the `--docker Strict adherence to the [schema of Helm OCI registry configuration](https://www.pantsbuild.org/2.25/reference/subsystems/helm#registries) is now required. Previously we did ad-hoc coercion of some field values, so that, e.g., you could provide a "true"/"false" string as a boolean value. Now we require actual booleans. -The `helm_infer.external_docker_images` glob syntax has been generalized. In addition to `*`, you can now use Python [fnmatch](https://docs.python.org/3/library/fnmatch.html) to construct matterns like `quay.io/*`. +The `helm_infer.external_docker_images` glob syntax has been generalized. In addition to `*`, you can now use Python [fnmatch](https://docs.python.org/3/library/fnmatch.html) to construct patterns like `quay.io/*`. Fixed a bug where linting with the Helm backend enabled could induce serialization errors with the [workunit-logger](https://www.pantsbuild.org/2.25/reference/subsystems/workunit-logger). diff --git a/src/python/pants/backend/adhoc/target_types.py b/src/python/pants/backend/adhoc/target_types.py index 6d1c41ba11f..f5b7a8e4f65 100644 --- a/src/python/pants/backend/adhoc/target_types.py +++ b/src/python/pants/backend/adhoc/target_types.py @@ -9,6 +9,7 @@ from pants.base.glob_match_error_behavior import GlobMatchErrorBehavior from pants.core.util_rules.adhoc_process_support import PathEnvModifyMode from pants.core.util_rules.environments import EnvironmentField +from pants.engine.env_vars import EXTRA_ENV_VARS_USAGE_HELP from pants.engine.fs import GlobExpansionConjunction from pants.engine.process import ProcessCacheScope from pants.engine.target import ( @@ -191,11 +192,10 @@ class AdhocToolTimeoutField(IntField): class AdhocToolExtraEnvVarsField(StringSequenceField): alias: ClassVar[str] = "extra_env_vars" help = help_text( - """ + f""" Additional environment variables to provide to the process. - Entries are strings in the form `ENV_VAR=value` to use explicitly; or just - `ENV_VAR` to copy the value of a variable in Pants's own environment. + {EXTRA_ENV_VARS_USAGE_HELP} """ ) diff --git a/src/python/pants/backend/helm/subsystems/helm.py b/src/python/pants/backend/helm/subsystems/helm.py index e366e5f17ba..68530b00e1b 100644 --- a/src/python/pants/backend/helm/subsystems/helm.py +++ b/src/python/pants/backend/helm/subsystems/helm.py @@ -9,6 +9,7 @@ from pants.backend.helm.resolve.remotes import HelmRemotes from pants.backend.helm.target_types import HelmChartTarget, HelmRegistriesField from pants.core.util_rules.external_tool import TemplatedExternalTool +from pants.engine.env_vars import EXTRA_ENV_VARS_USAGE_HELP from pants.engine.platform import Platform from pants.option.option_types import ( ArgsListOption, @@ -153,9 +154,11 @@ class HelmSubsystem(TemplatedExternalTool): ) extra_env_vars = StrListOption( help=softwrap( - """ + f""" Additional environment variables that would be made available to all Helm processes or during value interpolation. + + {EXTRA_ENV_VARS_USAGE_HELP} """ ), advanced=True, diff --git a/src/python/pants/backend/javascript/package_json.py b/src/python/pants/backend/javascript/package_json.py index 191b6693204..8f561f377bd 100644 --- a/src/python/pants/backend/javascript/package_json.py +++ b/src/python/pants/backend/javascript/package_json.py @@ -25,6 +25,7 @@ from pants.core.util_rules import stripped_source_files from pants.engine import fs from pants.engine.collection import Collection, DeduplicatedCollection +from pants.engine.env_vars import EXTRA_ENV_VARS_USAGE_HELP from pants.engine.fs import ( CreateDigest, DigestContents, @@ -416,11 +417,10 @@ class NodeBuildScriptExtraEnvVarsField(StringSequenceField): required = False default = () help = help_text( - """ + f""" Additional environment variables to include in environment when running a build script process. - Entries are strings in the form `ENV_VAR=value` to use explicitly; or just - `ENV_VAR` to copy the value of a variable in Pants's own environment. + {EXTRA_ENV_VARS_USAGE_HELP} """ ) diff --git a/src/python/pants/backend/python/providers/pyenv/rules.py b/src/python/pants/backend/python/providers/pyenv/rules.py index 535c9281df0..14688f77c64 100644 --- a/src/python/pants/backend/python/providers/pyenv/rules.py +++ b/src/python/pants/backend/python/providers/pyenv/rules.py @@ -16,7 +16,7 @@ TemplatedExternalTool, ) from pants.core.util_rules.external_tool import rules as external_tools_rules -from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest +from pants.engine.env_vars import EXTRA_ENV_VARS_USAGE_HELP, EnvironmentVars, EnvironmentVarsRequest from pants.engine.fs import CreateDigest, FileContent from pants.engine.internals.native_engine import Digest, MergeDigests from pants.engine.internals.selectors import Get, MultiGet @@ -67,11 +67,10 @@ class PyenvPythonProviderSubsystem(TemplatedExternalTool): class EnvironmentAware: installation_extra_env_vars = StrListOption( help=softwrap( - """ + f""" Additional environment variables to include when running `pyenv install`. - Entries are strings in the form `ENV_VAR=value` to use explicitly; or just - `ENV_VAR` to copy the value of a variable in Pants's own environment. + {EXTRA_ENV_VARS_USAGE_HELP} This is especially useful if you want to use an optimized Python (E.g. setting `PYTHON_CONFIGURE_OPTS='--enable-optimizations --with-lto'` and diff --git a/src/python/pants/backend/terraform/tool.py b/src/python/pants/backend/terraform/tool.py index e312ce05dcd..d2ae8bf20f5 100644 --- a/src/python/pants/backend/terraform/tool.py +++ b/src/python/pants/backend/terraform/tool.py @@ -29,7 +29,7 @@ ExternalToolRequest, TemplatedExternalTool, ) -from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest +from pants.engine.env_vars import EXTRA_ENV_VARS_USAGE_HELP, EnvironmentVars, EnvironmentVarsRequest from pants.engine.fs import EMPTY_DIGEST, Digest from pants.engine.internals.selectors import Get from pants.engine.platform import Platform @@ -359,8 +359,10 @@ def default_known_versions(cls): extra_env_vars = StrListOption( help=softwrap( - """ + f""" Additional environment variables that would be made available to all Terraform processes. + + {EXTRA_ENV_VARS_USAGE_HELP} """ ), advanced=True, diff --git a/src/python/pants/core/goals/test.py b/src/python/pants/core/goals/test.py index d8ed1848951..592d3782bdf 100644 --- a/src/python/pants/core/goals/test.py +++ b/src/python/pants/core/goals/test.py @@ -36,7 +36,7 @@ from pants.engine.console import Console from pants.engine.desktop import OpenFiles, OpenFilesRequest from pants.engine.engine_aware import EngineAwareReturnType -from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest +from pants.engine.env_vars import EXTRA_ENV_VARS_USAGE_HELP, EnvironmentVars, EnvironmentVarsRequest from pants.engine.fs import EMPTY_FILE_DIGEST, Digest, FileDigest, MergeDigests, Snapshot, Workspace from pants.engine.goal import Goal, GoalSubsystem from pants.engine.internals.session import RunId @@ -517,10 +517,10 @@ def activated(cls, union_membership: UnionMembership) -> bool: class EnvironmentAware: extra_env_vars = StrListOption( help=softwrap( - """ + f""" Additional environment variables to include in test processes. - Entries are strings in the form `ENV_VAR=value` to use explicitly; or just - `ENV_VAR` to copy the value of a variable in Pants's own environment. + + {EXTRA_ENV_VARS_USAGE_HELP} """ ), ) @@ -745,13 +745,12 @@ def calculate_from_global_options(self, test: TestSubsystem) -> Optional[int]: class TestExtraEnvVarsField(StringSequenceField, metaclass=ABCMeta): alias = "extra_env_vars" help = help_text( - """ - Additional environment variables to include in test processes. + f""" + Additional environment variables to include in test processes. - Entries are strings in the form `ENV_VAR=value` to use explicitly; or just - `ENV_VAR` to copy the value of a variable in Pants's own environment. + {EXTRA_ENV_VARS_USAGE_HELP} - This will be merged with and override values from `[test].extra_env_vars`. + This will be merged with and override values from `[test].extra_env_vars`. """ ) diff --git a/src/python/pants/engine/env_vars.py b/src/python/pants/engine/env_vars.py index 58a54e783bb..ea80235b9be 100644 --- a/src/python/pants/engine/env_vars.py +++ b/src/python/pants/engine/env_vars.py @@ -3,9 +3,10 @@ from __future__ import annotations +import fnmatch import re from dataclasses import dataclass -from typing import Dict, Optional, Sequence +from typing import Dict, Iterator, Optional, Sequence from pants.util.frozendict import FrozenDict from pants.util.ordered_set import FrozenOrderedSet @@ -13,6 +14,12 @@ name_value_re = re.compile(r"([A-Za-z_]\w*)=(.*)") shorthand_re = re.compile(r"([A-Za-z_]\w*)") +EXTRA_ENV_VARS_USAGE_HELP = """\ +Entries are strings in the form `ENV_VAR=value` to use explicitly; or just +`ENV_VAR` to copy the value of a variable in Pants's own environment. +`fnmatch` globs like `ENV_VAR_PREFIXED_*` can be used to copy multiple environment variables. +""" + class CompleteEnvironmentVars(FrozenDict): """CompleteEnvironmentVars contains all environment variables from the current Pants process. @@ -53,7 +60,8 @@ def check_and_set(name: str, value: Optional[str]): if name_value_match: check_and_set(name_value_match[1], name_value_match[2]) elif shorthand_re.match(env_var): - check_and_set(env_var, self.get(env_var)) + for name, value in self.get_or_match(env_var): + check_and_set(name, value) else: raise ValueError( f"An invalid variable was requested via the --test-extra-env-var " @@ -62,6 +70,23 @@ def check_and_set(name: str, value: Optional[str]): return FrozenDict(env_var_subset) + def get_or_match(self, name_or_pattern: str) -> Iterator[tuple[str, str]]: + """Get the value of an envvar if it has an exact match, otherwise all fnmatches. + + Although fnmatch could also handle direct matches, it is significantly slower (roughly 2000 + times). + """ + if value := self.get(name_or_pattern): + yield name_or_pattern, value + return # do not check fnmatches if we have an exact match + + # fnmatch.filter looks tempting, + # but we'd need to iterate once for the filtering the keys and again for getting the values + for k, v in self.items(): + # we use fnmatchcase to avoid normalising the case with `os.path.normcase` on Windows systems + if fnmatch.fnmatchcase(k, name_or_pattern): + yield k, v + @dataclass(frozen=True) class EnvironmentVarsRequest: diff --git a/src/python/pants/engine/environment_test.py b/src/python/pants/engine/environment_test.py index 8ffd3e67769..7568a058a2d 100644 --- a/src/python/pants/engine/environment_test.py +++ b/src/python/pants/engine/environment_test.py @@ -37,3 +37,32 @@ def test_invalid_variable() -> None: "An invalid variable was requested via the --test-extra-env-var mechanism: 3INVALID" in str(exc) ) + + +def test_envvar_fnmatch() -> None: + """Test fnmatch patterns correctly pull in all matching envvars.""" + + pants_env = CompleteEnvironmentVars( + { + "LETTER_C": "prefix_char_match", + "LETTER_PI": "prefix", + "LETTER_P*": "exact_match_with_glob", + "letter_lower": "case-sensitive", + } + ) + + char_match = pants_env.get_subset(["LETTER_?"]) + assert char_match == {"LETTER_C": "prefix_char_match"} + + multichar_match = pants_env.get_subset(["LETTER_*"]) + assert multichar_match == { + "LETTER_C": "prefix_char_match", + "LETTER_PI": "prefix", + "LETTER_P*": "exact_match_with_glob", + } + + exact_match_with_glob = pants_env.get_subset(["LETTER_P*"]) + assert exact_match_with_glob == {"LETTER_P*": "exact_match_with_glob"} + + case_sensitive = pants_env.get_subset(["LETTER_LOWER"]) + assert case_sensitive == {}