From f1f0e3cffbd454f4bb7b419cd66337c6c867a7d1 Mon Sep 17 00:00:00 2001
From: purajit <purajit.malalur@color.com>
Date: Wed, 3 Jul 2024 13:17:51 -0700
Subject: [PATCH] Allow customizing semgrep configurations; correct rule matching glob

Add two options to the semgrep subsystem:

- `rules_dir_name` (default `.semgrep`): the directory name used when
  globbing for rule files and when grouping them by the directory they
  apply to. `_group_by_semgrep_dir` now takes this name as its first
  argument instead of reading the removed `_RULES_DIR_NAME` constant.
- `ignore_config_path` (default `.semgrepignore`): the path globbed for
  the semgrep ignore file.

The ignore file snapshot is now fetched inside `lint` instead of being
carried on `PartitionMetadata`; the `SemgrepIgnoreFiles` type and its
`all_semgrep_ignore_files` rule are removed.
---
Review notes (this section is not part of the commit message):

* The ignore file used to be globbed as `**/.semgrepignore`; it is now
  globbed as exactly `ignore_config_path`, so nested `.semgrepignore`
  files no longer match with the default value. Confirm this is intended.
* The rule globs are `<rules_dir_name>/*.yml` and `<rules_dir_name>/*.yaml`,
  i.e. only files directly inside the directory; the `rules_dir_name`
  help text describes that behaviour rather than a recursive search.

 .../pants/backend/tools/semgrep/rules.py      | 48 +++++++------------
 .../pants/backend/tools/semgrep/rules_test.py |  2 +-
 .../pants/backend/tools/semgrep/subsystem.py  | 17 ++++++-
 3 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/src/python/pants/backend/tools/semgrep/rules.py b/src/python/pants/backend/tools/semgrep/rules.py
index e2e68cbe8bf0..d426b166757f 100644
--- a/src/python/pants/backend/tools/semgrep/rules.py
+++ b/src/python/pants/backend/tools/semgrep/rules.py
@@ -44,29 +44,12 @@ class SemgrepLintRequest(LintTargetsRequest):
 @dataclass(frozen=True)
 class PartitionMetadata:
     config_files: frozenset[PurePath]
-    ignore_files: Snapshot
 
     @property
     def description(self) -> str:
         return ", ".join(sorted(str(path) for path in self.config_files))
 
 
-_IGNORE_FILE_NAME = ".semgrepignore"
-
-_RULES_DIR_NAME = ".semgrep"
-_RULES_FILES_GLOBS = (
-    ".semgrep.yml",
-    ".semgrep.yaml",
-    f"{_RULES_DIR_NAME}/*.yml",
-    f"{_RULES_DIR_NAME}/*.yaml",
-)
-
-
-@dataclass
-class SemgrepIgnoreFiles:
-    snapshot: Snapshot
-
-
 @dataclass
 class AllSemgrepConfigs:
     configs_by_dir: dict[PurePath, set[PurePath]]
@@ -85,14 +68,14 @@ def ancestor_configs(self, address: Address) -> Iterable[PurePath]:
             yield from self.configs_by_dir.get(ancestor, [])
 
 
-def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs:
+def _group_by_semgrep_dir(rules_dir_name: str, all_paths: Paths) -> AllSemgrepConfigs:
     configs_by_dir = defaultdict(set)
     for path_ in all_paths.files:
         path = PurePath(path_)
         # A rule like foo/bar/.semgrep/baz.yaml should behave like it's in in foo/bar, not
         # foo/bar/.semgrep
         parent = path.parent
-        config_directory = parent.parent if parent.name == _RULES_DIR_NAME else parent
+        config_directory = parent.parent if parent.name == rules_dir_name else parent
 
         configs_by_dir[config_directory].add(path)
 
@@ -100,9 +83,17 @@ def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs:
 
 
 @rule
-async def find_all_semgrep_configs() -> AllSemgrepConfigs:
-    all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in _RULES_FILES_GLOBS]))
-    return _group_by_semgrep_dir(all_paths)
+async def find_all_semgrep_configs(semgrep: SemgrepSubsystem) -> AllSemgrepConfigs:
+    rules_files_globs = (
+        f"{semgrep.rules_dir_name}/*.yml",
+        f"{semgrep.rules_dir_name}/*.yaml",
+        # TODO: these don't seem to be mentioned in semgrep docs; should they be removed?
+        ".semgrep.yml",
+        ".semgrep.yaml",
+    )
+
+    all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in rules_files_globs]))
+    return _group_by_semgrep_dir(semgrep.rules_dir_name, all_paths)
 
 
 @dataclass(frozen=True)
@@ -121,17 +112,10 @@ async def infer_relevant_semgrep_configs(
     return RelevantSemgrepConfigs(all_semgrep.ancestor_configs(request.field_set.address))
 
 
-@rule
-async def all_semgrep_ignore_files() -> SemgrepIgnoreFiles:
-    snapshot = await Get(Snapshot, PathGlobs([f"**/{_IGNORE_FILE_NAME}"]))
-    return SemgrepIgnoreFiles(snapshot)
-
-
 @rule
 async def partition(
     request: SemgrepLintRequest.PartitionRequest[SemgrepFieldSet],
     semgrep: SemgrepSubsystem,
-    ignore_files: SemgrepIgnoreFiles,
 ) -> Partitions:
     if semgrep.skip:
         return Partitions()
@@ -148,7 +132,7 @@ async def partition(
             by_config[configs].append(field_set)
 
     return Partitions(
-        Partition(tuple(field_sets), PartitionMetadata(configs, ignore_files.snapshot))
+        Partition(tuple(field_sets), PartitionMetadata(configs))
         for configs, field_sets in by_config.items()
     )
 
@@ -175,6 +159,8 @@ async def lint(
         Get(Digest, CreateDigest([_DEFAULT_SETTINGS])),
     )
 
+    ignore_files = await Get(Snapshot, PathGlobs([semgrep.ignore_config_path]))
+
     input_digest = await Get(
         Digest,
         MergeDigests(
@@ -182,7 +168,7 @@ async def lint(
                 input_files.snapshot.digest,
                 config_files.digest,
                 settings,
-                request.partition_metadata.ignore_files.digest,
+                ignore_files.digest,
             )
         ),
     )
diff --git a/src/python/pants/backend/tools/semgrep/rules_test.py b/src/python/pants/backend/tools/semgrep/rules_test.py
index 520560b8c736..318cb4948769 100644
--- a/src/python/pants/backend/tools/semgrep/rules_test.py
+++ b/src/python/pants/backend/tools/semgrep/rules_test.py
@@ -66,7 +66,7 @@ def configs(strs: dict[str, set[str]]) -> AllSemgrepConfigs:
 )
 def test_group_by_group_by_semgrep_dir(paths: tuple[str, ...], expected: AllSemgrepConfigs):
     input = Paths(files=paths, dirs=())
-    result = rules._group_by_semgrep_dir(input)
+    result = rules._group_by_semgrep_dir(".semgrep", input)
     assert result == expected
 
 
diff --git a/src/python/pants/backend/tools/semgrep/subsystem.py b/src/python/pants/backend/tools/semgrep/subsystem.py
index af73b460304e..dc08864c5ff0 100644
--- a/src/python/pants/backend/tools/semgrep/subsystem.py
+++ b/src/python/pants/backend/tools/semgrep/subsystem.py
@@ -12,7 +12,7 @@
 from pants.engine.rules import Rule, collect_rules
 from pants.engine.target import Dependencies, FieldSet, SingleSourceField, Target
 from pants.engine.unions import UnionRule
-from pants.option.option_types import ArgsListOption, BoolOption, SkipOption
+from pants.option.option_types import ArgsListOption, BoolOption, SkipOption, StrOption
 from pants.util.strutil import softwrap
 
 
@@ -51,6 +51,21 @@ class SemgrepSubsystem(PythonToolBase):
     register_lockfile = True
     default_lockfile_resource = ("pants.backend.tools.semgrep", "semgrep.lock")
 
+    rules_dir_name = StrOption(
+        default=".semgrep",
+        help=softwrap(
+            """
+            The name of the directory containing semgrep rules. YAML files directly inside it are
+            used, and it can be present at any level, with rules applying to all levels below it.
+            """
+        ),
+    )
+
+    ignore_config_path = StrOption(
+        default=".semgrepignore",
+        help="The path to the semgrepignore file",
+    )
+
     args = ArgsListOption(
         example="--verbose",
         default=["--quiet"],