Skip to content

Commit

Permalink
Allow customizing semgrep configurations; correct rule matching glob
Browse files Browse the repository at this point in the history
  • Loading branch information
purajit committed Jul 3, 2024
1 parent 50a4e75 commit f1f0e3c
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 33 deletions.
48 changes: 17 additions & 31 deletions src/python/pants/backend/tools/semgrep/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,29 +44,12 @@ class SemgrepLintRequest(LintTargetsRequest):
@dataclass(frozen=True)
class PartitionMetadata:
config_files: frozenset[PurePath]
ignore_files: Snapshot

@property
def description(self) -> str:
return ", ".join(sorted(str(path) for path in self.config_files))


_IGNORE_FILE_NAME = ".semgrepignore"

_RULES_DIR_NAME = ".semgrep"
_RULES_FILES_GLOBS = (
".semgrep.yml",
".semgrep.yaml",
f"{_RULES_DIR_NAME}/*.yml",
f"{_RULES_DIR_NAME}/*.yaml",
)


@dataclass
class SemgrepIgnoreFiles:
snapshot: Snapshot


@dataclass
class AllSemgrepConfigs:
configs_by_dir: dict[PurePath, set[PurePath]]
Expand All @@ -85,24 +68,32 @@ def ancestor_configs(self, address: Address) -> Iterable[PurePath]:
yield from self.configs_by_dir.get(ancestor, [])


def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs:
def _group_by_semgrep_dir(rules_dir_name: str, all_paths: Paths) -> AllSemgrepConfigs:
configs_by_dir = defaultdict(set)
for path_ in all_paths.files:
path = PurePath(path_)
# A rule like foo/bar/.semgrep/baz.yaml should behave like it's in foo/bar, not
# foo/bar/.semgrep
parent = path.parent
config_directory = parent.parent if parent.name == _RULES_DIR_NAME else parent
config_directory = parent.parent if parent.name == rules_dir_name else parent

configs_by_dir[config_directory].add(path)

return AllSemgrepConfigs(configs_by_dir)


@rule
async def find_all_semgrep_configs() -> AllSemgrepConfigs:
all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in _RULES_FILES_GLOBS]))
return _group_by_semgrep_dir(all_paths)
async def find_all_semgrep_configs(semgrep: SemgrepSubsystem) -> AllSemgrepConfigs:
rules_files_globs = (
f"{semgrep.rules_dir_name}/*.yml",
f"{semgrep.rules_dir_name}/*.yaml",
# TODO: these don't seem to be mentioned in semgrep docs; should they be removed?
".semgrep.yml",
".semgrep.yaml",
)

all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in rules_files_globs]))
return _group_by_semgrep_dir(semgrep.rules_dir_name, all_paths)


@dataclass(frozen=True)
Expand All @@ -121,17 +112,10 @@ async def infer_relevant_semgrep_configs(
return RelevantSemgrepConfigs(all_semgrep.ancestor_configs(request.field_set.address))


@rule
async def all_semgrep_ignore_files() -> SemgrepIgnoreFiles:
snapshot = await Get(Snapshot, PathGlobs([f"**/{_IGNORE_FILE_NAME}"]))
return SemgrepIgnoreFiles(snapshot)


@rule
async def partition(
request: SemgrepLintRequest.PartitionRequest[SemgrepFieldSet],
semgrep: SemgrepSubsystem,
ignore_files: SemgrepIgnoreFiles,
) -> Partitions:
if semgrep.skip:
return Partitions()
Expand All @@ -148,7 +132,7 @@ async def partition(
by_config[configs].append(field_set)

return Partitions(
Partition(tuple(field_sets), PartitionMetadata(configs, ignore_files.snapshot))
Partition(tuple(field_sets), PartitionMetadata(configs))
for configs, field_sets in by_config.items()
)

Expand All @@ -175,14 +159,16 @@ async def lint(
Get(Digest, CreateDigest([_DEFAULT_SETTINGS])),
)

ignore_files = await Get(Snapshot, PathGlobs([semgrep.ignore_config_path]))

input_digest = await Get(
Digest,
MergeDigests(
(
input_files.snapshot.digest,
config_files.digest,
settings,
request.partition_metadata.ignore_files.digest,
ignore_files.digest,
)
),
)
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/backend/tools/semgrep/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def configs(strs: dict[str, set[str]]) -> AllSemgrepConfigs:
)
def test_group_by_group_by_semgrep_dir(paths: tuple[str, ...], expected: AllSemgrepConfigs):
input = Paths(files=paths, dirs=())
result = rules._group_by_semgrep_dir(input)
result = rules._group_by_semgrep_dir(".semgrep", input)
assert result == expected


Expand Down
17 changes: 16 additions & 1 deletion src/python/pants/backend/tools/semgrep/subsystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pants.engine.rules import Rule, collect_rules
from pants.engine.target import Dependencies, FieldSet, SingleSourceField, Target
from pants.engine.unions import UnionRule
from pants.option.option_types import ArgsListOption, BoolOption, SkipOption
from pants.option.option_types import ArgsListOption, BoolOption, SkipOption, StrOption
from pants.util.strutil import softwrap


Expand Down Expand Up @@ -51,6 +51,21 @@ class SemgrepSubsystem(PythonToolBase):
register_lockfile = True
default_lockfile_resource = ("pants.backend.tools.semgrep", "semgrep.lock")

rules_dir_name = StrOption(
default=".semgrep",
help=softwrap(
"""
The directory name with semgrep rules, which is searched recursively for YAML files, and
can be present at any level, with rules applying to all levels below it.
"""
),
)

ignore_config_path = StrOption(
default=".semgrepignore",
help="The path to the semgrepignore file",
)

args = ArgsListOption(
example="--verbose",
default=["--quiet"],
Expand Down

0 comments on commit f1f0e3c

Please sign in to comment.