From de76e6436ee271f316c24b7546a6f480961170db Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 15 Dec 2022 05:53:46 -0800 Subject: [PATCH] Use pre-commit for CI style checks. (#3062) This PR adopts `pre-commit` in CI style checks and updates a few of the hooks (flake8, clang-format, and the copyright checker). This helps with the ongoing transition to GitHub Actions by centralizing style checks. I also applied a significant set of changes from `clang-format` and `flake8` suggestions in commits cfe6395 and adf0741. To minimize the diff in this PR, we can split those two commits into their own PRs with some small tweaks to the `pre-commit` configuration. Let me know if you'd like me to do this. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - AJ Schmidt (https://github.com/ajschmidt8) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/3062 --- .pre-commit-config.yaml | 34 ++- ci/checks/copyright.py | 217 +++++++------ ci/checks/style.sh | 72 +---- cpp/scripts/gitutils.py | 286 ------------------ cpp/scripts/run-clang-format.py | 141 --------- python/.flake8 | 7 - .../cugraph/tests/test_property_graph.py | 5 +- setup.cfg | 25 ++ 8 files changed, 185 insertions(+), 602 deletions(-) delete mode 100644 cpp/scripts/gitutils.py delete mode 100644 cpp/scripts/run-clang-format.py delete mode 100644 python/.flake8 create mode 100644 setup.cfg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61d21fcba7b..a075beea3f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,13 +4,13 @@ # To run: `pre-commit run --all-files` repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-added-large-files - id: debug-statements - id: mixed-line-ending - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 22.10.0 hooks: - id: black language_version: python3 @@ -18,14 +18,36 @@ repos: args: [--target-version=py38] files: ^python/ - repo: https://github.com/PyCQA/flake8 - rev: 3.8.4 + rev: 6.0.0 hooks: - id: flake8 - args: [--config=python/.flake8] - files: ^python/ + args: ["--config=setup.cfg"] + files: python/.*$ + types: [file] + types_or: [python] # TODO: Enable [python, cython] + additional_dependencies: ["flake8-force"] - repo: https://github.com/asottile/yesqa rev: v1.3.0 hooks: - id: yesqa additional_dependencies: - - flake8==3.8.4 + - flake8==6.0.0 + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v11.1.0 + hooks: + - id: clang-format + exclude: | + (?x)^( + cpp/libcugraph_etl| + cpp/tests/c_api/.* + ) + types_or: [c, c++, cuda] + args: ["-fallback-style=none", "-style=file", "-i"] + - repo: local + hooks: + - id: copyright-check + name: copyright-check + entry: python ./ci/checks/copyright.py --git-modified-only --update-current-year + language: python + pass_filenames: false + additional_dependencies: [gitpython] diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py index f3acfd404f5..407204f6a38 100644 --- a/ci/checks/copyright.py +++ b/ci/checks/copyright.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,46 +13,41 @@ # limitations under the License. # -import datetime -import re import argparse -import io +import datetime import os +import re import sys -SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.expanduser(__file__))) - -# Add the scripts dir for gitutils -sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, - "../../cpp/scripts"))) - -# Now import gitutils. Ignore flake8 error here since there is no other way to -# set up imports -import gitutils # noqa: E402 +import git FilesToCheck = [ re.compile(r"[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx)$"), re.compile(r"CMakeLists[.]txt$"), - re.compile(r"CMakeLists_standalone[.]txt$"), re.compile(r"setup[.]cfg$"), re.compile(r"[.]flake8[.]cython$"), - re.compile(r"meta[.]yaml$") + re.compile(r"meta[.]yaml$"), +] +ExemptFiles = [ + re.compile(r"versioneer[.]py"), ] -ExemptFiles = ['versioneer.py'] # this will break starting at year 10000, which is probably OK :) CheckSimple = re.compile( - r"Copyright *(?:\(c\))? *(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)") + r"Copyright *(?:\(c\))? *(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)" +) CheckDouble = re.compile( r"Copyright *(?:\(c\))? *(\d{4})-(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)" # noqa: E501 ) def checkThisFile(f): - # This check covers things like symlinks which point to files that DNE - if not (os.path.exists(f)): - return False - if gitutils and gitutils.isFileEmpty(f): + if isinstance(f, git.Diff): + if f.deleted_file or f.b_blob.size == 0: + return False + f = f.b_path + elif not os.path.exists(f) or os.stat(f).st_size == 0: + # This check covers things like symlinks which point to files that DNE return False for exempt in ExemptFiles: if exempt.search(f): @@ -63,36 +58,90 @@ def checkThisFile(f): return False +def modifiedFiles(): + """Get a set of all modified files, as Diff objects. + + The files returned have been modified in git since the merge base of HEAD + and the upstream of the target branch. We return the Diff objects so that + we can read only the staged changes. + """ + repo = git.Repo() + # Use the environment variable TARGET_BRANCH or RAPIDS_BASE_BRANCH (defined in CI) if possible + target_branch = os.environ.get("TARGET_BRANCH", os.environ.get("RAPIDS_BASE_BRANCH")) + if target_branch is None: + # Fall back to the closest branch if not on CI + target_branch = repo.git.describe( + all=True, tags=True, match="branch-*", abbrev=0 + ).lstrip("heads/") + + upstream_target_branch = None + if target_branch in repo.heads: + # Use the tracking branch of the local reference if it exists. This + # returns None if no tracking branch is set. + upstream_target_branch = repo.heads[target_branch].tracking_branch() + if upstream_target_branch is None: + # Fall back to the remote with the newest target_branch. This code + # path is used on CI because the only local branch reference is + # current-pr-branch, and thus target_branch is not in repo.heads. + # This also happens if no tracking branch is defined for the local + # target_branch. We use the remote with the latest commit if + # multiple remotes are defined. + candidate_branches = [ + remote.refs[target_branch] for remote in repo.remotes + if target_branch in remote.refs + ] + if len(candidate_branches) > 0: + upstream_target_branch = sorted( + candidate_branches, + key=lambda branch: branch.commit.committed_datetime, + )[-1] + else: + # If no remotes are defined, try to use the local version of the + # target_branch. If this fails, the repo configuration must be very + # strange and we can fix this script on a case-by-case basis. + upstream_target_branch = repo.heads[target_branch] + merge_base = repo.merge_base("HEAD", upstream_target_branch.commit)[0] + diff = merge_base.diff() + changed_files = {f for f in diff if f.b_path is not None} + return changed_files + + def getCopyrightYears(line): res = CheckSimple.search(line) if res: - return (int(res.group(1)), int(res.group(1))) + return int(res.group(1)), int(res.group(1)) res = CheckDouble.search(line) if res: - return (int(res.group(1)), int(res.group(2))) - return (None, None) + return int(res.group(1)), int(res.group(2)) + return None, None def replaceCurrentYear(line, start, end): # first turn a simple regex into double (if applicable). then update years res = CheckSimple.sub(r"Copyright (c) \1-\1, NVIDIA CORPORATION", line) res = CheckDouble.sub( - r"Copyright (c) {:04d}-{:04d}, NVIDIA CORPORATION".format(start, end), - res) + rf"Copyright (c) {start:04d}-{end:04d}, NVIDIA CORPORATION", + res, + ) return res def checkCopyright(f, update_current_year): - """ - Checks for copyright headers and their years - """ + """Checks for copyright headers and their years.""" errs = [] thisYear = datetime.datetime.now().year lineNum = 0 crFound = False yearMatched = False - with io.open(f, "r", encoding="utf-8") as fp: - lines = fp.readlines() + + if isinstance(f, git.Diff): + path = f.b_path + lines = f.b_blob.data_stream.read().decode().splitlines(keepends=True) + else: + path = f + with open(f, encoding="utf-8") as fp: + lines = fp.readlines() + for line in lines: lineNum += 1 start, end = getCopyrightYears(line) @@ -101,20 +150,19 @@ def checkCopyright(f, update_current_year): crFound = True if start > end: e = [ - f, + path, lineNum, "First year after second year in the copyright " "header (manual fix required)", - None + None, ] errs.append(e) - if thisYear < start or thisYear > end: + elif thisYear < start or thisYear > end: e = [ - f, + path, lineNum, - "Current year not included in the " - "copyright header", - None + "Current year not included in the copyright header", + None, ] if thisYear < start: e[-1] = replaceCurrentYear(line, thisYear, end) @@ -123,15 +171,14 @@ def checkCopyright(f, update_current_year): errs.append(e) else: yearMatched = True - fp.close() # copyright header itself not found if not crFound: e = [ - f, + path, 0, "Copyright header missing or formatted incorrectly " "(manual fix required)", - None + None, ] errs.append(e) # even if the year matches a copyright header, make the check pass @@ -141,21 +188,19 @@ def checkCopyright(f, update_current_year): if update_current_year: errs_update = [x for x in errs if x[-1] is not None] if len(errs_update) > 0: - print("File: {}. Changing line(s) {}".format( - f, ', '.join(str(x[1]) for x in errs if x[-1] is not None))) + lines_changed = ", ".join(str(x[1]) for x in errs_update) + print(f"File: {path}. Changing line(s) {lines_changed}") for _, lineNum, __, replacement in errs_update: lines[lineNum - 1] = replacement - with io.open(f, "w", encoding="utf-8") as out_file: - for new_line in lines: - out_file.write(new_line) - errs = [x for x in errs if x[-1] is None] + with open(path, "w", encoding="utf-8") as out_file: + out_file.writelines(lines) return errs def getAllFilesUnderDir(root, pathFilter=None): retList = [] - for (dirpath, dirnames, filenames) in os.walk(root): + for dirpath, dirnames, filenames in os.walk(root): for fn in filenames: filePath = os.path.join(dirpath, fn) if pathFilter(filePath): @@ -170,47 +215,37 @@ def checkCopyright_main(): it compares between branches "$PR_TARGET_BRANCH" and "current-pr-branch" """ retVal = 0 - global ExemptFiles argparser = argparse.ArgumentParser( - "Checks for a consistent copyright header in git's modified files") - argparser.add_argument("--update-current-year", - dest='update_current_year', - action="store_true", - required=False, - help="If set, " - "update the current year if a header " - "is already present and well formatted.") - argparser.add_argument("--git-modified-only", - dest='git_modified_only', - action="store_true", - required=False, - help="If set, " - "only files seen as modified by git will be " - "processed.") - argparser.add_argument("--exclude", - dest='exclude', - action="append", - required=False, - default=["python/cuml/_thirdparty/"], - help=("Exclude the paths specified (regexp). " - "Can be specified multiple times.")) - - (args, dirs) = argparser.parse_known_args() - try: - ExemptFiles = ExemptFiles + [pathName for pathName in args.exclude] - ExemptFiles = [re.compile(file) for file in ExemptFiles] - except re.error as reException: - print("Regular expression error:") - print(reException) - return 1 + "Checks for a consistent copyright header in git's modified files" + ) + argparser.add_argument( + "--update-current-year", + dest="update_current_year", + action="store_true", + required=False, + help="If set, " + "update the current year if a header is already " + "present and well formatted.", + ) + argparser.add_argument( + "--git-modified-only", + dest="git_modified_only", + action="store_true", + required=False, + help="If set, " + "only files seen as modified by git will be " + "processed.", + ) + + args, dirs = argparser.parse_known_args() if args.git_modified_only: - files = gitutils.modifiedFiles(pathFilter=checkThisFile) + files = [f for f in modifiedFiles() if checkThisFile(f)] else: files = [] for d in [os.path.abspath(d) for d in dirs]: - if not (os.path.isdir(d)): + if not os.path.isdir(d): raise ValueError(f"{d} is not a directory.") files += getAllFilesUnderDir(d, pathFilter=checkThisFile) @@ -219,24 +254,24 @@ def checkCopyright_main(): errors += checkCopyright(f, args.update_current_year) if len(errors) > 0: - print("Copyright headers incomplete in some of the files!") + if any(e[-1] is None for e in errors): + print("Copyright headers incomplete in some of the files!") for e in errors: print(" %s:%d Issue: %s" % (e[0], e[1], e[2])) print("") n_fixable = sum(1 for e in errors if e[-1] is not None) path_parts = os.path.abspath(__file__).split(os.sep) - file_from_repo = os.sep.join(path_parts[path_parts.index("ci"):]) - if n_fixable > 0: - print(("You can run `python {} --git-modified-only " - "--update-current-year` to fix {} of these " - "errors.\n").format(file_from_repo, n_fixable)) + file_from_repo = os.sep.join(path_parts[path_parts.index("ci") :]) + if n_fixable > 0 and not args.update_current_year: + print( + f"You can run `python {file_from_repo} --git-modified-only " + "--update-current-year` and stage the results in git to " + f"fix {n_fixable} of these errors.\n" + ) retVal = 1 - else: - print("Copyright check passed") return retVal if __name__ == "__main__": - import sys sys.exit(checkCopyright_main()) diff --git a/ci/checks/style.sh b/ci/checks/style.sh index 51b59071fd4..e2a74fa7ed4 100755 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -4,76 +4,10 @@ # cuGraph Style Tester # ######################## -# Assume this script is run from the root of the cugraph repo - -# Make failing commands visible when used in a pipeline and allow the script to -# continue on errors, but use ERRORCODE to still allow any failing command to be -# captured for returning a final status code. This allows all style check to -# take place to provide a more comprehensive list of style violations. -set -o pipefail -# CI does `set -e` then sources this file, so we override that so we can output -# the results from the various style checkers -set +e -ERRORCODE=0 -PATH=/conda/bin:$PATH - # Activate common conda env +PATH=/conda/bin:$PATH . /opt/conda/etc/profile.d/conda.sh conda activate rapids -# Run flake8 and get results/return code -FLAKE=`flake8 --config=python/.flake8 python` -ERRORCODE=$((ERRORCODE | $?)) - -# Run black and get results/return code -BLACK_FORMAT=`black --target-version=py38 --check --exclude versioneer.py python 2>&1` -BLACK_FORMAT_RETVAL=$? -ERRORCODE=$((ERRORCODE | ${BLACK_FORMAT_RETVAL})) - -# Run clang-format and check for a consistent code format -CLANG_FORMAT=`python cpp/scripts/run-clang-format.py 2>&1` -CLANG_FORMAT_RETVAL=$? -ERRORCODE=$((ERRORCODE | ${CLANG_FORMAT_RETVAL})) - -# Output results if failure otherwise show pass -if [ "$FLAKE" != "" ]; then - echo -e "\n\n>>>> FAILED: flake8 style check; begin output\n\n" - echo -e "$FLAKE" - echo -e "\n\n>>>> FAILED: flake8 style check; end output\n\n" -else - echo -e "\n\n>>>> PASSED: flake8 style check\n\n" -fi - -if [ "$BLACK_FORMAT_RETVAL" != "0" ]; then - echo -e "\n\n>>>> FAILED: black format check; begin output\n\n" - echo -e "$BLACK_FORMAT" - echo -e "\n\n>>>> FAILED: black format check; end output\n\n" -else - echo -e "\n\n>>>> PASSED: black format check\n\n" -fi - -if [ "$CLANG_FORMAT_RETVAL" != "0" ]; then - echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n" - echo -e "$CLANG_FORMAT" - echo -e "\n\n>>>> FAILED: clang format check; end output\n\n" -else - echo -e "\n\n>>>> PASSED: clang format check\n\n" -fi - -# Check for copyright headers in the files modified currently -#COPYRIGHT=`env PYTHONPATH=ci/utils python ci/checks/copyright.py cpp python benchmarks ci 2>&1` -COPYRIGHT=`env PYTHONPATH=ci/utils python ci/checks/copyright.py --git-modified-only --exclude=".*/versioneer.py" 2>&1` -CR_RETVAL=$? -ERRORCODE=$((ERRORCODE | ${CR_RETVAL})) - -if [ "$CR_RETVAL" != "0" ]; then - echo -e "\n\n>>>> FAILED: copyright check; begin output\n\n" - echo -e "$COPYRIGHT" - echo -e "\n\n>>>> FAILED: copyright check; end output\n\n" -else - echo -e "\n\n>>>> PASSED: copyright check; begin debug output\n\n" - echo -e "$COPYRIGHT" - echo -e "\n\n>>>> PASSED: copyright check; end debug output\n\n" -fi - -exit ${ERRORCODE} +# Run pre-commit checks +pre-commit run --hook-stage manual --all-files --show-diff-on-failure \ No newline at end of file diff --git a/cpp/scripts/gitutils.py b/cpp/scripts/gitutils.py deleted file mode 100644 index 4e30f8fcb03..00000000000 --- a/cpp/scripts/gitutils.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import subprocess -import os -import re - - -def isFileEmpty(f): - return os.stat(f).st_size == 0 - - -def __git(*opts): - """Runs a git command and returns its output""" - cmd = "git " + " ".join(list(opts)) - ret = subprocess.check_output(cmd, shell=True) - return ret.decode("UTF-8").rstrip("\n") - - -def __gitdiff(*opts): - """Runs a git diff command with no pager set""" - return __git("--no-pager", "diff", *opts) - - -def branch(): - """Returns the name of the current branch""" - name = __git("rev-parse", "--abbrev-ref", "HEAD") - name = name.rstrip() - return name - - -def repo_version(): - """ - Determines the version of the repo by using `git describe` - - Returns - ------- - str - The full version of the repo in the format 'v#.#.#{a|b|rc}' - """ - return __git("describe", "--tags", "--abbrev=0") - - -def repo_version_major_minor(): - """ - Determines the version of the repo using `git describe` and returns only - the major and minor portion - - Returns - ------- - str - The partial version of the repo in the format '{major}.{minor}' - """ - - full_repo_version = repo_version() - - match = re.match(r"^v?(?P[0-9]+)(?:\.(?P[0-9]+))?", - full_repo_version) - - if (match is None): - print(" [DEBUG] Could not determine repo major minor version. " - f"Full repo version: {full_repo_version}.") - return None - - out_version = match.group("major") - - if (match.group("minor")): - out_version += "." + match.group("minor") - - return out_version - - -def determine_merge_commit(current_branch="HEAD"): - """ - When running outside of CI, this will estimate the target merge commit hash - of `current_branch` by finding a common ancester with the remote branch - 'branch-{major}.{minor}' where {major} and {minor} are determined from the - repo version. - - Parameters - ---------- - current_branch : str, optional - Which branch to consider as the current branch, by default "HEAD" - - Returns - ------- - str - The common commit hash ID - """ - - try: - # Try to determine the target branch from the most recent tag - head_branch = __git("describe", - "--all", - "--tags", - "--match='branch-*'", - "--abbrev=0") - except subprocess.CalledProcessError: - print(" [DEBUG] Could not determine target branch from most recent " - "tag. Falling back to 'branch-{major}.{minor}.") - head_branch = None - - if (head_branch is not None): - # Convert from head to branch name - head_branch = __git("name-rev", "--name-only", head_branch) - else: - # Try and guess the target branch as "branch-." - version = repo_version_major_minor() - - if (version is None): - return None - - head_branch = "branch-{}".format(version) - - try: - # Now get the remote tracking branch - remote_branch = __git("rev-parse", - "--abbrev-ref", - "--symbolic-full-name", - head_branch + "@{upstream}") - except subprocess.CalledProcessError: - print(" [DEBUG] Could not remote tracking reference for " - f"branch {head_branch}.") - remote_branch = None - - if (remote_branch is None): - return None - - print(f" [DEBUG] Determined TARGET_BRANCH as: '{remote_branch}'. " - "Finding common ancestor.") - - common_commit = __git("merge-base", remote_branch, current_branch) - - return common_commit - - -def uncommittedFiles(): - """ - Returns a list of all changed files that are not yet committed. This - means both untracked/unstaged as well as uncommitted files too. - """ - files = __git("status", "-u", "-s") - ret = [] - for f in files.splitlines(): - f = f.strip(" ") - f = re.sub("\s+", " ", f) # noqa: W605 - tmp = f.split(" ", 1) - # only consider staged files or uncommitted files - # in other words, ignore untracked files - if tmp[0] == "M" or tmp[0] == "A": - ret.append(tmp[1]) - return ret - - -def changedFilesBetween(baseName, branchName, commitHash): - """ - Returns a list of files changed between branches baseName and latest commit - of branchName. - """ - current = branch() - # checkout "base" branch - __git("checkout", "--force", baseName) - # checkout branch for comparing - __git("checkout", "--force", branchName) - # checkout latest commit from branch - __git("checkout", "-fq", commitHash) - - files = __gitdiff("--name-only", - "--ignore-submodules", - f"{baseName}..{branchName}") - - # restore the original branch - __git("checkout", "--force", current) - return files.splitlines() - - -def changesInFileBetween(file, b1, b2, filter=None): - """Filters the changed lines to a file between the branches b1 and b2""" - current = branch() - __git("checkout", "--quiet", b1) - __git("checkout", "--quiet", b2) - diffs = __gitdiff("--ignore-submodules", - "-w", - "--minimal", - "-U0", - "%s...%s" % (b1, b2), - "--", - file) - __git("checkout", "--quiet", current) - lines = [] - for line in diffs.splitlines(): - if filter is None or filter(line): - lines.append(line) - return lines - - -def modifiedFiles(pathFilter=None): - """ - If inside a CI-env (ie. TARGET_BRANCH and COMMIT_HASH are defined, and - current branch is "current-pr-branch"), then lists out all files modified - between these 2 branches. Locally, TARGET_BRANCH will try to be determined - from the current repo version and finding a coresponding branch named - 'branch-{major}.{minor}'. If this fails, this functino will list out all - the uncommitted files in the current branch. - - Such utility function is helpful while putting checker scripts as part of - cmake, as well as CI process. This way, during development, only the files - touched (but not yet committed) by devs can be checked. But, during the CI - process ALL files modified by the dev, as submiited in the PR, will be - checked. This happens, all the while using the same script. - """ - targetBranch = os.environ.get("TARGET_BRANCH") - commitHash = os.environ.get("COMMIT_HASH") - currentBranch = branch() - print( - f" [DEBUG] TARGET_BRANCH={targetBranch}, COMMIT_HASH={commitHash}, " - f"currentBranch={currentBranch}") - - if targetBranch and commitHash and (currentBranch == "current-pr-branch"): - print(" [DEBUG] Assuming a CI environment.") - allFiles = changedFilesBetween(targetBranch, currentBranch, commitHash) - else: - print(" [DEBUG] Did not detect CI environment. " - "Determining TARGET_BRANCH locally.") - - common_commit = determine_merge_commit(currentBranch) - - if (common_commit is not None): - - # Now get the diff. Use --staged to get both diff between - # common_commit..HEAD and any locally staged files - allFiles = __gitdiff("--name-only", - "--ignore-submodules", - "--staged", - f"{common_commit}").splitlines() - else: - # Fallback to just uncommitted files - allFiles = uncommittedFiles() - - files = [] - for f in allFiles: - if pathFilter is None or pathFilter(f): - files.append(f) - - filesToCheckString = "\n\t".join(files) if files else "" - print(f" [DEBUG] Found files to check:\n\t{filesToCheckString}\n") - return files - - -def listAllFilesInDir(folder): - """Utility function to list all files/subdirs in the input folder""" - allFiles = [] - for root, dirs, files in os.walk(folder): - for name in files: - allFiles.append(os.path.join(root, name)) - return allFiles - - -def listFilesToCheck(filesDirs, filter=None): - """ - Utility function to filter the input list of files/dirs based on the input - filter method and returns all the files that need to be checked - """ - allFiles = [] - for f in filesDirs: - if os.path.isfile(f): - if filter is None or filter(f): - allFiles.append(f) - elif os.path.isdir(f): - files = listAllFilesInDir(f) - for f_ in files: - if filter is None or filter(f_): - allFiles.append(f_) - return allFiles diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py deleted file mode 100644 index 46b027f3c06..00000000000 --- a/cpp/scripts/run-clang-format.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import print_function -import sys -import re -import os -import subprocess -import argparse -import tempfile - - -EXPECTED_VERSION = "11.1.0" -VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") -# NOTE: populate this list with more top-level dirs as we add more of them to the cugraph repo -DEFAULT_DIRS = ["cpp/include", - "cpp/src", - "cpp/tests"] - - -def parse_args(): - argparser = argparse.ArgumentParser("Runs clang-format on a project") - argparser.add_argument("-dstdir", type=str, default=None, - help="Directory to store the temporary outputs of" - " clang-format. If nothing is passed for this, then" - " a temporary dir will be created using `mkdtemp`") - argparser.add_argument("-exe", type=str, default="clang-format", - help="Path to clang-format exe") - argparser.add_argument("-inplace", default=False, action="store_true", - help="Replace the source files itself.") - argparser.add_argument("-regex", type=str, - default=r"[.](cu|cuh|h|hpp|cpp)$", - help="Regex string to filter in sources") - argparser.add_argument("-ignore", type=str, default=r"cannylab/bh[.]cu$", - help="Regex used to ignore files from matched list") - argparser.add_argument("-v", dest="verbose", action="store_true", - help="Print verbose messages") - argparser.add_argument("dirs", type=str, nargs="*", - help="List of dirs where to find sources") - args = argparser.parse_args() - args.regex_compiled = re.compile(args.regex) - args.ignore_compiled = re.compile(args.ignore) - if args.dstdir is None: - args.dstdir = tempfile.mkdtemp() - ret = subprocess.check_output("%s --version" % args.exe, shell=True) - ret = ret.decode("utf-8") - version = VERSION_REGEX.match(ret) - if version is None: - raise Exception("Failed to figure out clang-format version!") - version = version.group(1) - if version != EXPECTED_VERSION: - raise Exception("clang-format exe must be v%s found '%s'" % \ - (EXPECTED_VERSION, version)) - if len(args.dirs) == 0: - args.dirs = DEFAULT_DIRS - return args - - -def list_all_src_files(file_regex, ignore_regex, srcdirs, dstdir, inplace): - allFiles = [] - for srcdir in srcdirs: - for root, dirs, files in os.walk(srcdir): - for f in files: - if re.search(file_regex, f): - src = os.path.join(root, f) - if re.search(ignore_regex, src): - continue - if inplace: - _dir = root - else: - _dir = os.path.join(dstdir, root) - dst = os.path.join(_dir, f) - allFiles.append((src, dst)) - return allFiles - - -def run_clang_format(src, dst, exe, verbose): - dstdir = os.path.dirname(dst) - if not os.path.exists(dstdir): - os.makedirs(dstdir) - # run the clang format command itself - if src == dst: - cmd = "%s -i %s" % (exe, src) - else: - cmd = "%s %s > %s" % (exe, src, dst) - try: - subprocess.check_call(cmd, shell=True) - except subprocess.CalledProcessError: - print("Failed to run clang-format! Maybe your env is not proper?") - raise - # run the diff to check if there are any formatting issues - cmd = "diff -q %s %s >/dev/null" % (src, dst) - try: - subprocess.check_call(cmd, shell=True) - if verbose: - print("%s passed" % os.path.basename(src)) - except subprocess.CalledProcessError: - print("%s failed! 'diff %s %s' will show formatting violations!" % \ - (os.path.basename(src), src, dst)) - return False - return True - - -def main(): - args = parse_args() - # Attempt to making sure that we run this script from root of repo always - if not os.path.exists(".git"): - print("Error!! This needs to always be run from the root of repo") - sys.exit(-1) - all_files = list_all_src_files(args.regex_compiled, args.ignore_compiled, - args.dirs, args.dstdir, args.inplace) - # actual format checker - status = True - for src, dst in all_files: - if not run_clang_format(src, dst, args.exe, args.verbose): - status = False - if not status: - print("clang-format failed! You have 2 options:") - print(" 1. Look at formatting differences above and fix them manually") - print(" 2. Or run the below command to bulk-fix all these at once") - print("Bulk-fix command: ") - print(" python cpp/scripts/run-clang-format.py %s -inplace" % \ - " ".join(sys.argv[1:])) - sys.exit(-1) - return - - -if __name__ == "__main__": - main() diff --git a/python/.flake8 b/python/.flake8 deleted file mode 100644 index 5a49c87c04b..00000000000 --- a/python/.flake8 +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. - -[flake8] -exclude = img,notebooks,thirdparty,__init__.py,libgdf -# https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8 -max-line-length = 88 -extend-ignore = E203 diff --git a/python/cugraph/cugraph/tests/test_property_graph.py b/python/cugraph/cugraph/tests/test_property_graph.py index b19829cdcb5..4523c70bc9a 100644 --- a/python/cugraph/cugraph/tests/test_property_graph.py +++ b/python/cugraph/cugraph/tests/test_property_graph.py @@ -1870,9 +1870,10 @@ def test_add_data_noncontiguous(df_type): @pytest.mark.parametrize("df_type", df_types, ids=df_type_id) def test_vertex_ids_different_type(df_type): - """Getting the number of vertices requires combining vertex ids from multiples columns. + """Getting the number of vertices requires combining vertex ids from + multiple columns. - This tests ensures combining these columns works even if they are different types. + This test ensures combining these columns works even if they are different types. """ from cugraph.experimental import PropertyGraph diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000000..1d917da6bd2 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,25 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +[flake8] +filename = *.py, *.pyx, *.pxd, *.pxi +exclude = __init__.py, *.egg, build, docs, .git +force-check = True +max-line-length = 88 +ignore = + # line break before binary operator + W503, + # whitespace before : + E203 +per-file-ignores = + # Rules ignored only in Cython: + # E211: whitespace before '(' (used in multi-line imports) + # E225: Missing whitespace around operators (breaks cython casting syntax like ) + # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) + # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) + # E275: Missing whitespace after keyword (Doesn't work with Cython except?) + # E402: invalid syntax (works for Python, not Cython) + # E999: invalid syntax (works for Python, not Cython) + # W504: line break after binary operator (breaks lines that end with a pointer) + *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxi: E211, E225, E226, E227, E275, E402, E999, W504