diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c9b4acc..a95a445 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: os: [windows-latest, macos-latest, ubuntu-latest] - python-version: ['3.9', '3.x'] + python-version: ['3.9', '3.12'] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 9f3baf7..3735f2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,3 +41,4 @@ line-length = 100 [tool.mypy] files = ["src"] +ignore_missing_imports = true diff --git a/src/gitutils/__init__.py b/src/gitutils/__init__.py index 17b0bef..5b24eb0 100644 --- a/src/gitutils/__init__.py +++ b/src/gitutils/__init__.py @@ -1,12 +1,14 @@ """ -These Git utilities use nothing beyond pure Python and command-line Git. +These Git utilities use pygit2 where possible for much more efficient operations +than using subprocesses even with asyncio. + Speed is emphasized throughout, with pipelining and concurrent `asyncio` routines throughout for fastest operation on large numbers of repos. """ import logging -__version__ = "1.12.1" +__version__ = "2.0.0" def _log(verbose: bool): diff --git a/src/gitutils/email.py b/src/gitutils/email.py index ecc8266..8f5bc86 100644 --- a/src/gitutils/email.py +++ b/src/gitutils/email.py @@ -69,8 +69,8 @@ def cli(): for d, emails in gitemail(P.path, P.exclude, timeout=P.timeout): print(MAGENTA + d.stem + BLACK) - for email in emails: - print(*email) + for e in emails: + print(*e) if __name__ == "__main__": diff --git a/src/gitutils/status.py b/src/gitutils/status.py index a6ebfd0..ef19b63 100644 --- a/src/gitutils/status.py +++ b/src/gitutils/status.py @@ -1,159 +1,27 @@ """ -detect Git local repo modifications. Crazy fast by not invoking remote. 
- -replaced by git status --porcelain: - git ls-files -o -d --exclude-standard: # check for uncommitted files - git --no-pager diff HEAD , # check for uncommitted work - -DOES NOT WORK git log --branches --not --remotes # check for uncommitted branches +detect Git local repo modifications """ from __future__ import annotations import argparse -import subprocess -import logging +import typing from pathlib import Path import asyncio +from pprint import pprint -from . import _log -from .git import gitdirs, git_exe, subprocess_asyncio, MAGENTA, BLACK, TIMEOUT - -C0 = ["rev-parse", "--abbrev-ref", "HEAD"] # get branch name -C1 = ["status", "--porcelain"] # uncommitted or changed files - -__all__ = ["git_porcelain"] - - -def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool: - """ - detects if single Git repo is porcelain i.e. clean. - May not have been pushed or fetched. - - Parameters - ---------- - - path: pathlib.Path - path to Git repo - - Returns - ------- - - is_porcelain: bool - true if local Git is clean - """ - - if not path.is_dir(): - raise NotADirectoryError(path) - - ret = subprocess.run( - [git_exe(), "-C", str(path)] + C1, - stdout=subprocess.PIPE, - text=True, - timeout=timeout, - ) - if ret.returncode != 0: - logging.error(f"{path.name} return code {ret.returncode} {C1}") - return False - return not ret.stdout - - -async def _git_status(path: Path, timeout: float) -> tuple[str, str] | None: - """ - Notes which Git repos have local changes that haven't been pushed to remote - - Parameters - ---------- - path : pathlib.Path - Git repo directory - - Returns - ------- - changes : tuple of pathlib.Path, str - Git repo local changes - """ - - code, out, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C1, timeout=timeout) - if code != 0: - logging.error(f"{path.name} return code {code} {C1} {err}") - return None - - logging.info(path.name) - - # %% uncommitted changes - if out: - return path.name, out - - # %% detect committed, 
but not pushed - code, branch, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C0, timeout=timeout) - if code != 0: - logging.error(f"{path.name} return code {code} {C0} {err}") - return None - - C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."] - code, out, err = await subprocess_asyncio(C2, timeout=timeout) - if code != 0: - logging.error(f"{path.name} return code {code} {branch} {out} {err}") - return None - - if out: - return path.name, out - - return None - - -def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> tuple[str, str] | None: - """ - - Notes which Git repos have local changes that haven't been pushed to remote - - Parameters - ---------- - path : pathlib.Path - Git repo directory - - Returns - ------- - changes : tuple of pathlib.Path, str - Git repo local changes - """ - - out = subprocess.check_output( - [git_exe(), "-C", str(path)] + C1, text=True, timeout=timeout - ).strip() - - logging.info(path.name) - - # %% uncommitted changes - if out: - return path.name, out - - # %% detect committed, but not pushed - branch = subprocess.check_output( - [git_exe(), "-C", str(path)] + C0, text=True, timeout=timeout - ).strip() - - C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."] - out = subprocess.check_output(C2, text=True, timeout=timeout).strip() - - if out: - return path.name, out - - return None +import pygit2 +from . 
def git_status(path: Path, verbose: bool) -> typing.Iterator[dict]:
    """
    Yield the pygit2 status of every Git repo under `path` that reports changes.

    Parameters
    ----------
    path : pathlib.Path
        top-level directory handed to `gitdirs` to enumerate Git repos
    verbose : bool
        accepted for interface compatibility; not used by this implementation

    Yields
    ------
    status : dict
        the non-empty result of `pygit2.Repository(...).status()` for one repo.
        Repos whose status is empty (falsy) are skipped.
    """

    for repo_dir in gitdirs(path):
        changes = pygit2.Repository(repo_dir).status()
        if not changes:
            # nothing reported for this repo
            continue
        yield changes
C0 = ["rev-parse", "--abbrev-ref", "HEAD"]  # get branch name
C1 = ["status", "--porcelain"]  # uncommitted or changed files

# git_status_serial and git_status_async are imported by gitutils.status,
# so they are part of this module's public API alongside git_porcelain.
__all__ = ["git_porcelain", "git_status_serial", "git_status_async"]


def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool:
    """
    detects if single Git repo is porcelain i.e. clean.
    May not have been pushed or fetched.

    Parameters
    ----------
    path: pathlib.Path
        path to Git repo
    timeout : float
        seconds allowed for the `git status --porcelain` subprocess

    Returns
    -------
    is_porcelain: bool
        true if local Git is clean.
        False if there are changes OR if Git exited with a non-zero code (logged).

    Raises
    ------
    NotADirectoryError
        `path` is not a directory
    subprocess.TimeoutExpired
        Git did not finish within `timeout`
    """

    if not path.is_dir():
        raise NotADirectoryError(path)

    ret = subprocess.run(
        [git_exe(), "-C", str(path)] + C1,
        stdout=subprocess.PIPE,
        text=True,
        timeout=timeout,
    )
    if ret.returncode != 0:
        logging.error(f"{path.name} return code {ret.returncode} {C1}")
        return False
    # porcelain output is empty exactly when there is nothing to report
    return not ret.stdout


async def _git_status(path: Path, timeout: float) -> tuple[str, str] | None:
    """
    Notes which Git repos have local changes that haven't been pushed to remote

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        passed to every `subprocess_asyncio` call

    Returns
    -------
    changes : tuple of str, str, or None
        (repo directory name, Git output describing the changes).
        None if nothing was found, or if any Git command failed (failure is logged).
    """

    code, out, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C1, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C1} {err}")
        return None

    logging.info(path.name)

    # %% uncommitted changes
    if out:
        return path.name, out

    # %% detect committed, but not pushed
    code, branch, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C0, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C0} {err}")
        return None

    # Same normalization as git_status_serial: stray whitespace/newline in the
    # branch name would corrupt the "origin/<branch>.." revision range.
    # No-op if subprocess_asyncio already strips its output.
    branch = branch.strip()

    C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
    code, out, err = await subprocess_asyncio(C2, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {branch} {out} {err}")
        return None

    if out:
        return path.name, out

    return None


def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> tuple[str, str] | None:
    """
    Notes which Git repos have local changes that haven't been pushed to remote

    Blocking counterpart of `_git_status`, using one subprocess at a time.

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        seconds allowed for each Git subprocess

    Returns
    -------
    changes : tuple of str, str, or None
        (repo directory name, Git output describing the changes),
        or None if neither uncommitted nor unpushed changes were found

    Raises
    ------
    subprocess.CalledProcessError
        a Git command exited with a non-zero code
    subprocess.TimeoutExpired
        a Git command did not finish within `timeout`
    """

    out = subprocess.check_output(
        [git_exe(), "-C", str(path)] + C1, text=True, timeout=timeout
    ).strip()

    logging.info(path.name)

    # %% uncommitted changes
    if out:
        return path.name, out

    # %% detect committed, but not pushed
    branch = subprocess.check_output(
        [git_exe(), "-C", str(path)] + C0, text=True, timeout=timeout
    ).strip()

    C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
    out = subprocess.check_output(C2, text=True, timeout=timeout).strip()

    if out:
        return path.name, out

    return None


async def git_status_async(path: Path, verbose: bool, timeout: float) -> list[str]:
    """
    Concurrently check every Git repo under `path` and print those with changes.

    Parameters
    ----------
    path : pathlib.Path
        top-level directory handed to `gitdirs` to enumerate Git repos
    verbose : bool
        if true, print the repo name prefixed with MAGENTA, followed by
        the Git output prefixed with BLACK
    timeout : float
        used both per Git subprocess and as the overall `asyncio.as_completed` limit

    Returns
    -------
    changed : list of str
        directory names of repos with changes, in completion order
    """

    c = MAGENTA if verbose else ""

    changed = []
    futures = [_git_status(d, timeout) for d in gitdirs(path)]
    # results are handled as soon as each repo finishes, not in directory order
    for r in asyncio.as_completed(futures, timeout=timeout):
        if changes := await r:
            changed.append(changes[0])
            print(c + changes[0])
            if verbose:
                print(BLACK + changes[1])

    return changed
def test_modified_libgit2(git_init):
    """pygit2-based git_status: nothing before, one entry after creating foo.txt.

    NOTE(review): assumes the `git_init` fixture (defined outside this view)
    provides the path of a clean Git repo -- confirm in conftest.
    """
    root = git_init
    assert len(list(gitutils.status.git_status(root, False))) == 0

    new_file = root / "foo.txt"
    new_file.touch()
    dirty = list(gitutils.status.git_status(root, False))
    assert len(dirty) == 1
    print(dirty)


def test_modified_async(git_init):
    """Subprocess/asyncio git_status_async: nothing before, one entry after creating foo.txt."""
    root = git_init
    limit = TIMEOUT["local"]

    before = asyncio.run(gitutils.status_cmd.git_status_async(root, False, limit))
    assert len(before) == 0

    (root / "foo.txt").touch()
    after = asyncio.run(gitutils.status_cmd.git_status_async(root, False, limit))
    assert len(list(after)) == 1