Skip to content

Commit

Permalink
status: use PyGit2 for much faster, simpler
Browse files Browse the repository at this point in the history
  • Loading branch information
scivision committed Sep 30, 2024
1 parent ae22b46 commit 4cbba52
Show file tree
Hide file tree
Showing 7 changed files with 213 additions and 164 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
os: [windows-latest, macos-latest, ubuntu-latest]
python-version: ['3.9', '3.x']
python-version: ['3.9', '3.12']

steps:
- uses: actions/checkout@v4
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ line-length = 100

[tool.mypy]
files = ["src"]
ignore_missing_imports = true
6 changes: 4 additions & 2 deletions src/gitutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""
These Git utilities use nothing beyond pure Python and command-line Git.
These Git utilities use pygit2 where possible for much more efficient operations
than using subprocesses even with asyncio.
Speed is emphasized, with pipelining and concurrent `asyncio` routines used throughout
for fastest operation on large numbers of repos.
"""

import logging

__version__ = "1.12.1"
__version__ = "2.0.0"


def _log(verbose: bool):
Expand Down
4 changes: 2 additions & 2 deletions src/gitutils/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def cli():

for d, emails in gitemail(P.path, P.exclude, timeout=P.timeout):
print(MAGENTA + d.stem + BLACK)
for email in emails:
print(*email)
for e in emails:
print(*e)


if __name__ == "__main__":
Expand Down
176 changes: 25 additions & 151 deletions src/gitutils/status.py
Original file line number Diff line number Diff line change
@@ -1,180 +1,54 @@
"""
detect Git local repo modifications. Crazy fast by not invoking remote.
replaced by git status --porcelain:
git ls-files -o -d --exclude-standard: # check for uncommitted files
git --no-pager diff HEAD , # check for uncommitted work
DOES NOT WORK git log --branches --not --remotes # check for uncommitted branches
detect Git local repo modifications
"""

from __future__ import annotations
import argparse
import subprocess
import logging
import typing
from pathlib import Path
import asyncio
from pprint import pprint

from . import _log
from .git import gitdirs, git_exe, subprocess_asyncio, MAGENTA, BLACK, TIMEOUT

# Git sub-command argument lists; the functions in this module append them
# after `git -C <repo>` when building a command line.
C0 = ["rev-parse", "--abbrev-ref", "HEAD"]  # get branch name
C1 = ["status", "--porcelain"]  # uncommitted or changed files

__all__ = ["git_porcelain"]


def git_porcelain(path: Path, timeout: float = TIMEOUT["local"]) -> bool:
    """
    Report whether a single Git repo has a clean working tree.

    Runs ``git status --porcelain`` inside the repo. The result says nothing
    about whether the repo has been pushed or fetched.

    Parameters
    ----------
    path: pathlib.Path
        path to Git repo
    timeout: float
        seconds allowed for the Git subprocess

    Returns
    -------
    is_porcelain: bool
        True if Git printed nothing, i.e. the local repo is clean.
        False if Git printed changes or exited with a non-zero code
        (the latter is also logged as an error).

    Raises
    ------
    NotADirectoryError
        if path is not a directory
    """

    if not path.is_dir():
        raise NotADirectoryError(path)

    cmd = [git_exe(), "-C", str(path), *C1]
    ret = subprocess.run(cmd, stdout=subprocess.PIPE, text=True, timeout=timeout)

    if ret.returncode == 0:
        # any output at all means something is modified or untracked
        return not ret.stdout

    logging.error(f"{path.name} return code {ret.returncode} {C1}")
    return False


async def _git_status(path: Path, timeout: float) -> tuple[str, str] | None:
    """
    Report local changes of one Git repo: uncommitted work, or committed work
    that differs from origin/<current branch>.

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        forwarded to subprocess_asyncio for each Git command

    Returns
    -------
    changes : tuple of str, str
        repo directory name and the Git output describing the changes.
        None if there is nothing to report or a Git command returned non-zero
        (the failure is logged as an error).
    """

    code, out, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C1, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C1} {err}")
        return None

    logging.info(path.name)

    # %% uncommitted changes
    if out:
        return path.name, out

    # %% detect committed, but not pushed
    code, branch, err = await subprocess_asyncio([git_exe(), "-C", str(path)] + C0, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {C0} {err}")
        return None

    # Git terminates the branch name with a newline; strip it (as the serial
    # implementation does) so the revision range below is well-formed.
    branch = branch.strip()

    C2 = [git_exe(), "-C", str(path), "diff", "--stat", f"origin/{branch}.."]
    code, out, err = await subprocess_asyncio(C2, timeout=timeout)
    if code != 0:
        logging.error(f"{path.name} return code {code} {branch} {out} {err}")
        return None

    if out:
        return path.name, out

    return None


def git_status_serial(path: Path, timeout: float = TIMEOUT["local"]) -> tuple[str, str] | None:
    """
    Blocking check of one Git repo for local changes: uncommitted work, or
    committed work that differs from origin/<current branch>.

    Parameters
    ----------
    path : pathlib.Path
        Git repo directory
    timeout : float
        seconds allowed for each Git subprocess

    Returns
    -------
    changes : tuple of str, str
        repo directory name and the stripped Git output describing the changes.
        None if both checks print nothing.
    """

    def _git(args: list[str]) -> str:
        # run `git -C <path> <args>` and return its stripped stdout
        return subprocess.check_output(
            [git_exe(), "-C", str(path)] + args, text=True, timeout=timeout
        ).strip()

    # %% uncommitted changes
    report = _git(C1)

    logging.info(path.name)

    if not report:
        # %% detect committed, but not pushed
        branch = _git(C0)
        report = _git(["diff", "--stat", f"origin/{branch}.."])

    return (path.name, report) if report else None
import pygit2

from . import _log
from .git import gitdirs
from .status_cmd import git_status_serial, git_status_async

async def git_status(path: Path, verbose: bool, timeout: float) -> list[str]:
c = MAGENTA if verbose else ""

changed = []
futures = [_git_status(d, timeout) for d in gitdirs(path)]
for r in asyncio.as_completed(futures, timeout=timeout):
if changes := await r:
changed.append(changes[0])
print(c + changes[0])
if verbose:
print(BLACK + changes[1])
def git_status(path: Path, verbose: bool) -> typing.Iterator[dict]:

return changed
for d in gitdirs(path):
repo = pygit2.Repository(d)
if status := repo.status():
yield status


def cli():
p = argparse.ArgumentParser(description="get status of many Git repos")
p.add_argument("path", help="path to look under", nargs="?", default="~/code")
p.add_argument("-v", "--verbose", action="store_true")
p.add_argument("-t", "--timeout", type=float)
p.add_argument("--serial", help="don't use asyncio parallel execution", action="store_true")
p.add_argument(
"-method",
help="use Git command line serial execution",
choices=["pygit2", "serial", "async"], default="pygit2",
)
P = p.parse_args()

_log(P.verbose)

if P.serial:
c = MAGENTA if P.verbose else ""
if P.method == "pygit2":
for s in git_status(P.path, P.verbose):
pprint(s)
elif P.method == "serial":
for d in gitdirs(P.path):
if changes := git_status_serial(d, P.timeout):
print(c + changes[0])
print(changes[0])
if P.verbose:
print(BLACK + changes[1])
else:
asyncio.run(git_status(P.path, P.verbose, P.timeout))
print(changes[1])
elif P.method == "async":
asyncio.run(git_status_async(P.path, P.verbose, P.timeout))


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 4cbba52

Please sign in to comment.