Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cli: introduce basic dvc du #10068

Merged
merged 1 commit into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dvc/cli/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
data_sync,
destroy,
diff,
du,
experiments,
freeze,
gc,
Expand Down Expand Up @@ -95,6 +96,7 @@
data,
artifacts,
studio,
du,
]


Expand Down
80 changes: 80 additions & 0 deletions dvc/commands/du.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import argparse
import logging

from dvc.cli import completion
from dvc.cli.command import CmdBaseNoRepo
from dvc.cli.utils import DictAction, append_doc_link
from dvc.ui import ui

logger = logging.getLogger(__name__)


class CmdDU(CmdBaseNoRepo):
    """CLI command that prints the disk usage reported by ``Repo.du``."""

    def run(self):
        """Call ``Repo.du`` with the parsed CLI arguments and print a table.

        Each ``(path, size)`` pair returned by ``Repo.du`` becomes one row of
        the form ``(human-readable size, path)``.

        Returns:
            ``0`` after the table has been handed to ``ui.table``.
        """
        from dvc.repo import Repo
        from dvc.utils.humanize import naturalsize

        args = self.args
        usage = Repo.du(
            args.url,
            args.path,
            rev=args.rev,
            summarize=args.summarize,
            config=args.config,
            remote=args.remote,
            remote_config=args.remote_config,
        )

        # Size goes first so the output reads like the classic ``du`` layout.
        rows = []
        for entry_path, entry_size in usage:
            rows.append((naturalsize(entry_size), entry_path))

        ui.table(rows)
        return 0


def add_parser(subparsers, parent_parser):
    """Register the ``du`` sub-command and its arguments on *subparsers*.

    Args:
        subparsers: the sub-parser collection the ``du`` parser is added to.
        parent_parser: parser whose options the ``du`` parser inherits.
    """
    DU_HELP = "Show disk usage."
    config_help = (
        "Path to a config file that will be merged with the config "
        "in the target repository."
    )
    remote_config_help = (
        "Remote config options to merge with a remote's config (default or one "
        "specified by '--remote') in the target repository."
    )

    parser = subparsers.add_parser(
        "du",
        parents=[parent_parser],
        description=append_doc_link(DU_HELP, "du"),
        help=DU_HELP,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Positional ``url`` is registered first so it precedes ``path``.
    parser.add_argument("url", help="Location of DVC repository")
    parser.add_argument(
        "--rev",
        nargs="?",
        help="Git revision (e.g. SHA, branch, tag)",
        metavar="<commit>",
    )
    parser.add_argument(
        "-s",
        "--summarize",
        action="store_true",
        help="Show total disk usage.",
    )
    parser.add_argument("--config", type=str, help=config_help)
    parser.add_argument(
        "--remote",
        type=str,
        help="Remote name to set as a default in the target repository.",
    )
    parser.add_argument(
        "--remote-config",
        type=str,
        nargs="*",
        action=DictAction,
        help=remote_config_help,
    )

    # Optional positional ``path``; the action is tagged for shell completion.
    path_action = parser.add_argument(
        "path",
        nargs="?",
        help="Path to directory within the repository",
    )
    path_action.complete = completion.DIR

    parser.set_defaults(func=CmdDU)
41 changes: 41 additions & 0 deletions dvc/fs/dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import posixpath
import threading
from collections import deque
from contextlib import ExitStack, suppress
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, Union

Expand Down Expand Up @@ -60,6 +61,7 @@ def _merge_info(repo, key, fs_info, dvc_info):
if fs_info:
ret["type"] = fs_info["type"]
ret["size"] = fs_info["size"]
ret["fs_info"] = fs_info
isexec = False
if fs_info["type"] == "file":
isexec = utils.is_exec(fs_info["mode"])
Expand Down Expand Up @@ -421,6 +423,45 @@ def get_file(self, rpath, lpath, **kwargs):
dvc_path = _get_dvc_path(dvc_fs, subkey)
return dvc_fs.get_file(dvc_path, lpath, **kwargs)

def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
    """Compute the space used by *path* and everything below it.

    The tree is walked breadth-first starting from ``self.info(path)``;
    directories are expanded with ``self.ls(name, detail=True)``.

    Args:
        path: starting path.
        total: when true, return a single summed size; otherwise return a
            ``{name: size}`` mapping.
        maxdepth: not supported; any value other than ``None`` raises.
        withdirs: when true, directories that get expanded are also
            recorded in the mapping with their own reported size.
        **kwargs: accepted and ignored.

    Returns:
        An ``int`` when ``total`` is true, else a dict of name -> size.

    Raises:
        NotImplementedError: if ``maxdepth`` is not ``None``.
    """
    if maxdepth is not None:
        raise NotImplementedError

    sizes = {}
    # Directories whose size was taken from the DVC entry without listing.
    dus = {}
    todo = deque([self.info(path)])
    while todo:
        info = todo.popleft()
        name = info["name"]
        size = info["size"] or 0

        if info["type"] != "directory":
            sizes[name] = size
            continue

        dvc_info = info.get("dvc_info") or {}
        entry = dvc_info.get("entry")
        # Shortcut: a directory with DVC info, no ``fs_info`` and a known
        # entry size can be accounted for without listing it. This is only
        # valid for a grand total -- a per-path result needs the individual
        # entries, which previously were silently dropped from the mapping.
        if (
            total
            and dvc_info
            and not info.get("fs_info")
            and entry is not None
            and entry.size is not None
        ):
            dus[name] = entry.size
            continue

        if withdirs:
            sizes[name] = size

        todo.extend(self.ls(name, detail=True))

    if total:
        return sum(sizes.values()) + sum(dus.values())

    return sizes

def close(self):
    """Close the repo stack held in ``self._repo_stack``."""
    repo_stack = self._repo_stack
    repo_stack.close()

Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class Repo:
from dvc.repo.commit import commit # type: ignore[misc]
from dvc.repo.destroy import destroy # type: ignore[misc]
from dvc.repo.diff import diff # type: ignore[misc]
from dvc.repo.du import du as _du # type: ignore[misc]
from dvc.repo.fetch import fetch # type: ignore[misc]
from dvc.repo.freeze import freeze, unfreeze # type: ignore[misc]
from dvc.repo.gc import gc # type: ignore[misc]
Expand All @@ -93,6 +94,7 @@ class Repo:
from .cache import check_missing as cache_check_missing # type: ignore[misc]
from .data import status as data_status # type: ignore[misc]

du = staticmethod(_du)
ls = staticmethod(_ls)
ls_url = staticmethod(_ls_url)
get = staticmethod(_get)
Expand Down
42 changes: 42 additions & 0 deletions dvc/repo/du.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import Any, Dict, Optional, Union


def du(
    url: str,
    path: Optional[str] = None,
    rev: Optional[str] = None,
    summarize: bool = False,
    config: Union[None, Dict[str, Any], str] = None,
    remote: Optional[str] = None,
    remote_config: Optional[dict] = None,
):
    """Report disk usage for *path* inside the repository at *url*.

    Args:
        url: location passed to ``Repo.open``.
        path: path inside the repository; ``None`` is treated as ``""``.
        rev: revision passed to ``Repo.open``.
        summarize: when true, report only the total for *path*.
        config: either a path to a config file (loaded with
            ``Config.load_file``) or a config dict, which is forwarded
            unchanged.
        remote: remote name passed to ``Repo.open``.
        remote_config: remote config options passed to ``Repo.open``.

    Returns:
        A list of ``(path, size)`` tuples. When *summarize* is true or
        *path* is not a directory, the list holds a single tuple for
        *path*. Otherwise it holds one tuple per entry of
        ``fs.ls(path)`` followed by ``(path, sum of those sizes)``.
    """
    from dvc.config import Config

    from . import Repo

    config_dict: Optional[Dict[str, Any]]
    if config and not isinstance(config, dict):
        config_dict = Config.load_file(config)
    else:
        # A dict is an already-parsed config and must reach ``Repo.open``;
        # it used to be discarded here. Falsy values still become ``None``.
        config_dict = config or None

    with Repo.open(
        url,
        rev=rev,
        subrepos=True,
        uninitialized=True,
        config=config_dict,
        remote=remote,
        remote_config=remote_config,
    ) as repo:
        path = path or ""

        fs = repo.dvcfs

        if summarize or not fs.isdir(path):
            return [(path, fs.du(path, total=True))]

        ret = [
            (entry_path, fs.du(entry_path, total=True)) for entry_path in fs.ls(path)
        ]
        # The directory's own total is the sum of its direct entries.
        ret.append((path, sum(entry_size for _, entry_size in ret)))
        return ret
49 changes: 49 additions & 0 deletions tests/func/test_du.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os


def test_du(tmp_dir, dvc):
    """Check ``dvc.du`` sizes for plain and DVC-tracked files and directories."""
    layout = {
        "file": b"file",
        "dvcfile": b"dvcfile",
        "dir": {
            "dirfile": b"dirfile",
            "subdir": {
                "subdirfile": b"subdirfile",
            },
            "dvcsubdir": {
                "dvcsubdirfile": b"dvcsubdirfile",
            },
        },
    }
    tmp_dir.gen(layout)

    dvc.add("dvcfile")
    dvc.add(os.path.join("dir", "dvcsubdir"))

    # Single files: one (path, size) tuple each.
    for target, size in (("file", 4), ("dvcfile", 7)):
        assert dvc.du(".", target) == [(target, size)]

    # Directories: (target, expected detailed listing, expected total).
    dir_cases = (
        (
            "dir/subdir",
            {("dir/subdir/subdirfile", 10), ("dir/subdir", 10)},
            10,
        ),
        (
            "dir/dvcsubdir",
            {("dir/dvcsubdir/dvcsubdirfile", 13), ("dir/dvcsubdir", 13)},
            13,
        ),
        (
            "dir",
            {
                ("dir/dvcsubdir", 13),
                ("dir/subdir", 10),
                ("dir/dirfile", 7),
                ("dir", 30),
            },
            30,
        ),
        (
            "/",
            {
                ("/dvcfile", 7),
                ("/dir", 30),
                ("/file", 4),
                ("/", 41),
            },
            41,
        ),
    )
    for target, detailed, summed in dir_cases:
        assert set(dvc.du(".", target)) == detailed
        assert dvc.du(".", target, summarize=True) == [(target, summed)]
21 changes: 21 additions & 0 deletions tests/unit/command/test_du.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from dvc.cli import parse_args
from dvc.commands.du import CmdDU


def test_du(mocker):
    """Check that ``dvc du`` forwards its parsed arguments to ``Repo.du``."""
    argv = ["du", "myurl", "mypath", "--summarize", "--rev", "myrev"]
    parsed = parse_args(argv)
    assert parsed.func == CmdDU

    command = parsed.func(parsed)
    repo_du = mocker.patch("dvc.repo.Repo.du")

    exit_code = command.run()
    assert exit_code == 0

    expected_kwargs = {
        "rev": "myrev",
        "summarize": True,
        "config": None,
        "remote": None,
        "remote_config": None,
    }
    repo_du.assert_called_once_with("myurl", "mypath", **expected_kwargs)