diff --git a/dvc/commands/add.py b/dvc/commands/add.py
index 575c36ce17..523b97ea7b 100644
--- a/dvc/commands/add.py
+++ b/dvc/commands/add.py
@@ -64,7 +64,9 @@ def run(self):
                 jobs=self.args.jobs,
                 force=self.args.force,
             )
-
+        except FileNotFoundError:
+            logger.exception("")
+            return 1
         except DvcException:
             logger.exception("")
             return 1
diff --git a/dvc/output.py b/dvc/output.py
index cadf48f995..6a70b23a94 100644
--- a/dvc/output.py
+++ b/dvc/output.py
@@ -1,10 +1,11 @@
+import errno
 import logging
 import os
 import posixpath
 from collections import defaultdict
 from contextlib import suppress
 from operator import itemgetter
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type, Union
 from urllib.parse import urlparse

 from funcy import collecting, first, project
@@ -25,11 +26,12 @@
 from dvc_data.hashfile import load as oload
 from dvc_data.hashfile.build import build
 from dvc_data.hashfile.checkout import checkout
+from dvc_data.hashfile.db import HashFileDB, add_update_tree
 from dvc_data.hashfile.hash_info import HashInfo
 from dvc_data.hashfile.istextfile import istextfile
 from dvc_data.hashfile.meta import Meta
 from dvc_data.hashfile.transfer import transfer as otransfer
-from dvc_data.hashfile.tree import Tree
+from dvc_data.hashfile.tree import Tree, du
 from dvc_objects.errors import ObjectFormatError

 from .annotations import ANNOTATION_FIELDS, ANNOTATION_SCHEMA, Annotation
@@ -39,7 +41,6 @@
 from .utils.fs import path_isin

 if TYPE_CHECKING:
-    from dvc_data.hashfile.db import HashFileDB
     from dvc_data.hashfile.obj import HashFile
     from dvc_data.index import DataIndexKey
     from dvc_objects.db import ObjectDB
@@ -417,8 +418,6 @@ def __init__(  # noqa: PLR0913

     def _compute_meta_hash_info_from_files(self) -> None:
         if self.files:
-            from dvc_data.hashfile.db import HashFileDB
-
             tree = Tree.from_list(self.files, hash_name=self.hash_name)
             tree.digest(with_meta=True)
             self.odb = HashFileDB(tree.fs, tree.path + ".odb")
@@ -1008,7 +1007,7 @@ def unprotect(self):
         if self.exists:
             self.cache.unprotect(self.fs_path)

-    def get_dir_cache(self, **kwargs):
+    def get_dir_cache(self, **kwargs) -> Optional["Tree"]:
         if not self.is_dir_checksum:
             raise DvcException("cannot get dir cache for file checksum")

@@ -1022,14 +1021,17 @@ def get_dir_cache(self, **kwargs):
                 self.repo.cloud.pull([obj.hash_info], **kwargs)

         if self.obj:
+            assert isinstance(self.obj, Tree)
             return self.obj

         try:
-            self.obj = oload(self.cache, self.hash_info)
+            obj = oload(self.cache, self.hash_info)
+            assert isinstance(obj, Tree)
         except (FileNotFoundError, ObjectFormatError):
-            self.obj = None
+            obj = None

-        return self.obj
+        self.obj = obj
+        return obj

     def _collect_used_dir_cache(
         self, remote=None, force=False, jobs=None, filter_info=None
@@ -1165,7 +1167,7 @@ def _check_can_merge(self, out):

     def merge(self, ancestor, other, allowed=None):
         from dvc_data.hashfile.tree import MergeError as TreeMergeError
-        from dvc_data.hashfile.tree import du, merge
+        from dvc_data.hashfile.tree import merge

         assert other
@@ -1198,6 +1200,160 @@ def merge(self, ancestor, other, allowed=None):
             nfiles=len(merged),
         )

+    def unstage(self, path: str) -> Tuple["Meta", "Tree"]:
+        from pygtrie import Trie
+
+        from dvc_objects.fs.path import Path
+
+        assert isinstance(self.fs.path, Path)
+        rel_key = tuple(self.fs.path.parts(self.fs.path.relpath(path, self.fs_path)))
+
+        if not self.hash_info:
+            tree = Tree()
+        else:
+            tree = self.get_dir_cache() or Tree()
+
+        trie = tree.as_trie()
+        assert isinstance(trie, Trie)
+
+        try:
+            # Slice deletion drops the entry at rel_key and the whole
+            # subtree below it; a missing key means the path was never
+            # part of this output.
+            del trie[rel_key:]  # type: ignore[misc]
+        except KeyError:
+            raise FileNotFoundError(  # noqa: B904
+                errno.ENOENT,
+                os.strerror(errno.ENOENT),
+                self.fs.path.relpath(path),
+            )
+
+        new = Tree.from_trie(trie)
+        new.digest()
+        return Meta(nfiles=len(new), isdir=True), new
+
+    def apply(
+        self,
+        path: str,
+        obj: Union["Tree", "HashFile"],
+        meta: "Meta",
+    ) -> Tuple["Meta", "Tree"]:
+        from pygtrie import Trie
+
+        from dvc_objects.fs.path import Path
+
+        assert isinstance(self.fs.path, Path)
+        append_only = True
+        rel_key = tuple(self.fs.path.parts(self.fs.path.relpath(path, self.fs_path)))
+
+        if not self.hash_info:
+            tree = Tree()
+        else:
+            tree = self.get_dir_cache() or Tree()
+
+        trie = tree.as_trie()
+        assert isinstance(trie, Trie)
+
+        try:
+            del trie[rel_key:]  # type: ignore[misc]
+        except KeyError:
+            pass
+        else:
+            append_only = False
+
+        if isinstance(obj, Tree):
+            items = {(*rel_key, *key): (m, o) for key, m, o in obj}
+        else:
+            items = {rel_key: (meta, obj.hash_info)}
+        trie.update(items)
+
+        new = Tree.from_trie(trie)
+        new.digest()
+
+        size = self.meta.size if self.meta and self.meta.size else None
+        if append_only and size and meta.size is not None:
+            # if files were only appended, we can sum to the existing size
+            size += meta.size
+        elif self.hash_info and self.hash_info == new.hash_info:
+            # if hashes are same, sizes must have been the same
+            size = self.meta.size
+        else:
+            size = None
+
+        meta = Meta(nfiles=len(new), size=size, isdir=True)
+        return meta, new
+
+    def add(  # noqa: C901
+        self, path: Optional[str] = None, no_commit: bool = False, relink: bool = True
+    ) -> Optional["HashFile"]:
+        path = path or self.fs_path
+        if self.hash_info and not self.is_dir_checksum and self.fs_path != path:
+            raise DvcException(
+                f"Cannot modify '{self}' which is being tracked as a file"
+            )
+
+        assert self.repo
+        cache = self.cache if self.use_cache else self.repo.cache.local
+        assert isinstance(cache, HashFileDB)
+
+        new: "HashFile"
+        try:
+            assert self.hash_name
+            staging, meta, obj = build(
+                cache,
+                path,
+                self.fs,
+                self.hash_name,
+                ignore=self.dvcignore,
+                dry_run=not self.use_cache,
+            )
+        except FileNotFoundError as exc:
+            if self.fs_path == path:
+                raise self.DoesNotExistError(self) from exc
+            if not self.is_dir_checksum:
+                raise
+
+            # The target is gone from the workspace but lives inside a
+            # tracked directory: treat this as a granular removal.
+            meta, new = self.unstage(path)
+            staging, obj = None, None
+        else:
+            assert obj
+            assert staging
+            if self.fs_path != path:
+                meta, new = self.apply(path, obj, meta)
+                add_update_tree(staging, new)
+            else:
+                new = obj
+
+        self.obj = new
+        self.hash_info = self.obj.hash_info
+        self.meta = meta
+        self.files = None
+        self.ignore()
+
+        if no_commit or not self.use_cache:
+            return obj
+
+        if isinstance(new, Tree):
+            add_update_tree(cache, new)
+
+        if not obj:
+            return obj
+
+        assert staging
+        assert obj.hash_info
+        otransfer(staging, self.cache, {obj.hash_info}, hardlink=relink, shallow=False)
+
+        if relink:
+            self._checkout(
+                path,
+                self.fs,
+                obj,
+                self.cache,
+                relink=True,
+                state=self.repo.state,
+                prompt=prompt.confirm,
+            )
+            self.set_exec()
+        return obj
+
     @property
     def fspath(self):
         return self.fs_path
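
The unstage() and apply() methods above both hinge on pygtrie's slice deletion
to drop a file or a whole subtree by key prefix. A minimal standalone sketch of
that mechanism (illustrative only, not part of the patch):

    from pygtrie import Trie

    trie = Trie()
    trie[("dir", "foo")] = "foo-entry"
    trie[("dir", "subdir", "bar")] = "bar-entry"

    # Deleting the slice at a key prefix removes the node and everything
    # under it -- here, the entire ("dir", "subdir") subtree.
    del trie[("dir", "subdir"):]
    assert list(trie) == [("dir", "foo")]

    # A prefix that was never inserted raises KeyError, which unstage()
    # converts into FileNotFoundError and apply() treats as append-only.
    try:
        del trie[("dir", "missing"):]
    except KeyError:
        pass
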
diff --git a/dvc/repo/add.py b/dvc/repo/add.py
index 98a57848e7..50e96c39e1 100644
--- a/dvc/repo/add.py
+++ b/dvc/repo/add.py
@@ -2,14 +2,16 @@
 import os
 from contextlib import contextmanager
 from itertools import tee
-from typing import TYPE_CHECKING, Any, Iterator, List, Optional
+from typing import TYPE_CHECKING, Any, Iterator, List, NamedTuple, Optional

 import colorama

 from dvc.exceptions import (
     CacheLinkError,
+    DvcException,
     InvalidArgumentError,
     OutputDuplicationError,
+    OutputNotFoundError,
     OverlappingOutputPathsError,
     RecursiveAddingWhileUsingFilename,
 )
@@ -29,6 +31,11 @@
 logger = logging.getLogger(__name__)


+class StageInfo(NamedTuple):
+    stage: "Stage"
+    output_exists: bool
+
+
 OVERLAPPING_CHILD_FMT = (
     "Cannot add '{out}', because it is overlapping with other "
     "DVC tracked output: '{parent}'.\n"
@@ -114,7 +121,7 @@ def translate_graph_error(stages: Stages) -> Iterator[None]:
         )


-def progress_iter(stages: Stages) -> Iterator["Stage"]:
+def progress_iter(stages: List[StageInfo]) -> Iterator["StageInfo"]:
     total = len(stages)
     desc = "Adding..."
     with ui.progress(stages, total=total, desc=desc, unit="file", leave=True) as pbar:
@@ -122,10 +129,10 @@ def progress_iter(stages: Stages) -> Iterator["Stage"]:
         pbar.bar_format = desc
         pbar.refresh()

-        for stage in pbar:
+        for item in pbar:
             if total > 1:
-                pbar.set_msg(f"{stage.outs[0]}")
-            yield stage
+                pbar.set_msg(f"{item.stage.outs[0]}")
+            yield item
         if total == 1:  # restore bar format for stats
             # pylint: disable=no-member
             pbar.bar_format = pbar.BAR_FMT_DEFAULT
@@ -183,12 +190,14 @@ def add(
     desc = "Collecting targets"
     stages_it = create_stages(repo, add_targets, fname, transfer, **kwargs)
     stages = list(ui.progress(stages_it, desc=desc, unit="file"))
+
+    stages_list = [stage for stage, _ in stages]

     msg = "Collecting stages from the workspace"
-    with translate_graph_error(stages), ui.status(msg) as status:
+    with translate_graph_error(stages_list), ui.status(msg) as status:
         # remove existing stages that are to-be replaced with these
         # new stages for the graph checks.
         repo.check_graph(
-            stages=stages, callback=lambda: status.update("Checking graph")
+            stages=stages_list, callback=lambda: status.update("Checking graph")
         )

     odb = None
@@ -196,18 +205,18 @@ def add(
         odb = repo.cloud.get_remote_odb(kwargs.get("remote"), "add")

     with warn_link_failures() as link_failures:
-        for stage, source in zip(progress_iter(stages), sources):
+        for (stage, output_exists), source in zip(progress_iter(stages), sources):
+            out = stage.outs[0]
             if to_remote or to_cache:
                 stage.transfer(source, to_remote=to_remote, odb=odb, **kwargs)
             else:
                 try:
-                    stage.save()
-                    if not no_commit:
-                        stage.commit()
+                    path = out.fs.path.abspath(source) if output_exists else None
+                    stage.add_outs(path, no_commit=no_commit)
                 except CacheLinkError:
                     link_failures.append(str(stage.relpath))
             stage.dump()
-    return stages
+    return stages_list
@@ -263,7 +272,7 @@ def create_stages(
     external: bool = False,
     force: bool = False,
     **kwargs: Any,
-) -> Iterator["Stage"]:
+) -> Iterator[StageInfo]:
     for target in targets:
         if kwargs.get("out"):
             target = resolve_output(target, kwargs["out"], force=force)
@@ -271,16 +280,24 @@ def create_stages(
             repo, target, always_local=transfer and not kwargs.get("out")
         )

-        stage = repo.stage.create(
-            single_stage=True,
-            validate=False,
-            fname=fname or path,
-            wdir=wdir,
-            outs=[out],
-            external=external,
-            force=force,
-        )
+        try:
+            (out_obj,) = repo.find_outs_by_path(target, strict=False)
+            stage = out_obj.stage
+            if not stage.is_data_source:
+                raise DvcException(f"cannot update {out!r}: not a data source")
+            output_exists = True
+        except OutputNotFoundError:
+            stage = repo.stage.create(
+                single_stage=True,
+                validate=False,
+                fname=fname or path,
+                wdir=wdir,
+                outs=[out],
+                external=external,
+                force=force,
+            )
+            output_exists = False

         out_obj = stage.outs[0]
         out_obj.annot.update(**kwargs)
-        yield stage
+        yield StageInfo(stage, output_exists)
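
For context, the user-visible behavior this StageInfo plumbing enables: adding
a path that lives under an already-tracked directory now updates the existing
stage instead of failing with an overlapping-output error. A sketch in the same
fixture style as the tests further below (illustrative only, not part of the
patch):

    import os

    def test_add_inside_tracked_dir_sketch(tmp_dir, dvc):
        tmp_dir.gen({"data": {"foo": "foo"}})
        dvc.add("data")  # creates data.dvc tracking the whole directory

        tmp_dir.gen({"data": {"bar": "bar"}})
        # find_outs_by_path() locates the existing 'data' output, so this
        # becomes a granular update of data.dvc rather than a new stage.
        dvc.add(os.path.join("data", "bar"))
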
diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py
index d982859494..1f00367845 100644
--- a/dvc/stage/__init__.py
+++ b/dvc/stage/__init__.py
@@ -495,34 +495,34 @@ def save_deps(self, allow_missing=False):
                 if not allow_missing:
                     raise

-    def save_outs(self, allow_missing: bool = False):
-        from dvc.output import OutputDoesNotExistError
-
+    def get_versioned_outs(self) -> Dict[str, "Output"]:
         from .exceptions import StageFileDoesNotExistError, StageNotFound

         try:
             old = self.reload()
-            old_outs = {out.def_path: out for out in old.outs}
-            merge_versioned = any(
-                (
-                    out.files is not None
-                    or (out.meta is not None and out.meta.version_id is not None)
-                )
-                for out in old_outs.values()
-            )
         except (StageFileDoesNotExistError, StageNotFound):
-            merge_versioned = False
+            return {}
+
+        return {
+            out.def_path: out
+            for out in old.outs
+            if out.files is not None
+            or (out.meta is not None and out.meta.version_id is not None)
+        }

+    def save_outs(self, allow_missing: bool = False):
+        from dvc.output import OutputDoesNotExistError
+
+        old_versioned_outs = self.get_versioned_outs()
         for out in self.outs:
             try:
                 out.save()
             except OutputDoesNotExistError:
                 if not (allow_missing or out.checkpoint):
                     raise
-            if merge_versioned:
-                old_out = old_outs.get(out.def_path)
-                if old_out is not None:
-                    out.merge_version_meta(old_out)
+
+            if old_out := old_versioned_outs.get(out.def_path):
+                out.merge_version_meta(old_out)

     def ignore_outs(self) -> None:
         for out in self.outs:
@@ -560,6 +560,29 @@ def commit(self, allow_missing=False, filter_info=None, **kwargs) -> None:
         if link_failures:
             raise CacheLinkError(link_failures)

+    @rwlocked(write=["outs"])
+    def add_outs(  # noqa: C901
+        self, filter_info=None, allow_missing: bool = False, **kwargs
+    ):
+        from dvc.output import OutputDoesNotExistError
+
+        link_failures = []
+        old_versioned_outs = self.get_versioned_outs()
+        for out in self.filter_outs(filter_info):
+            try:
+                out.add(filter_info, **kwargs)
+            except (FileNotFoundError, OutputDoesNotExistError):
+                if not (allow_missing or out.checkpoint):
+                    raise
+            except CacheLinkError:
+                link_failures.append(filter_info or out.fs_path)
+
+            if old_out := old_versioned_outs.get(out.def_path):
+                out.merge_version_meta(old_out)
+
+        if link_failures:
+            raise CacheLinkError(link_failures)
+
     @rwlocked(read=["deps", "outs"])
     def run(
         self,
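
A consequence of the add_outs() error handling above, sketched as a
hypothetical test (not included in the patch): a granular target that exists
neither on disk nor in the tracked tree surfaces as FileNotFoundError, which
is what the new except-branch in dvc/commands/add.py reports at the CLI.

    import os

    import pytest

    def test_granular_add_of_unknown_path_sketch(tmp_dir, dvc):
        tmp_dir.gen({"dir": {"foo": "foo"}})
        dvc.add("dir")

        # build() fails on the missing file, Output.unstage() then fails to
        # find it in the cached tree, and add_outs() re-raises because
        # allow_missing is False.
        with pytest.raises(FileNotFoundError):
            dvc.add(os.path.join("dir", "missing"))
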
diff --git a/pyproject.toml b/pyproject.toml
index d61e4bf12f..231a9b19b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
     "configobj>=5.0.6",
     "distro>=1.3",
     "dpath<3,>=2.1.0",
-    "dvc-data>=0.47.1,<0.48",
+    "dvc-data>=0.47.5,<0.48",
     "dvc-http>=2.29.0",
     "dvc-render>=0.3.1,<1",
     "dvc-studio-client>=0.9.0,<1",
diff --git a/tests/func/test_add.py b/tests/func/test_add.py
index d2268778d5..1a752654cc 100644
--- a/tests/func/test_add.py
+++ b/tests/func/test_add.py
@@ -19,7 +19,6 @@
 from dvc.exceptions import (
     DvcException,
     InvalidArgumentError,
-    OutputDuplicationError,
     OverlappingOutputPathsError,
     RecursiveAddingWhileUsingFilename,
 )
@@ -388,25 +387,25 @@ def test_should_update_state_entry_for_directory_after_add(mocker, dvc, tmp_dir)
     ret = main(["add", "data"])
     assert ret == 0

-    assert file_md5_counter.mock.call_count == 5
+    assert file_md5_counter.mock.call_count == 4

     ret = main(["status"])
     assert ret == 0
-    assert file_md5_counter.mock.call_count == 6
+    assert file_md5_counter.mock.call_count == 5

     ls = "dir" if os.name == "nt" else "ls"
     ret = main(["run", "--single-stage", "-d", "data", "{} {}".format(ls, "data")])
     assert ret == 0
-    assert file_md5_counter.mock.call_count == 8
+    assert file_md5_counter.mock.call_count == 7

     os.rename("data", "data.back")
     ret = main(["checkout"])
     assert ret == 0
-    assert file_md5_counter.mock.call_count == 8
+    assert file_md5_counter.mock.call_count == 7

     ret = main(["status"])
     assert ret == 0
-    assert file_md5_counter.mock.call_count == 10
+    assert file_md5_counter.mock.call_count == 9
@@ -427,15 +426,15 @@ def test_should_collect_dir_cache_only_once(mocker, tmp_dir, dvc):
     counter = mocker.spy(dvc_data.hashfile.build, "_build_tree")
     ret = main(["add", "data"])
     assert ret == 0
-    assert counter.mock.call_count == 3
+    assert counter.mock.call_count == 2

     ret = main(["status"])
     assert ret == 0
-    assert counter.mock.call_count == 4
+    assert counter.mock.call_count == 3

     ret = main(["status"])
     assert ret == 0
-    assert counter.mock.call_count == 5
+    assert counter.mock.call_count == 4
@@ -718,13 +717,7 @@ def test_add_from_data_dir(tmp_dir, scm, dvc):
     tmp_dir.gen({"dir": {"file2": "file2 content"}})

-    with pytest.raises(OverlappingOutputPathsError) as e:
-        dvc.add(os.path.join("dir", "file2"), fname="file2.dvc")
-    assert str(e.value) == (
-        "Cannot add '{out}', because it is overlapping with other DVC "
-        "tracked output: 'dir'.\n"
-        "To include '{out}' in 'dir', run 'dvc commit dir.dvc'"
-    ).format(out=os.path.join("dir", "file2"))
+    dvc.add(os.path.join("dir", "file2"))


 def test_add_parent_dir(tmp_dir, scm, dvc):
@@ -782,10 +775,10 @@ def test_add_optimization_for_hardlink_on_empty_files(tmp_dir, dvc, mocker):
         assert os.path.exists(stage.outs[0].cache_path)


-def test_output_duplication_for_pipeline_tracked(tmp_dir, dvc, run_copy):
+def test_try_adding_pipeline_tracked_output(tmp_dir, dvc, run_copy):
     tmp_dir.dvc_gen("foo", "foo")
     run_copy("foo", "bar", name="copy-foo-bar")
-    with pytest.raises(OutputDuplicationError):
+    with pytest.raises(DvcException, match="cannot update 'bar': not a data source"):
         dvc.add("bar")
@@ -1102,8 +1095,7 @@ def test_add_on_not_existing_file_should_not_remove_stage_file(tmp_dir, dvc):
     "target",
     [
         "dvc.repo.index.Index.check_graph",
-        "dvc.stage.Stage.save",
-        "dvc.stage.Stage.commit",
+        "dvc.stage.Stage.add_outs",
     ],
 )
 def test_add_does_not_remove_stage_file_on_failure(tmp_dir, dvc, mocker, target):
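
The size expectations in the new tests below follow from the bookkeeping in
Output.apply(): a pure append can extend the previously recorded size, while
any removal or in-place replacement makes the total unknowable without a full
rebuild, so it is recorded as None. A worked example using the numbers from
test_virtual_add (illustrative only):

    old_size = 6        # foo (3 bytes) + bar (3 bytes) from the initial add
    appended = 6        # "foobar" content added by dvc.add("dir/foobar")
    append_only = True  # the key was new, nothing was replaced or removed

    new_size = old_size + appended if append_only else None
    assert new_size == 12  # matches Meta(isdir=True, size=12, nfiles=3)
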
diff --git a/tests/func/test_virtual_directory.py b/tests/func/test_virtual_directory.py
new file mode 100644
index 0000000000..6aec736448
--- /dev/null
+++ b/tests/func/test_virtual_directory.py
@@ -0,0 +1,197 @@
+import shutil
+from os.path import join
+
+from dvc_data.hashfile.hash_info import HashInfo
+from dvc_data.hashfile.meta import Meta
+
+
+def test_virtual_add(tmp_dir, dvc, remote):
+    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
+
+    (stage,) = dvc.add("dir")
+    out = stage.outs[0]
+
+    assert out.hash_info == HashInfo(
+        name="md5", value="5ea40360f5b4ec688df672a4db9c17d1.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=6, nfiles=2)
+
+    assert dvc.push() == 3
+    dvc.cache.local.clear()
+
+    tmp_dir.gen(
+        {"dir": {"foobar": "foobar", "lorem": "ipsum", "subdir": {"file": "file"}}}
+    )
+    (stage,) = dvc.add("dir/foobar")
+
+    out = stage.outs[0]
+    assert out.hash_info == HashInfo(
+        name="md5", value="a5beca056acbef9e0013347efdc2b751.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=12, nfiles=3)
+    assert dvc.push() == 2
+
+    (stage,) = dvc.add("dir/subdir")
+    out = stage.outs[0]
+    assert out.hash_info == HashInfo(
+        name="md5", value="de78e9fff7c3478c6b316bf08437d0f6.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=16, nfiles=4)
+    assert dvc.push() == 2
+
+
+def test_virtual_remove(tmp_dir, dvc, remote):
+    tmp_dir.gen(
+        {
+            "dir": {
+                "foo": "foo",
+                "bar": "bar",
+                "subdir": {"lorem": "lorem", "ipsum": "ipsum"},
+            }
+        }
+    )
+
+    (stage,) = dvc.add("dir")
+    out = stage.outs[0]
+
+    assert out.hash_info == HashInfo(
+        name="md5", value="15b0e3c73ad2c748ce206988cb6b7319.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=16, nfiles=4)
+
+    assert dvc.push() == 5
+    dvc.cache.local.clear()
+
+    (tmp_dir / "dir" / "foo").unlink()
+    (stage,) = dvc.add("dir/foo")
+
+    out = stage.outs[0]
+    assert out.hash_info == HashInfo(
+        name="md5", value="991ea7d558d320d8817a0798e9c676f1.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=None, nfiles=3)
+
+    assert dvc.push() == 1
+
+    shutil.rmtree(tmp_dir / "dir" / "subdir")
+    (stage,) = dvc.add("dir/subdir")
+
+    out = stage.outs[0]
+    assert out.hash_info == HashInfo(
+        name="md5", value="91aaa9bb58b657d623ef143b195a67e4.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=None, nfiles=1)
+    assert dvc.push() == 1
+
+
+def test_virtual_update_dir(tmp_dir, dvc, remote):
+    tmp_dir.gen({"dir": {"foo": "foo", "subdir": {"lorem": "lorem"}}})
+    (stage,) = dvc.add("dir")
+    out = stage.outs[0]
+
+    assert out.hash_info == HashInfo(
+        name="md5", value="22a16c9bf84b3068bc2206d88a6b5776.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=8, nfiles=2)
+
+    assert dvc.push() == 3
+    dvc.cache.local.clear()
+    shutil.rmtree("dir")
+
+    tmp_dir.gen({"dir": {"subdir": {"ipsum": "lorem ipsum", "file": "file"}}})
+    (stage,) = dvc.add("dir/subdir")
+
+    out = stage.outs[0]
+    assert out.hash_info == HashInfo(
+        name="md5", value="32f5734ea1a2aa1a067c0c15f0ae5781.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=None, nfiles=3)
+    assert dvc.push() == 3
+
+
+def test_virtual_update_file(tmp_dir, dvc, remote):
+    tmp_dir.gen({"dir": {"foo": "foo", "subdir": {"lorem": "lorem"}}})
+    (stage,) = dvc.add("dir")
+    out = stage.outs[0]
+
+    assert out.hash_info == HashInfo(
+        name="md5", value="22a16c9bf84b3068bc2206d88a6b5776.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=8, nfiles=2)
+
+    assert dvc.push() == 3
+    dvc.cache.local.clear()
+    shutil.rmtree("dir")
+
+    tmp_dir.gen({"dir": {"foo": "foobar"}})
+    (stage,) = dvc.add("dir/foo")
+    out = stage.outs[0]
+    assert out.hash_info == HashInfo(
+        name="md5", value="49408ac059c76086a3a892129a324b60.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=None, nfiles=2)
+    assert dvc.push() == 2
+
+
+def test_virtual_update_noop(tmp_dir, dvc, remote):
+    tmp_dir.gen({"dir": {"foo": "foo", "subdir": {"lorem": "lorem"}}})
+
+    (stage,) = dvc.add("dir")
+    out = stage.outs[0]
+    hash_info = HashInfo(name="md5", value="22a16c9bf84b3068bc2206d88a6b5776.dir")
+    meta = Meta(isdir=True, size=8, nfiles=2)
+
+    assert out.hash_info == hash_info
+    assert out.meta == meta
+    assert dvc.push() == 3
+
+    dvc.cache.local.clear()
+    shutil.rmtree("dir")
+
+    tmp_dir.gen({"dir": {"foo": "foo", "subdir": {"lorem": "lorem"}}})
+
+    (stage,) = dvc.add("dir/foo")
+    out = stage.outs[0]
+    assert out.hash_info == hash_info
+    assert out.meta == meta
+    assert not dvc.push()
+
+    dvc.cache.local.clear()
+
+    (stage,) = dvc.add("dir/subdir")
+    out = stage.outs[0]
+    assert out.hash_info == hash_info
+    assert out.meta == meta
+    assert not dvc.push()
+
+
+def test_partial_checkout_and_update(M, tmp_dir, dvc, remote):
+    tmp_dir.gen({"dir": {"foo": "foo", "subdir": {"lorem": "lorem"}}})
+
+    (stage,) = dvc.add("dir")
+    out = stage.outs[0]
+
+    assert out.hash_info == HashInfo(
+        name="md5", value="22a16c9bf84b3068bc2206d88a6b5776.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=8, nfiles=2)
+
+    assert dvc.push() == 3
+    dvc.cache.local.clear()
+    shutil.rmtree("dir")
+
+    assert dvc.pull("dir/subdir") == M.dict(
+        added=[join("dir", "")],
+        fetched=1,
+    )
+    assert (tmp_dir / "dir").read_text() == {"subdir": {"lorem": "lorem"}}
+
+    tmp_dir.gen({"dir": {"subdir": {"ipsum": "ipsum"}}})
+    (stage,) = dvc.add("dir/subdir/ipsum")
+
+    out = stage.outs[0]
+    assert out.hash_info == HashInfo(
+        name="md5", value="06d953a10e0b0ffacba04876a9351e39.dir"
+    )
+    assert out.meta == Meta(isdir=True, size=13, nfiles=3)
+    assert dvc.push() == 2
diff --git a/tests/unit/repo/test_repo.py b/tests/unit/repo/test_repo.py
index 74fa4a5db0..ba7ea13e18 100644
--- a/tests/unit/repo/test_repo.py
+++ b/tests/unit/repo/test_repo.py
@@ -89,7 +89,6 @@ def test_skip_graph_checks(tmp_dir, dvc, mocker, run_copy):
     mock_build_graph.reset_mock()
     dvc._skip_graph_checks = True
     tmp_dir.gen("baz", "baz text")
-    dvc.add("baz")
     run_copy("baz", "qux", single_stage=True)
     assert not mock_build_graph.called
@@ -97,7 +96,6 @@
     mock_build_graph.reset_mock()
     dvc._skip_graph_checks = False
     tmp_dir.gen("quux", "quux text")
-    dvc.add("quux")
     run_copy("quux", "quuz", single_stage=True)
     assert mock_build_graph.called
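
Taken together, an end-to-end sketch of the granular workflow these changes
and tests cover (fixture style as above; illustrative only, not part of the
patch):

    import shutil

    def test_granular_workflow_sketch(tmp_dir, dvc, remote):
        tmp_dir.gen({"dir": {"foo": "foo", "subdir": {"lorem": "lorem"}}})
        dvc.add("dir")  # track the whole directory
        dvc.push()

        dvc.cache.local.clear()
        shutil.rmtree("dir")

        dvc.pull("dir/subdir")  # partial checkout of a single subdirectory

        tmp_dir.gen({"dir": {"subdir": {"new": "new"}}})
        dvc.add("dir/subdir/new")  # rebuilds the tree via Output.apply()
        dvc.push()  # uploads only the new file plus the updated .dir object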