Skip to content

Commit

Permalink
add: merge existing file meta for versioned dirs
Browse files Browse the repository at this point in the history
  • Loading branch information
pmrowla committed Nov 21, 2022
1 parent 014d149 commit 2a1af0b
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
22 changes: 22 additions & 0 deletions dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,28 @@ def restore_fields(self, other: "Output"):
self.remote = other.remote
self.can_push = other.can_push

def merge_version_meta(self, other: "Output") -> None:
    """Merge version meta for files which are unchanged from ``other``.

    Behaviour, by case:

    * This output has no (truthy) ``hash_info``: nothing is done.
    * This output is a directory (``hash_info.isdir``): the work is
      delegated to ``_merge_dir_version_meta``.
    * This output is a single file: if its ``hash_info`` equals
      ``other.hash_info``, ``self.meta`` is replaced by ``other.meta``.
      Otherwise nothing is changed.

    Note that ``other.meta`` is assigned by reference, not copied, so both
    outputs share the same meta object afterwards.

    Args:
        other: The previously saved output to take version meta from.
    """
    if not self.hash_info:
        return
    if self.hash_info.isdir:
        # The directory helper mutates ``self`` in place and has no result,
        # so call it for its side effects and return explicitly.
        self._merge_dir_version_meta(other)
        return
    if self.hash_info != other.hash_info:
        # Content changed since ``other`` was saved; its meta is stale.
        return
    self.meta = other.meta

def _merge_dir_version_meta(self, other: "Output") -> None:
    """Merge per-file version meta from ``other`` into this directory output.

    Does nothing when this output has no (truthy) ``obj`` or when ``other``
    is not a directory output. Otherwise ``self.obj`` is replaced by the
    tree returned from ``update_meta(self.obj, other_obj)`` and
    ``self.files`` is rebuilt from that tree.

    Args:
        other: The previously saved output to take version meta from. Its
            ``obj`` is used when already set; otherwise it is obtained via
            ``other.get_obj()``.

    Raises:
        AssertionError: If either object is not a ``Tree``, or if the
            updated tree's ``hash_info`` differs from the current one.
    """
    # Imported lazily, at call time rather than module import time.
    from dvc_data.hashfile.tree import update_meta

    if not self.obj or not other.hash_info.isdir:
        return
    other_obj = other.obj if other.obj is not None else other.get_obj()
    assert isinstance(self.obj, Tree) and isinstance(other_obj, Tree)
    # NOTE(review): presumably update_meta carries meta over for entries
    # unchanged between the two trees -- confirm against dvc-data.
    updated = update_meta(self.obj, other_obj)
    # Merging meta must not alter the tree's content hash.
    assert updated.hash_info == self.obj.hash_info
    self.obj = updated
    self.files = updated.as_list(with_meta=True)


META_SCHEMA = {
Meta.PARAM_SIZE: int,
Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def add( # noqa: C901
stage.transfer(source, to_remote=to_remote, odb=odb, **kwargs)
else:
try:
stage.save()
stage.save(merge_versioned=True)
if not no_commit:
stage.commit()
except CacheLinkError:
Expand Down
33 changes: 30 additions & 3 deletions dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,10 +463,12 @@ def compute_md5(self):
logger.debug("Computed %s md5: '%s'", self, m)
return m

def save(self, allow_missing=False):
def save(self, allow_missing: bool = False, merge_versioned: bool = False):
self.save_deps(allow_missing=allow_missing)

self.save_outs(allow_missing=allow_missing)
self.save_outs(
allow_missing=allow_missing, merge_versioned=merge_versioned
)
self.md5 = self.compute_md5()

self.repo.stage_cache.save(self)
Expand All @@ -481,15 +483,40 @@ def save_deps(self, allow_missing=False):
if not allow_missing:
raise

def save_outs(self, allow_missing=False):
def save_outs(
    self, allow_missing: bool = False, merge_versioned: bool = False
) -> None:
    """Save every output of this stage, optionally merging version meta.

    Args:
        allow_missing: When true, an output that does not exist is skipped
            instead of raising ``OutputDoesNotExistError``. Outputs with a
            truthy ``checkpoint`` attribute are always skipped this way.
        merge_versioned: When true, the stage is reloaded and, if any
            previously saved output carries ``files`` or a meta with a
            ``version_id``, each saved output gets the version meta of the
            old output with the same ``def_path`` merged in.

    Raises:
        OutputDoesNotExistError: If an output is missing and neither
            ``allow_missing`` nor ``out.checkpoint`` permits skipping it.
    """
    from dvc.output import OutputDoesNotExistError

    from .exceptions import StageFileDoesNotExistError, StageNotFound

    # Bound up front so the merge step below can never hit an unbound name,
    # regardless of which path through the ``try`` is taken.
    old_outs = {}

    if merge_versioned:
        try:
            old = self.reload()
            old_outs = {out.def_path: out for out in old.outs}
            # Only merge when the old stage actually has versioned data:
            # either a ``files`` list or a meta carrying a ``version_id``.
            merge_versioned = any(
                (
                    out.files is not None
                    or (
                        out.meta is not None
                        and out.meta.version_id is not None
                    )
                )
                for out in old_outs.values()
            )
        except (StageFileDoesNotExistError, StageNotFound):
            # No previously saved stage to merge from.
            merge_versioned = False

    for out in self.outs:
        try:
            out.save()
        except OutputDoesNotExistError:
            if not (allow_missing or out.checkpoint):
                raise
        if merge_versioned:
            old_out = old_outs.get(out.def_path)
            if old_out is not None:
                out.merge_version_meta(old_out)

def ignore_outs(self):
for out in self.outs:
Expand Down

0 comments on commit 2a1af0b

Please sign in to comment.