From 2e97e3c17da4f61387dbf6c55442cafef7055c68 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 1 Sep 2022 15:08:07 +0900 Subject: [PATCH] output: save version_id meta --- dvc/config_schema.py | 1 + dvc/output.py | 41 ++++++++++++++++++++++++++++---- tests/unit/output/test_output.py | 1 + 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/dvc/config_schema.py b/dvc/config_schema.py index 758c56915a4..0d6ff5ee907 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -79,6 +79,7 @@ class RelPath(str): "checksum_jobs": All(Coerce(int), Range(1)), "jobs": All(Coerce(int), Range(1)), Optional("no_traverse"): Bool, # obsoleted + Optional("version_aware"): Bool, } LOCAL_COMMON = { "type": supported_cache_type, diff --git a/dvc/output.py b/dvc/output.py index bad7e9f5284..a13564dc701 100644 --- a/dvc/output.py +++ b/dvc/output.py @@ -288,7 +288,15 @@ def __init__( repo=None, ): self.repo = stage.repo if not repo and stage else repo - fs_cls, fs_config, fs_path = get_cloud_fs(self.repo, url=path) + meta = Meta.from_dict(info) + # NOTE: when version_aware is not passed into get_cloud_fs, it will be + # set based on whether or not path is versioned + fs_kwargs = {"version_aware": True} if meta.version_id else {} + fs_cls, fs_config, fs_path = get_cloud_fs( + self.repo, + url=path, + **fs_kwargs, + ) self.fs = fs_cls(**fs_config) if ( @@ -320,7 +328,7 @@ def __init__( # By resolved path, which contains actual location, # should be absolute and don't contain remote:// refs. 
self.stage = stage - self.meta = Meta.from_dict(info) + self.meta = meta self.hash_info = HashInfo.from_dict(info) self.use_cache = False if self.IS_DEPENDENCY else cache self.metric = False if self.IS_DEPENDENCY else metric @@ -335,6 +343,12 @@ def __init__( self.remote = remote + if self.fs.version_aware: + _, version_id = self.fs.path.coalesce_version( + self.def_path, self.meta.version_id + ) + self.meta.version_id = version_id + def _parse_path(self, fs, fs_path): parsed = urlparse(self.def_path) if ( @@ -360,6 +374,10 @@ def __repr__(self): def __str__(self): if self.fs.protocol != "local": + if self.meta.version_id: + return self.fs.path.version_path( + self.def_path, self.meta.version_id + ) return self.def_path if ( @@ -419,13 +437,17 @@ def cache_path(self): ) def get_hash(self): + _, hash_info = self._get_hash_meta() + return hash_info + + def _get_hash_meta(self): if self.use_cache: odb = self.odb name = self.odb.fs.PARAM_CHECKSUM else: odb = self.repo.odb.local name = self.fs.PARAM_CHECKSUM - _, _, obj = build( + _, meta, obj = build( odb, self.fs_path, self.fs, @@ -433,7 +455,11 @@ def get_hash(self): ignore=self.dvcignore, dry_run=not self.use_cache, ) - return obj.hash_info + return meta, obj.hash_info + + def get_meta(self) -> Meta: + meta, _ = self._get_hash_meta() + return meta @property def is_dir_checksum(self): @@ -469,6 +495,11 @@ def changed_cache(self, filter_info=None): except (FileNotFoundError, ObjectFormatError): return True + def changed_meta(self) -> bool: + if self.fs.version_aware and self.meta.version_id: + return self.meta.version_id != self.get_meta().version_id + return False + def workspace_status(self): if not self.exists: return {str(self): "deleted"} @@ -814,6 +845,7 @@ def move(self, out): self.fs.move(self.fs_path, out.fs_path) self.def_path = out.def_path self.fs_path = out.fs_path + self.__dict__.pop("fs_path") self.save() self.commit() @@ -1095,6 +1127,7 @@ def is_plot(self) -> bool: Meta.PARAM_SIZE: int, 
Meta.PARAM_NFILES: int, Meta.PARAM_ISEXEC: bool, + Meta.PARAM_VERSION_ID: str, } SCHEMA = { diff --git a/tests/unit/output/test_output.py b/tests/unit/output/test_output.py index 7f0cd736bcc..fb728124e17 100644 --- a/tests/unit/output/test_output.py +++ b/tests/unit/output/test_output.py @@ -90,6 +90,7 @@ def test_get_used_objs(exists, expected_message, mocker, caplog): mocker.patch.object( stage.repo.dvcignore, "check_ignore", return_value=_no_match("path") ) + stage.repo.fs.version_aware = False output = Output(stage, "path")