From 5293f3281fa70cbba349b4baf4eb90adae678ff4 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Thu, 8 Feb 2024 22:33:32 -0800 Subject: [PATCH 1/2] lstrip and unit test --- composer/utils/object_store/uc_object_store.py | 3 ++- tests/utils/object_store/test_uc_object_store.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/composer/utils/object_store/uc_object_store.py b/composer/utils/object_store/uc_object_store.py index 9b08530888..fd53e9b40c 100644 --- a/composer/utils/object_store/uc_object_store.py +++ b/composer/utils/object_store/uc_object_store.py @@ -211,8 +211,9 @@ def get_object_size(self, object_name: str) -> int: # Note: The UC team is working on changes to fix the files.get_status API, but it currently # does not work. Once fixed, we will call the files API endpoint. We currently only use this # function in Composer and LLM-foundry to check the UC object's existence. + object_path = self._get_object_path(object_name).lstrip('/') self.client.api_client.do(method='HEAD', - path=f'{self._UC_VOLUME_FILES_API_ENDPOINT}/{self.prefix}/{object_name}', + path=f'{self._UC_VOLUME_FILES_API_ENDPOINT}/{object_path}', headers={'Source': 'mosaicml/composer'}) return 1000000 # Dummy value, as we don't have a way to get the size of the file except DatabricksError as e: diff --git a/tests/utils/object_store/test_uc_object_store.py b/tests/utils/object_store/test_uc_object_store.py index 6d047d42ee..60845e43eb 100644 --- a/tests/utils/object_store/test_uc_object_store.py +++ b/tests/utils/object_store/test_uc_object_store.py @@ -90,6 +90,14 @@ def test_get_object_size(ws_client, uc_object_store, result: str): raise NotImplementedError(f'Test for result={result} is not implemented.') +def test_get_object_size_full_path(ws_client, uc_object_store): + ws_client.api_client.do.return_value = {} + assert uc_object_store.get_object_size('Volumes/catalog/schema/volume/train.txt') == 1000000 + ws_client.api_client.do.assert_called_with(method='HEAD', + path=f'/api/2.0/fs/files/Volumes/catalog/schema/volume/train.txt', + headers={'Source': 'mosaicml/composer'}) + + def test_get_uri(uc_object_store): assert uc_object_store.get_uri('train.txt') == 'dbfs:/Volumes/catalog/schema/volume/train.txt' assert uc_object_store.get_uri('Volumes/catalog/schema/volume/checkpoint/model.bin' From 78c22a5887605e7eb369fc45238b1273e8179d2b Mon Sep 17 00:00:00 2001 From: Daniel King Date: Thu, 8 Feb 2024 22:41:01 -0800 Subject: [PATCH 2/2] path join --- composer/utils/object_store/uc_object_store.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/composer/utils/object_store/uc_object_store.py b/composer/utils/object_store/uc_object_store.py index fd53e9b40c..4fc901212a 100644 --- a/composer/utils/object_store/uc_object_store.py +++ b/composer/utils/object_store/uc_object_store.py @@ -212,9 +212,8 @@ def get_object_size(self, object_name: str) -> int: # does not work. Once fixed, we will call the files API endpoint. We currently only use this # function in Composer and LLM-foundry to check the UC object's existence. object_path = self._get_object_path(object_name).lstrip('/') - self.client.api_client.do(method='HEAD', - path=f'{self._UC_VOLUME_FILES_API_ENDPOINT}/{object_path}', - headers={'Source': 'mosaicml/composer'}) + path = os.path.join(self._UC_VOLUME_FILES_API_ENDPOINT, object_path) + self.client.api_client.do(method='HEAD', path=path, headers={'Source': 'mosaicml/composer'}) return 1000000 # Dummy value, as we don't have a way to get the size of the file except DatabricksError as e: # If the code reaches here, the file was not found