From 1deaccb9c7943c86c3334977ed33d8638ef0a8e0 Mon Sep 17 00:00:00 2001 From: Haotian An Date: Fri, 26 Apr 2024 18:04:51 +0000 Subject: [PATCH 1/6] require config_name and instance_type in set config --- src/sagemaker/jumpstart/model.py | 12 +++- .../sagemaker/jumpstart/model/test_model.py | 59 ++----------------- 2 files changed, 15 insertions(+), 56 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index f3e18c306d..b678cfba8a 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -429,16 +429,22 @@ def retrieve_example_payload(self) -> JumpStartSerializablePayload: sagemaker_session=self.sagemaker_session, ) - def set_deployment_config(self, config_name: Optional[str]) -> None: + def set_deployment_config(self, config_name: str, instance_type: str) -> None: """Sets the deployment config to apply to the model. Args: - config_name (Optional[str]): + config_name (str): The name of the deployment config. Set to None to unset any existing config that is applied to the model. + instance_type (str): + The instance_type that the model will use after setting + the config. """ self.__init__( - model_id=self.model_id, model_version=self.model_version, config_name=config_name + model_id=self.model_id, + model_version=self.model_version, + instance_type=instance_type, + config_name=config_name, ) @property diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index 6d859aecdb..66efc5deba 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -1614,7 +1614,7 @@ def test_model_set_deployment_config( mock_get_model_specs.reset_mock() mock_model_deploy.reset_mock() mock_get_model_specs.side_effect = get_prototype_spec_with_configs - model.set_deployment_config("neuron-inference") + model.set_deployment_config("neuron-inference", "ml.inf2.2xlarge") assert model.config_name == "neuron-inference" @@ -1622,7 +1622,7 @@ def test_model_set_deployment_config( mock_model_deploy.assert_called_once_with( initial_instance_count=1, - instance_type="ml.inf2.xlarge", + instance_type="ml.inf2.2xlarge", tags=[ {"Key": JumpStartTag.MODEL_ID, "Value": "pytorch-eqa-bert-base-cased"}, {"Key": JumpStartTag.MODEL_VERSION, "Value": "1.0.0"}, @@ -1631,34 +1631,8 @@ def test_model_set_deployment_config( wait=True, endpoint_logging=False, ) - - @mock.patch( - "sagemaker.jumpstart.model.get_jumpstart_configs", side_effect=lambda *args, **kwargs: {} - ) - @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") - @mock.patch("sagemaker.jumpstart.factory.model.Session") - @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") - @mock.patch("sagemaker.jumpstart.model.Model.deploy") - @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) - def test_model_unset_deployment_config( - self, - mock_model_deploy: mock.Mock, - mock_get_model_specs: mock.Mock, - mock_session: mock.Mock, - mock_get_manifest: mock.Mock, - mock_get_jumpstart_configs: mock.Mock, - ): - mock_get_model_specs.side_effect = get_prototype_spec_with_configs - mock_get_manifest.side_effect = ( - lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) - ) - mock_model_deploy.return_value = default_predictor - - model_id, _ = "pytorch-eqa-bert-base-cased", "*" - - mock_session.return_value = sagemaker_session - - model = JumpStartModel(model_id=model_id, config_name="neuron-inference") + mock_model_deploy.reset_mock() + model.set_deployment_config("neuron-inference", "ml.inf2.xlarge") assert model.config_name == "neuron-inference" @@ -1676,24 +1650,6 @@ def test_model_unset_deployment_config( endpoint_logging=False, ) - mock_get_model_specs.reset_mock() - mock_model_deploy.reset_mock() - mock_get_model_specs.side_effect = get_prototype_model_spec - model.set_deployment_config(None) - - model.deploy() - - mock_model_deploy.assert_called_once_with( - initial_instance_count=1, - instance_type="ml.p2.xlarge", - tags=[ - {"Key": JumpStartTag.MODEL_ID, "Value": "pytorch-eqa-bert-base-cased"}, - {"Key": JumpStartTag.MODEL_VERSION, "Value": "1.0.0"}, - ], - wait=True, - endpoint_logging=False, - ) - @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") @@ -1813,6 +1769,7 @@ def test_model_retrieve_deployment_config( expected = get_base_deployment_configs()[0] config_name = expected.get("DeploymentConfigName") + instance_type = expected.get("InstanceType") mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs( model_id, config_name ) @@ -1821,17 +1778,13 @@ def test_model_retrieve_deployment_config( model = JumpStartModel(model_id=model_id) - model.set_deployment_config(config_name) + model.set_deployment_config(config_name, instance_type) self.assertEqual(model.deployment_config, expected) mock_get_init_kwargs.reset_mock() mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs(model_id) - # Unset - model.set_deployment_config(None) - self.assertIsNone(model.deployment_config) - @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") From e079b70100589cb74a5ab0368d2f57e8fdf063d2 Mon Sep 17 00:00:00 2001 From: Haotian An Date: Fri, 26 Apr 2024 18:25:05 +0000 Subject: [PATCH 2/6] docstring --- src/sagemaker/jumpstart/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index b678cfba8a..f975424b0e 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -434,8 +434,8 @@ def set_deployment_config(self, config_name: str, instance_type: str) -> None: Args: config_name (str): - The name of the deployment config. Set to None to unset - any existing config that is applied to the model. + The name of the deployment config to apply to the model. + Call list_deployment_configs to see the list of config names. instance_type (str): The instance_type that the model will use after setting the config. From 2cf72f0d9e2c0921a0d959f046c4e9864dd5cac5 Mon Sep 17 00:00:00 2001 From: Haotian An Date: Fri, 26 Apr 2024 19:10:05 +0000 Subject: [PATCH 3/6] add supported instance types check --- src/sagemaker/jumpstart/factory/model.py | 14 ++++- .../sagemaker/jumpstart/model/test_model.py | 53 +++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py index 68fbf2e861..ef79000694 100644 --- a/src/sagemaker/jumpstart/factory/model.py +++ b/src/sagemaker/jumpstart/factory/model.py @@ -544,7 +544,11 @@ def _add_resources_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartModel def _add_config_name_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartModelInitKwargs: - """Sets default config name to the kwargs. Returns full kwargs.""" + """Sets default config name to the kwargs. Returns full kwargs. + + Raises: + ValueError: If the instance_type is not supported with the current config. + """ specs = verify_model_region_and_return_specs( model_id=kwargs.model_id, @@ -565,6 +569,14 @@ def _add_config_name_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartMod kwargs.config_name or specs.inference_configs.get_top_config_from_ranking().config_name ) + if kwargs.config_name: + resolved_config = specs.inference_configs.configs[kwargs.config_name].resolved_config + supported_instance_types = resolved_config.get("supported_inference_instance_types", []) + if kwargs.instance_type not in supported_instance_types: + raise ValueError( + f"Instance type {kwargs.instance_type} is not supported for config {kwargs.config_name}." + ) + return kwargs diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index 66efc5deba..2c739f6086 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -1650,6 +1650,59 @@ def test_model_set_deployment_config( endpoint_logging=False, ) + @mock.patch( + "sagemaker.jumpstart.model.get_jumpstart_configs", side_effect=lambda *args, **kwargs: {} + ) + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") + @mock.patch("sagemaker.jumpstart.factory.model.Session") + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") + @mock.patch("sagemaker.jumpstart.model.Model.deploy") + @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) + def test_model_set_deployment_config_incompatible_instance_type( + self, + mock_model_deploy: mock.Mock, + mock_get_model_specs: mock.Mock, + mock_session: mock.Mock, + mock_get_manifest: mock.Mock, + mock_get_jumpstart_configs: mock.Mock, + ): + mock_get_model_specs.side_effect = get_prototype_model_spec + mock_get_manifest.side_effect = ( + lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) + ) + mock_model_deploy.return_value = default_predictor + + model_id, _ = "pytorch-eqa-bert-base-cased", "*" + + mock_session.return_value = sagemaker_session + + model = JumpStartModel(model_id=model_id) + + assert model.config_name is None + + model.deploy() + + mock_model_deploy.assert_called_once_with( + initial_instance_count=1, + instance_type="ml.p2.xlarge", + tags=[ + {"Key": JumpStartTag.MODEL_ID, "Value": "pytorch-eqa-bert-base-cased"}, + {"Key": JumpStartTag.MODEL_VERSION, "Value": "1.0.0"}, + ], + wait=True, + endpoint_logging=False, + ) + + mock_get_model_specs.reset_mock() + mock_model_deploy.reset_mock() + mock_get_model_specs.side_effect = get_prototype_spec_with_configs + with pytest.raises(ValueError) as error: + model.set_deployment_config("neuron-inference", "ml.inf2.32xlarge") + assert ( + "Instance type ml.inf2.32xlarge is not supported for config neuron-inference." + in str(error) + ) + @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") From 0732bd97214bd3c07ec7fc159f17af0780a35fc4 Mon Sep 17 00:00:00 2001 From: Haotian An Date: Fri, 26 Apr 2024 19:35:22 +0000 Subject: [PATCH 4/6] add more tests --- src/sagemaker/jumpstart/factory/model.py | 19 ++++++++++++------- .../sagemaker/jumpstart/model/test_model.py | 11 ++++++++++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py index ef79000694..874fbc9a2d 100644 --- a/src/sagemaker/jumpstart/factory/model.py +++ b/src/sagemaker/jumpstart/factory/model.py @@ -569,13 +569,18 @@ def _add_config_name_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartMod kwargs.config_name or specs.inference_configs.get_top_config_from_ranking().config_name ) - if kwargs.config_name: - resolved_config = specs.inference_configs.configs[kwargs.config_name].resolved_config - supported_instance_types = resolved_config.get("supported_inference_instance_types", []) - if kwargs.instance_type not in supported_instance_types: - raise ValueError( - f"Instance type {kwargs.instance_type} is not supported for config {kwargs.config_name}." - ) + if not kwargs.config_name: + return kwargs + + if kwargs.config_name not in set(specs.inference_configs.configs.keys()): + raise ValueError(f"Config {kwargs.config_name} is not supported for model {kwargs.model_id}.") + + resolved_config = specs.inference_configs.configs[kwargs.config_name].resolved_config + supported_instance_types = resolved_config.get("supported_inference_instance_types", []) + if kwargs.instance_type not in supported_instance_types: + raise ValueError( + f"Instance type {kwargs.instance_type} is not supported for config {kwargs.config_name}." + ) return kwargs diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index 2c739f6086..5bbc31a5b1 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -1658,7 +1658,7 @@ def test_model_set_deployment_config( @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") @mock.patch("sagemaker.jumpstart.model.Model.deploy") @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) - def test_model_set_deployment_config_incompatible_instance_type( + def test_model_set_deployment_config_incompatible_instance_type_or_name( self, mock_model_deploy: mock.Mock, mock_get_model_specs: mock.Mock, @@ -1703,6 +1703,15 @@ def test_model_set_deployment_config_incompatible_instance_type( in str(error) ) + with pytest.raises(ValueError) as error: + model.set_deployment_config("neuron-inference-unknown-name", "ml.inf2.32xlarge") + assert ( + "Cannot find Jumpstart config name neuron-inference-unknown-name. " + "List of config names that is supported by the model: " + "['neuron-inference', 'neuron-inference-budget', 'gpu-inference-budget', 'gpu-inference']" + in str(error) + ) + @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") From 3a0f8a5f44c12469077d73d2d8693c35405febdb Mon Sep 17 00:00:00 2001 From: Haotian An Date: Fri, 26 Apr 2024 19:49:33 +0000 Subject: [PATCH 5/6] format --- src/sagemaker/jumpstart/factory/model.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py index 874fbc9a2d..c7b9ecf5bf 100644 --- a/src/sagemaker/jumpstart/factory/model.py +++ b/src/sagemaker/jumpstart/factory/model.py @@ -573,13 +573,16 @@ def _add_config_name_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartMod return kwargs if kwargs.config_name not in set(specs.inference_configs.configs.keys()): - raise ValueError(f"Config {kwargs.config_name} is not supported for model {kwargs.model_id}.") - + raise ValueError( + f"Config {kwargs.config_name} is not supported for model {kwargs.model_id}." + ) + resolved_config = specs.inference_configs.configs[kwargs.config_name].resolved_config supported_instance_types = resolved_config.get("supported_inference_instance_types", []) if kwargs.instance_type not in supported_instance_types: raise ValueError( - f"Instance type {kwargs.instance_type} is not supported for config {kwargs.config_name}." + f"Instance type {kwargs.instance_type} " + "is not supported for config {kwargs.config_name}." ) return kwargs From 1aeca7022b374e45bb405d119edd75331a4930df Mon Sep 17 00:00:00 2001 From: Haotian An Date: Fri, 26 Apr 2024 20:26:10 +0000 Subject: [PATCH 6/6] fix tests --- src/sagemaker/jumpstart/factory/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py index c7b9ecf5bf..53508b56f3 100644 --- a/src/sagemaker/jumpstart/factory/model.py +++ b/src/sagemaker/jumpstart/factory/model.py @@ -582,7 +582,7 @@ def _add_config_name_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartMod if kwargs.instance_type not in supported_instance_types: raise ValueError( f"Instance type {kwargs.instance_type} " - "is not supported for config {kwargs.config_name}." + f"is not supported for config {kwargs.config_name}." ) return kwargs