From 054c29e93152d701a18cf4bb31a3cc142568bd1c Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 14:26:54 -0700 Subject: [PATCH 01/19] Init Deployment configs outside Model init. --- src/sagemaker/jumpstart/model.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 78c36cb954..ef410affc5 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -361,17 +361,14 @@ def _validate_model_id_and_type(): self.model_package_arn = model_init_kwargs.model_package_arn self.init_kwargs = model_init_kwargs.to_kwargs_dict(False) - metadata_configs = get_jumpstart_configs( + self.metadata_configs = get_jumpstart_configs( region=self.region, model_id=self.model_id, model_version=self.model_version, sagemaker_session=self.sagemaker_session, model_type=self.model_type, ) - self._deployment_configs = [ - self._convert_to_deployment_config_metadata(config_name, config) - for config_name, config in metadata_configs.items() - ] + self._deployment_configs = None def log_subscription_warning(self) -> None: """Log message prompting the customer to subscribe to the proprietary model.""" @@ -461,7 +458,7 @@ def benchmark_metrics(self) -> pd.DataFrame: def display_benchmark_metrics(self) -> None: """Display Benchmark Metrics for deployment configs.""" - print(self.benchmark_metrics.to_markdown()) + print(self.benchmark_metrics.to_markdown(index=False, tablefmt="grid")) def list_deployment_configs(self) -> List[Dict[str, Any]]: """List deployment configs for ``This`` model. @@ -469,6 +466,7 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ + self._init_deployment_configs() return self._deployment_configs def _create_sagemaker_model( @@ -868,6 +866,7 @@ def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]: Returns: Dict[str, List[str]]: Deployment config benchmark data. 
""" + self._init_deployment_configs() return get_metrics_from_deployment_configs( self._deployment_configs, config_name, @@ -885,11 +884,20 @@ def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dic if config_name is None: return None + self._init_deployment_configs() for deployment_config in self._deployment_configs: if deployment_config.get("DeploymentConfigName") == config_name: return deployment_config return None + def _init_deployment_configs(self) -> None: + """Initialize the deployment configs.""" + if self._deployment_configs is None: + self._deployment_configs = [ + self._convert_to_deployment_config_metadata(config_name, config) + for config_name, config in self.metadata_configs.items() + ] + def _convert_to_deployment_config_metadata( self, config_name: str, metadata_config: JumpStartMetadataConfig ) -> Dict[str, Any]: From b997a01c0aa163eb43b3d08c69f834a3d7e3e0ff Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 16:43:17 -0700 Subject: [PATCH 02/19] Testing with NB --- src/sagemaker/jumpstart/model.py | 134 ++++++++++++++----------------- src/sagemaker/jumpstart/types.py | 15 +++- src/sagemaker/jumpstart/utils.py | 60 ++++++-------- src/sagemaker/utils.py | 19 +++-- 4 files changed, 106 insertions(+), 122 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index a1524dcb76..7f82202713 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -42,7 +42,6 @@ JumpStartSerializablePayload, DeploymentConfigMetadata, JumpStartBenchmarkStat, - JumpStartMetadataConfig, ) from sagemaker.jumpstart.utils import ( validate_model_id_and_get_type, @@ -361,7 +360,7 @@ def _validate_model_id_and_type(): self.model_package_arn = model_init_kwargs.model_package_arn self.init_kwargs = model_init_kwargs.to_kwargs_dict(False) - self.metadata_configs = get_jumpstart_configs( + self._metadata_configs = get_jumpstart_configs( region=self.region, model_id=self.model_id, model_version=self.model_version, @@ -460,11 +459,18 @@ def benchmark_metrics(self) -> pd.DataFrame: Returns: Metrics: Pandas DataFrame object. """ - return pd.DataFrame(self._get_benchmarks_data(self.config_name)) + return pd.DataFrame(self._get_benchmarks_data().get("Data")) def display_benchmark_metrics(self) -> None: """Display Benchmark Metrics for deployment configs.""" - print(self.benchmark_metrics.to_markdown(index=False, tablefmt="grid")) + data = self._get_benchmarks_data().get("Data") + error_message = self._get_benchmarks_data().get("ErrorMessage") + + if error_message is not None: + JUMPSTART_LOGGER.warning(error_message) + + df = pd.DataFrame(data) + print(df.to_markdown(index=False, tablefmt="grid")) def list_deployment_configs(self) -> List[Dict[str, Any]]: """List deployment configs for ``This`` model. @@ -863,20 +869,18 @@ def register_deploy_wrapper(*args, **kwargs): return model_package - @lru_cache - def _get_benchmarks_data(self, config_name: str) -> Dict[str, List[str]]: + # @lru_cache + def _get_benchmarks_data(self) -> Dict[str, Any]: """Deployment configs benchmark metrics. - Args: - config_name (str): The name of the selected deployment config. Returns: Dict[str, List[str]]: Deployment config benchmark data. 
""" - self._init_deployment_configs() - return get_metrics_from_deployment_configs( - self._deployment_configs, - config_name, - ) + error_message = self._init_deployment_configs() + return { + "ErrorMessage": error_message, + "Data": get_metrics_from_deployment_configs(self._deployment_configs), + } @lru_cache def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dict[str, Any]]: @@ -896,70 +900,50 @@ def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dic return deployment_config return None - def _init_deployment_configs(self) -> None: + def _init_deployment_configs(self) -> Optional[str]: """Initialize the deployment configs.""" + deployment_configs = [] + error_message = None if self._deployment_configs is None: - self._deployment_configs = [ - self._convert_to_deployment_config_metadata(config_name, config) - for config_name, config in self.metadata_configs.items() - ] - - def _convert_to_deployment_config_metadata( - self, config_name: str, metadata_config: JumpStartMetadataConfig - ) -> Dict[str, Any]: - """Retrieve deployment config for config name. - - Args: - config_name (str): Name of deployment config. - metadata_config (JumpStartMetadataConfig): Metadata config for deployment config. - Returns: - A deployment metadata config for config name (dict[str, Any]). - """ - default_inference_instance_type = metadata_config.resolved_config.get( - "default_inference_instance_type" - ) - - benchmark_metrics = ( - metadata_config.benchmark_metrics.get(default_inference_instance_type) - if metadata_config.benchmark_metrics is not None - else None - ) - - should_fetch_instance_rate_metric = True - if benchmark_metrics is not None: - for benchmark_metric in benchmark_metrics: - if benchmark_metric.name.lower() == "instance rate": - should_fetch_instance_rate_metric = False - break - - if should_fetch_instance_rate_metric: - instance_rate = get_instance_rate_per_hour( - instance_type=default_inference_instance_type, region=self.region - ) - if instance_rate is not None: - instance_rate_metric = JumpStartBenchmarkStat(instance_rate) - - if benchmark_metrics is None: - benchmark_metrics = [instance_rate_metric] - else: - benchmark_metrics.append(instance_rate_metric) - - init_kwargs = get_init_kwargs( - model_id=self.model_id, - instance_type=default_inference_instance_type, - sagemaker_session=self.sagemaker_session, - ) - deploy_kwargs = get_deploy_kwargs( - model_id=self.model_id, - instance_type=default_inference_instance_type, - sagemaker_session=self.sagemaker_session, - ) - - deployment_config_metadata = DeploymentConfigMetadata( - config_name, benchmark_metrics, init_kwargs, deploy_kwargs - ) - - return deployment_config_metadata.to_json() + should_fetch_instance_rate_metric = True + for config_name, metadata_config in self._metadata_configs.items(): + benchmark_metrics = {} + for instance_type, benchmark_metric in metadata_config.benchmark_metrics.items(): + if should_fetch_instance_rate_metric: + instance_type_rate = get_instance_rate_per_hour( + instance_type=instance_type, region=self.region + ) + + if "Message" in instance_type_rate: + error_message = instance_type_rate["Message"] + should_fetch_instance_rate_metric = False + benchmark_metrics[instance_type] = benchmark_metric + else: + benchmark_metric.append(JumpStartBenchmarkStat(instance_type_rate)) + benchmark_metrics[instance_type] = benchmark_metric + + default_inference_instance_type = metadata_config.resolved_config.get( + "default_inference_instance_type" + ) + init_kwargs 
= get_init_kwargs( + model_id=self.model_id, + instance_type=default_inference_instance_type, + sagemaker_session=self.sagemaker_session, + ) + deploy_kwargs = get_deploy_kwargs( + model_id=self.model_id, + instance_type=default_inference_instance_type, + sagemaker_session=self.sagemaker_session, + ) + + deployment_config_metadata = DeploymentConfigMetadata( + config_name, benchmark_metrics, init_kwargs, deploy_kwargs + ) + deployment_configs.append(deployment_config_metadata.to_json()) + + if len(deployment_configs) > 0: + self._deployment_configs = deployment_configs + return error_message def __str__(self) -> str: """Overriding str(*) method to make more human-readable.""" diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index cd74a03e5a..52230541ad 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2264,19 +2264,24 @@ class DeploymentArgs(BaseDeploymentConfigDataHolder): "model_data", "environment", "instance_type", + "supported_instance_types", "compute_resource_requirements", "model_data_download_timeout", "container_startup_health_check_timeout", ] def __init__( - self, init_kwargs: JumpStartModelInitKwargs, deploy_kwargs: JumpStartModelDeployKwargs + self, + init_kwargs: JumpStartModelInitKwargs, + deploy_kwargs: JumpStartModelDeployKwargs, + supported_instance_types: List[str], ): """Instantiates DeploymentConfig object.""" if init_kwargs is not None: self.image_uri = init_kwargs.image_uri self.model_data = init_kwargs.model_data self.instance_type = init_kwargs.instance_type + self.supported_instance_types = supported_instance_types self.environment = init_kwargs.env if init_kwargs.resources is not None: self.compute_resource_requirements = ( @@ -2302,12 +2307,16 @@ class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder): def __init__( self, config_name: str, - benchmark_metrics: List[JumpStartBenchmarkStat], + benchmark_metrics: Dict[str, List[JumpStartBenchmarkStat]], init_kwargs: JumpStartModelInitKwargs, deploy_kwargs: JumpStartModelDeployKwargs, ): """Instantiates DeploymentConfigMetadata object.""" self.deployment_config_name = config_name - self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs) + self.deployment_args = DeploymentArgs( + init_kwargs, + deploy_kwargs, + [instance_type for instance_type in benchmark_metrics], + ) self.acceleration_configs = None self.benchmark_metrics = benchmark_metrics diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 357bdb6eb7..d8f0a2ac44 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1031,59 +1031,45 @@ def get_jumpstart_configs( def get_metrics_from_deployment_configs( - deployment_configs: List[Dict[str, Any]], config_name: str + deployment_configs: List[Dict[str, Any]] ) -> Dict[str, List[str]]: """Extracts metrics from deployment configs. Args: deployment_configs (list[dict[str, Any]]): List of deployment configs. - config_name (str): The name of the deployment config use by the model. 
""" - data = {"Config Name": [], "Instance Type": [], "Selected": [], "Accelerated": []} + data = {"Config Name": [], "Instance Type": []} for index, deployment_config in enumerate(deployment_configs): if deployment_config.get("DeploymentArgs") is None: continue benchmark_metrics = deployment_config.get("BenchmarkMetrics") - if benchmark_metrics is not None: + for current_instance_type, current_instance_type_metrics in benchmark_metrics.items(): + data["Config Name"].append(deployment_config.get("DeploymentConfigName")) - data["Instance Type"].append( - deployment_config.get("DeploymentArgs").get("InstanceType") - ) - data["Selected"].append( - "Yes" - if ( - config_name is not None - and config_name == deployment_config.get("DeploymentConfigName") - ) - else "No" + instance_type_to_display = ( + f"{current_instance_type} (Default)" + if current_instance_type + == deployment_config.get("DeploymentArgs").get("InstanceType") + else current_instance_type ) - - accelerated_configs = deployment_config.get("AccelerationConfigs") - if accelerated_configs is None: - data["Accelerated"].append("No") - else: - data["Accelerated"].append( - "Yes" - if ( - len(accelerated_configs) > 0 - and accelerated_configs[0].get("Enabled", False) - ) - else "No" - ) + data["Instance Type"].append(instance_type_to_display) if index == 0: - for benchmark_metric in benchmark_metrics: - column_name = f"{benchmark_metric.get('name')} ({benchmark_metric.get('unit')})" - data[column_name] = [] - - for benchmark_metric in benchmark_metrics: - column_name = f"{benchmark_metric.get('name')} ({benchmark_metric.get('unit')})" - if column_name in data.keys(): - data[column_name].append(benchmark_metric.get("value")) + temp_data = {} + for metric in current_instance_type_metrics: + column_name = f"{metric.get('name')} ({metric.get('unit')})" + if metric.get("name").lower() == "instance rate": + data[column_name] = [] + else: + temp_data[column_name] = [] + data = {**data, **temp_data} + + for metric in current_instance_type_metrics: + column_name = f"{metric.get('name')} ({metric.get('unit')})" + if column_name in data: + data[column_name].append(metric.get("value")) - if "Yes" not in data["Accelerated"]: - del data["Accelerated"] return data diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 35f60b37e1..ad86ba20e7 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -37,6 +37,7 @@ import boto3 import botocore +from botocore.exceptions import ClientError from botocore.utils import merge_dicts from six.moves.urllib import parse import pandas as pd @@ -1664,7 +1665,7 @@ def deep_override_dict( def get_instance_rate_per_hour( instance_type: str, region: str, -) -> Union[Dict[str, str], None]: +) -> Dict[str, str]: """Gets instance rate per hour for the given instance type. 
Args: @@ -1698,19 +1699,23 @@ def get_instance_rate_per_hour( if isinstance(price_data, str): price_data = json.loads(price_data) - return extract_instance_rate_per_hour(price_data) - except Exception as e: # pylint: disable=W0703 - logging.exception("Error getting instance rate: %s", e) - return None + rate = extract_instance_rate_per_hour(price_data) + if rate is None: + return {} + return rate + except ClientError as e: + return e.response["Error"] + except Exception: # pylint: disable=W0703 + return {"Message": "Something went wrong while getting instance rate."} -def extract_instance_rate_per_hour(price_data: Dict[str, Any]) -> Union[Dict[str, str], None]: +def extract_instance_rate_per_hour(price_data: Dict[str, Any]) -> Optional[Dict[str, str]]: """Extract instance rate per hour for the given Price JSON data. Args: price_data (Dict[str, Any]): The Price JSON data. Returns: - Union[Dict[str, str], None]: Instance rate per hour. + Optional[Dict[str, str], None]: Instance rate per hour. """ if price_data is not None: From 78226f43e8565b4c291b052463490e034170e240 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 17:16:48 -0700 Subject: [PATCH 03/19] Testing with NB-V2 --- src/sagemaker/jumpstart/model.py | 8 ++++++-- src/sagemaker/jumpstart/types.py | 3 ++- src/sagemaker/jumpstart/utils.py | 8 ++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 7f82202713..cd6ead81f6 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -922,7 +922,8 @@ def _init_deployment_configs(self) -> Optional[str]: benchmark_metric.append(JumpStartBenchmarkStat(instance_type_rate)) benchmark_metrics[instance_type] = benchmark_metric - default_inference_instance_type = metadata_config.resolved_config.get( + resolved_config = metadata_config.resolved_config + default_inference_instance_type = resolved_config.get( "default_inference_instance_type" ) init_kwargs = get_init_kwargs( @@ -937,7 +938,10 @@ def _init_deployment_configs(self) -> Optional[str]: ) deployment_config_metadata = DeploymentConfigMetadata( - config_name, benchmark_metrics, init_kwargs, deploy_kwargs + config_name, + benchmark_metrics, + resolved_config.get("supported_inference_instance_types"), + init_kwargs, deploy_kwargs ) deployment_configs.append(deployment_config_metadata.to_json()) diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index 52230541ad..6b3ea9c56a 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2308,6 +2308,7 @@ def __init__( self, config_name: str, benchmark_metrics: Dict[str, List[JumpStartBenchmarkStat]], + supported_instance_types: List[str], init_kwargs: JumpStartModelInitKwargs, deploy_kwargs: JumpStartModelDeployKwargs, ): @@ -2316,7 +2317,7 @@ def __init__( self.deployment_args = DeploymentArgs( init_kwargs, deploy_kwargs, - [instance_type for instance_type in benchmark_metrics], + supported_instance_types ) self.acceleration_configs = None self.benchmark_metrics = benchmark_metrics diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index d8f0a2ac44..0c17351da5 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1060,16 +1060,16 @@ def get_metrics_from_deployment_configs( if index == 0: temp_data = {} for metric in current_instance_type_metrics: - column_name = f"{metric.get('name')} ({metric.get('unit')})" - if metric.get("name").lower() 
== "instance rate": + column_name = f"{metric.name} ({metric.unit})" + if metric.name.lower() == "instance rate": data[column_name] = [] else: temp_data[column_name] = [] data = {**data, **temp_data} for metric in current_instance_type_metrics: - column_name = f"{metric.get('name')} ({metric.get('unit')})" + column_name = f"{metric.name} ({metric.unit})" if column_name in data: - data[column_name].append(metric.get("value")) + data[column_name].append(metric.value) return data From f70d902c85c2e1a37ab0e4f0a0de755f9447a50b Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 18:44:17 -0700 Subject: [PATCH 04/19] Refactoring, NB testing --- src/sagemaker/jumpstart/model.py | 118 +++++++++++++------------------ src/sagemaker/jumpstart/types.py | 60 ++++++++-------- src/sagemaker/jumpstart/utils.py | 2 +- src/sagemaker/utils.py | 19 ++--- 4 files changed, 91 insertions(+), 108 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index cd6ead81f6..5e1422f3b3 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -367,7 +367,6 @@ def _validate_model_id_and_type(): sagemaker_session=self.sagemaker_session, model_type=self.model_type, ) - self._deployment_configs = None def log_subscription_warning(self) -> None: """Log message prompting the customer to subscribe to the proprietary model.""" @@ -450,7 +449,7 @@ def deployment_config(self) -> Optional[Dict[str, Any]]: Returns: Optional[Dict[str, Any]]: Deployment config that will be applied to the model. """ - return self._retrieve_selected_deployment_config(self.config_name) + return self._retrieve_selected_deployment_config(self.config_name, self.instance_type) @property def benchmark_metrics(self) -> pd.DataFrame: @@ -459,18 +458,11 @@ def benchmark_metrics(self) -> pd.DataFrame: Returns: Metrics: Pandas DataFrame object. """ - return pd.DataFrame(self._get_benchmarks_data().get("Data")) + return pd.DataFrame(self._get_benchmarks_data()) def display_benchmark_metrics(self) -> None: """Display Benchmark Metrics for deployment configs.""" - data = self._get_benchmarks_data().get("Data") - error_message = self._get_benchmarks_data().get("ErrorMessage") - - if error_message is not None: - JUMPSTART_LOGGER.warning(error_message) - - df = pd.DataFrame(data) - print(df.to_markdown(index=False, tablefmt="grid")) + print(self.benchmark_metrics.to_markdown(index=False, tablefmt="grid")) def list_deployment_configs(self) -> List[Dict[str, Any]]: """List deployment configs for ``This`` model. @@ -478,8 +470,7 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ - self._init_deployment_configs() - return self._deployment_configs + return self._get_deployment_configs(self.instance_type) def _create_sagemaker_model( self, @@ -869,21 +860,18 @@ def register_deploy_wrapper(*args, **kwargs): return model_package - # @lru_cache def _get_benchmarks_data(self) -> Dict[str, Any]: """Deployment configs benchmark metrics. Returns: Dict[str, List[str]]: Deployment config benchmark data. 
""" - error_message = self._init_deployment_configs() - return { - "ErrorMessage": error_message, - "Data": get_metrics_from_deployment_configs(self._deployment_configs), - } + return get_metrics_from_deployment_configs(self._get_deployment_configs(self.instance_type)) @lru_cache - def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dict[str, Any]]: + def _retrieve_selected_deployment_config( + self, config_name: str, instance_type: str + ) -> Optional[Dict[str, Any]]: """Retrieve the deployment config to apply to the model. Args: @@ -894,60 +882,52 @@ def _retrieve_selected_deployment_config(self, config_name: str) -> Optional[Dic if config_name is None: return None - self._init_deployment_configs() - for deployment_config in self._deployment_configs: + for deployment_config in self._get_deployment_configs(instance_type): if deployment_config.get("DeploymentConfigName") == config_name: return deployment_config return None - def _init_deployment_configs(self) -> Optional[str]: - """Initialize the deployment configs.""" + @lru_cache + def _get_deployment_configs(self, selected_instance_type: str) -> List[Dict[str, Any]]: + """Retrieve the deployment configs to apply to the model.""" deployment_configs = [] - error_message = None - if self._deployment_configs is None: - should_fetch_instance_rate_metric = True - for config_name, metadata_config in self._metadata_configs.items(): - benchmark_metrics = {} - for instance_type, benchmark_metric in metadata_config.benchmark_metrics.items(): - if should_fetch_instance_rate_metric: - instance_type_rate = get_instance_rate_per_hour( - instance_type=instance_type, region=self.region - ) - - if "Message" in instance_type_rate: - error_message = instance_type_rate["Message"] - should_fetch_instance_rate_metric = False - benchmark_metrics[instance_type] = benchmark_metric - else: - benchmark_metric.append(JumpStartBenchmarkStat(instance_type_rate)) - benchmark_metrics[instance_type] = benchmark_metric - - resolved_config = metadata_config.resolved_config - default_inference_instance_type = resolved_config.get( - "default_inference_instance_type" - ) - init_kwargs = get_init_kwargs( - model_id=self.model_id, - instance_type=default_inference_instance_type, - sagemaker_session=self.sagemaker_session, - ) - deploy_kwargs = get_deploy_kwargs( - model_id=self.model_id, - instance_type=default_inference_instance_type, - sagemaker_session=self.sagemaker_session, - ) - - deployment_config_metadata = DeploymentConfigMetadata( - config_name, - benchmark_metrics, - resolved_config.get("supported_inference_instance_types"), - init_kwargs, deploy_kwargs - ) - deployment_configs.append(deployment_config_metadata.to_json()) - - if len(deployment_configs) > 0: - self._deployment_configs = deployment_configs - return error_message + + should_fetch_instance_rate_metric = True + for config_name, metadata_config in self._metadata_configs.items(): + benchmark_metrics = {} + for instance_type, benchmark_metric in metadata_config.benchmark_metrics.items(): + if should_fetch_instance_rate_metric: + instance_type_rate = get_instance_rate_per_hour( + instance_type=instance_type, region=self.region + ) + + if instance_type_rate is None: + should_fetch_instance_rate_metric = False + benchmark_metrics[instance_type] = benchmark_metric + else: + benchmark_metric.append(JumpStartBenchmarkStat(instance_type_rate)) + benchmark_metrics[instance_type] = benchmark_metric + + init_kwargs = get_init_kwargs( + model_id=self.model_id, + 
instance_type=selected_instance_type, + sagemaker_session=self.sagemaker_session, + ) + deploy_kwargs = get_deploy_kwargs( + model_id=self.model_id, + instance_type=selected_instance_type, + sagemaker_session=self.sagemaker_session, + ) + deployment_config_metadata = DeploymentConfigMetadata( + config_name, + benchmark_metrics, + metadata_config.resolved_config, + init_kwargs, + deploy_kwargs, + ) + deployment_configs.append(deployment_config_metadata.to_json()) + + return deployment_configs def __str__(self) -> str: """Overriding str(*) method to make more human-readable.""" diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index 6b3ea9c56a..d5f72e718a 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2235,26 +2235,28 @@ def to_json(self) -> Dict[str, Any]: if hasattr(self, att): cur_val = getattr(self, att) att = self._convert_to_pascal_case(att) - if issubclass(type(cur_val), JumpStartDataHolderType): - json_obj[att] = cur_val.to_json() - elif isinstance(cur_val, list): - json_obj[att] = [] - for obj in cur_val: - if issubclass(type(obj), JumpStartDataHolderType): - json_obj[att].append(obj.to_json()) - else: - json_obj[att].append(obj) - elif isinstance(cur_val, dict): - json_obj[att] = {} - for key, val in cur_val.items(): - if issubclass(type(val), JumpStartDataHolderType): - json_obj[att][self._convert_to_pascal_case(key)] = val.to_json() - else: - json_obj[att][key] = val - else: - json_obj[att] = cur_val + json_obj[att] = self._val_to_json(cur_val) return json_obj + def _val_to_json(self, val: Any) -> Any: + """Converts ``Any`` to JSON.""" + if issubclass(type(val), JumpStartDataHolderType): + return val.to_json() + if isinstance(val, list): + list_obj = [] + for obj in val: + list_obj.append(self._val_to_json(obj)) + return list_obj + if isinstance(val, dict): + dict_obj = {} + for k, v in val.items(): + if isinstance(v, JumpStartDataHolderType): + dict_obj[self._convert_to_pascal_case(k)] = self._val_to_json(v) + else: + dict_obj[k] = self._val_to_json(v) + return dict_obj + return val + class DeploymentArgs(BaseDeploymentConfigDataHolder): """Dataclass representing a Deployment Config.""" @@ -2263,7 +2265,8 @@ class DeploymentArgs(BaseDeploymentConfigDataHolder): "image_uri", "model_data", "environment", - "instance_type", + "selected_instance_type", + "default_instance_type", "supported_instance_types", "compute_resource_requirements", "model_data_download_timeout", @@ -2274,14 +2277,17 @@ def __init__( self, init_kwargs: JumpStartModelInitKwargs, deploy_kwargs: JumpStartModelDeployKwargs, - supported_instance_types: List[str], + resolved_config: Dict[str, Any], ): """Instantiates DeploymentConfig object.""" if init_kwargs is not None: self.image_uri = init_kwargs.image_uri self.model_data = init_kwargs.model_data - self.instance_type = init_kwargs.instance_type - self.supported_instance_types = supported_instance_types + self.selected_instance_type = init_kwargs.instance_type + self.default_instance_type = resolved_config.get("default_inference_instance_type") + self.supported_instance_types = resolved_config.get( + "supported_inference_instance_types" + ) self.environment = init_kwargs.env if init_kwargs.resources is not None: self.compute_resource_requirements = ( @@ -2308,16 +2314,12 @@ def __init__( self, config_name: str, benchmark_metrics: Dict[str, List[JumpStartBenchmarkStat]], - supported_instance_types: List[str], + resolved_config: Dict[str, Any], init_kwargs: JumpStartModelInitKwargs, 
deploy_kwargs: JumpStartModelDeployKwargs, ): """Instantiates DeploymentConfigMetadata object.""" self.deployment_config_name = config_name - self.deployment_args = DeploymentArgs( - init_kwargs, - deploy_kwargs, - supported_instance_types - ) - self.acceleration_configs = None + self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs, resolved_config) + self.acceleration_configs = resolved_config.get("acceleration_configs") self.benchmark_metrics = benchmark_metrics diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 0c17351da5..a461532db7 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1052,7 +1052,7 @@ def get_metrics_from_deployment_configs( instance_type_to_display = ( f"{current_instance_type} (Default)" if current_instance_type - == deployment_config.get("DeploymentArgs").get("InstanceType") + == deployment_config.get("DeploymentArgs").get("DefaultInstanceType") else current_instance_type ) data["Instance Type"].append(instance_type_to_display) diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index ad86ba20e7..0e7ea1095b 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -1665,16 +1665,17 @@ def deep_override_dict( def get_instance_rate_per_hour( instance_type: str, region: str, -) -> Dict[str, str]: +) -> Optional[Dict[str, str]]: """Gets instance rate per hour for the given instance type. Args: instance_type (str): The instance type. region (str): The region. Returns: - Union[Dict[str, str], None]: Instance rate per hour. - Example: {'name': 'Instance Rate', 'unit': 'USD/Hrs', 'value': '1.1250000000'}}. + Optional[Dict[str, str]]: Instance rate per hour. + Example: {'name': 'Instance Rate', 'unit': 'USD/Hrs', 'value': '1.1250000000'}}. """ + error_message = "Instance rate metrics will be omitted. 
Reason: %s" region_name = "us-east-1" if region.startswith("eu") or region.startswith("af"): @@ -1699,14 +1700,14 @@ def get_instance_rate_per_hour( if isinstance(price_data, str): price_data = json.loads(price_data) - rate = extract_instance_rate_per_hour(price_data) - if rate is None: - return {} - return rate + return extract_instance_rate_per_hour(price_data) + return None except ClientError as e: - return e.response["Error"] + logger.warning(error_message, e.response["Error"]) + return None except Exception: # pylint: disable=W0703 - return {"Message": "Something went wrong while getting instance rate."} + logger.warning(error_message, "Something went wrong while getting instance rates.") + return None def extract_instance_rate_per_hour(price_data: Dict[str, Any]) -> Optional[Dict[str, str]]: From 9420310a2a5ad2b689c4e661a6581e7c87fe2f26 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 19:03:58 -0700 Subject: [PATCH 05/19] NB Testing and Refactoring --- src/sagemaker/jumpstart/model.py | 2 ++ src/sagemaker/jumpstart/utils.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 5e1422f3b3..126231469f 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -907,6 +907,8 @@ def _get_deployment_configs(self, selected_instance_type: str) -> List[Dict[str, else: benchmark_metric.append(JumpStartBenchmarkStat(instance_type_rate)) benchmark_metrics[instance_type] = benchmark_metric + else: + benchmark_metrics[instance_type] = benchmark_metric init_kwargs = get_init_kwargs( model_id=self.model_id, diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index a461532db7..8868329f97 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1060,16 +1060,16 @@ def get_metrics_from_deployment_configs( if index == 0: temp_data = {} for metric in current_instance_type_metrics: - column_name = f"{metric.name} ({metric.unit})" - if metric.name.lower() == "instance rate": + column_name = f"{metric.get('name')} ({metric.get('unit')})" + if metric.get("name").lower() == "instance rate": data[column_name] = [] else: temp_data[column_name] = [] data = {**data, **temp_data} for metric in current_instance_type_metrics: - column_name = f"{metric.name} ({metric.unit})" + column_name = f"{metric.get('name')} ({metric.get('unit')})" if column_name in data: - data[column_name].append(metric.value) + data[column_name].append(metric.get('value')) return data From 1d678ac1bf15a659fb7e2b27ebcc2b82fafc542a Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 19:57:07 -0700 Subject: [PATCH 06/19] Testing --- src/sagemaker/jumpstart/model.py | 21 +++++++++++++++------ src/sagemaker/jumpstart/types.py | 6 ++---- src/sagemaker/jumpstart/utils.py | 22 ++++++++++++---------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 126231469f..3f48bb4cc6 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -449,7 +449,10 @@ def deployment_config(self) -> Optional[Dict[str, Any]]: Returns: Optional[Dict[str, Any]]: Deployment config that will be applied to the model. 
""" - return self._retrieve_selected_deployment_config(self.config_name, self.instance_type) + deployment_config = self._retrieve_selected_deployment_config( + self.config_name, self.instance_type + ) + return deployment_config.to_json() if deployment_config is not None else None @property def benchmark_metrics(self) -> pd.DataFrame: @@ -470,7 +473,11 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ - return self._get_deployment_configs(self.instance_type) + # Temp + return [ + deployment_config.to_json() + for deployment_config in self._get_deployment_configs(self.instance_type) + ] def _create_sagemaker_model( self, @@ -871,7 +878,7 @@ def _get_benchmarks_data(self) -> Dict[str, Any]: @lru_cache def _retrieve_selected_deployment_config( self, config_name: str, instance_type: str - ) -> Optional[Dict[str, Any]]: + ) -> Optional[DeploymentConfigMetadata]: """Retrieve the deployment config to apply to the model. Args: @@ -883,12 +890,14 @@ def _retrieve_selected_deployment_config( return None for deployment_config in self._get_deployment_configs(instance_type): - if deployment_config.get("DeploymentConfigName") == config_name: + if deployment_config.deployment_config_name == config_name: return deployment_config return None @lru_cache - def _get_deployment_configs(self, selected_instance_type: str) -> List[Dict[str, Any]]: + def _get_deployment_configs( + self, selected_instance_type: str + ) -> List[DeploymentConfigMetadata]: """Retrieve the deployment configs to apply to the model.""" deployment_configs = [] @@ -927,7 +936,7 @@ def _get_deployment_configs(self, selected_instance_type: str) -> List[Dict[str, init_kwargs, deploy_kwargs, ) - deployment_configs.append(deployment_config_metadata.to_json()) + deployment_configs.append(deployment_config_metadata) return deployment_configs diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index d5f72e718a..57d3da71e9 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2265,9 +2265,7 @@ class DeploymentArgs(BaseDeploymentConfigDataHolder): "image_uri", "model_data", "environment", - "selected_instance_type", - "default_instance_type", - "supported_instance_types", + "instance_type", "compute_resource_requirements", "model_data_download_timeout", "container_startup_health_check_timeout", @@ -2283,7 +2281,7 @@ def __init__( if init_kwargs is not None: self.image_uri = init_kwargs.image_uri self.model_data = init_kwargs.model_data - self.selected_instance_type = init_kwargs.instance_type + self.instance_type = init_kwargs.instance_type self.default_instance_type = resolved_config.get("default_inference_instance_type") self.supported_instance_types = resolved_config.get( "supported_inference_instance_types" diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 8868329f97..cdc4020e98 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -41,6 +41,7 @@ JumpStartModelHeader, JumpStartModelSpecs, JumpStartVersionedModelId, + DeploymentConfigMetadata, ) from sagemaker.session import Session from sagemaker.config import load_sagemaker_config @@ -1031,7 +1032,7 @@ def get_jumpstart_configs( def get_metrics_from_deployment_configs( - deployment_configs: List[Dict[str, Any]] + deployment_configs: List[DeploymentConfigMetadata], ) -> Dict[str, List[str]]: """Extracts metrics from deployment configs. 
@@ -1042,17 +1043,16 @@ def get_metrics_from_deployment_configs( data = {"Config Name": [], "Instance Type": []} for index, deployment_config in enumerate(deployment_configs): - if deployment_config.get("DeploymentArgs") is None: + if deployment_config.deployment_args is None: continue - benchmark_metrics = deployment_config.get("BenchmarkMetrics") + benchmark_metrics = deployment_config.benchmark_metrics for current_instance_type, current_instance_type_metrics in benchmark_metrics.items(): - data["Config Name"].append(deployment_config.get("DeploymentConfigName")) + data["Config Name"].append(deployment_config.deployment_config_name) instance_type_to_display = ( f"{current_instance_type} (Default)" - if current_instance_type - == deployment_config.get("DeploymentArgs").get("DefaultInstanceType") + if current_instance_type == deployment_config.deployment_args.default_instance_type else current_instance_type ) data["Instance Type"].append(instance_type_to_display) @@ -1060,16 +1060,18 @@ def get_metrics_from_deployment_configs( if index == 0: temp_data = {} for metric in current_instance_type_metrics: - column_name = f"{metric.get('name')} ({metric.get('unit')})" - if metric.get("name").lower() == "instance rate": + column_name = f"{metric.name} ({metric.unit})" + if metric.name.lower() == "instance rate": data[column_name] = [] else: temp_data[column_name] = [] data = {**data, **temp_data} for metric in current_instance_type_metrics: - column_name = f"{metric.get('name')} ({metric.get('unit')})" + column_name = f"{metric.name} ({metric.unit})" if column_name in data: - data[column_name].append(metric.get('value')) + for _ in range(index): + data[column_name].append(" - ") + data[column_name].append(metric.value) return data From 7b5c16159500eccefc1d4c8ea592f55061a21329 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 20:09:39 -0700 Subject: [PATCH 07/19] Refactoring --- src/sagemaker/jumpstart/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index cdc4020e98..1de19b2f97 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1070,7 +1070,7 @@ def get_metrics_from_deployment_configs( for metric in current_instance_type_metrics: column_name = f"{metric.name} ({metric.unit})" if column_name in data: - for _ in range(index): + for i in range(len(data[column_name]), index): data[column_name].append(" - ") data[column_name].append(metric.value) From 7fe7b45efdbb1193fb9c22e7cac8f4812eb30f2b Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 20:17:59 -0700 Subject: [PATCH 08/19] Testing with NB --- src/sagemaker/jumpstart/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 3f48bb4cc6..c0acc0e084 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -905,6 +905,9 @@ def _get_deployment_configs( for config_name, metadata_config in self._metadata_configs.items(): benchmark_metrics = {} for instance_type, benchmark_metric in metadata_config.benchmark_metrics.items(): + if len(instance_type.split(".")) < 3: + instance_type = f"ml.{instance_type}" + if should_fetch_instance_rate_metric: instance_type_rate = get_instance_rate_per_hour( instance_type=instance_type, region=self.region From 3437483854ecd4c843f0ccd81e6f4e38cc71a8e7 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 20:41:25 -0700 Subject: 
[PATCH 09/19] Debug --- src/sagemaker/jumpstart/model.py | 7 ++++++- src/sagemaker/jumpstart/utils.py | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index c0acc0e084..8494a26829 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -905,9 +905,14 @@ def _get_deployment_configs( for config_name, metadata_config in self._metadata_configs.items(): benchmark_metrics = {} for instance_type, benchmark_metric in metadata_config.benchmark_metrics.items(): - if len(instance_type.split(".")) < 3: + if not instance_type.startswith("ml."): instance_type = f"ml.{instance_type}" + for metric in benchmark_metric: + if metric.name.lower() == "instance rate": + should_fetch_instance_rate_metric = False + break + if should_fetch_instance_rate_metric: instance_type_rate = get_instance_rate_per_hour( instance_type=instance_type, region=self.region diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 1de19b2f97..aa80498e25 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1070,8 +1070,10 @@ def get_metrics_from_deployment_configs( for metric in current_instance_type_metrics: column_name = f"{metric.name} ({metric.unit})" if column_name in data: - for i in range(len(data[column_name]), index): + for _ in range(len(data[column_name]), index): data[column_name].append(" - ") data[column_name].append(metric.value) + print("**********************") + print(data) return data From cc5946c24395cd61f7ed258d243fbbdd593863c4 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 20:46:31 -0700 Subject: [PATCH 10/19] Debug display API --- src/sagemaker/jumpstart/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 8494a26829..841532f8b0 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -908,10 +908,10 @@ def _get_deployment_configs( if not instance_type.startswith("ml."): instance_type = f"ml.{instance_type}" - for metric in benchmark_metric: - if metric.name.lower() == "instance rate": - should_fetch_instance_rate_metric = False - break + for metric in benchmark_metric: + if metric.name.lower() == "instance rate": + should_fetch_instance_rate_metric = False + break if should_fetch_instance_rate_metric: instance_type_rate = get_instance_rate_per_hour( From 0be9ceda7fbdf0271fb2c28e38f2d090d2f829e5 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 21:06:33 -0700 Subject: [PATCH 11/19] Debug with NB --- src/sagemaker/jumpstart/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index aa80498e25..176c9bc6b3 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1074,6 +1074,12 @@ def get_metrics_from_deployment_configs( data[column_name].append(" - ") data[column_name].append(metric.value) + # Todo: Temp + _len = len(data['Config Name']) + for key in data: + if len(data[key]) < _len: + for _ in range(len(data[key]), _len): + data[key].append(" - ") print("**********************") print(data) return data From c063a910f8547f7f52315dbb2608424ab8a9f795 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Fri, 26 Apr 2024 21:10:59 -0700 Subject: [PATCH 12/19] Testing with NB --- src/sagemaker/jumpstart/utils.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 176c9bc6b3..2d485a718f 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1080,6 +1080,6 @@ def get_metrics_from_deployment_configs( if len(data[key]) < _len: for _ in range(len(data[key]), _len): data[key].append(" - ") - print("**********************") + print("*********Testing*************") print(data) return data From 6633954be9b07bb7458bc19709b33f3952de7c39 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Sat, 27 Apr 2024 03:19:43 -0700 Subject: [PATCH 13/19] Refactoring --- src/sagemaker/jumpstart/model.py | 85 +++++++---- src/sagemaker/jumpstart/types.py | 40 +++-- src/sagemaker/jumpstart/utils.py | 26 ++-- src/sagemaker/utils.py | 12 +- tests/unit/sagemaker/jumpstart/test_types.py | 39 +++++ tests/unit/sagemaker/jumpstart/test_utils.py | 60 ++------ tests/unit/sagemaker/jumpstart/utils.py | 33 +++++ tests/unit/test_utils.py | 146 ++++++++++++++++--- 8 files changed, 299 insertions(+), 142 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 841532f8b0..1353d73b9f 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -444,10 +444,10 @@ def set_deployment_config(self, config_name: str, instance_type: str) -> None: @property def deployment_config(self) -> Optional[Dict[str, Any]]: - """The deployment config that will be applied to the model. + """The deployment config that will be applied to ``This`` model. Returns: - Optional[Dict[str, Any]]: Deployment config that will be applied to the model. + Optional[Dict[str, Any]]: Deployment config. """ deployment_config = self._retrieve_selected_deployment_config( self.config_name, self.instance_type @@ -456,16 +456,18 @@ def deployment_config(self) -> Optional[Dict[str, Any]]: @property def benchmark_metrics(self) -> pd.DataFrame: - """Benchmark Metrics for deployment configs + """Benchmark Metrics for deployment configs. Returns: - Metrics: Pandas DataFrame object. + Benchmark Metrics: Pandas DataFrame object. """ - return pd.DataFrame(self._get_benchmarks_data()) + return pd.DataFrame( + self._get_deployment_configs_benchmarks_data(self.config_name, self.instance_type) + ) def display_benchmark_metrics(self) -> None: - """Display Benchmark Metrics for deployment configs.""" - print(self.benchmark_metrics.to_markdown(index=False, tablefmt="grid")) + """Display deployment configs benchmark metrics.""" + print(self.benchmark_metrics.to_markdown(index=False)) def list_deployment_configs(self) -> List[Dict[str, Any]]: """List deployment configs for ``This`` model. @@ -473,10 +475,11 @@ def list_deployment_configs(self) -> List[Dict[str, Any]]: Returns: List[Dict[str, Any]]: A list of deployment configs. """ - # Temp return [ deployment_config.to_json() - for deployment_config in self._get_deployment_configs(self.instance_type) + for deployment_config in self._get_deployment_configs( + self.config_name, self.instance_type + ) ] def _create_sagemaker_model( @@ -867,80 +870,100 @@ def register_deploy_wrapper(*args, **kwargs): return model_package - def _get_benchmarks_data(self) -> Dict[str, Any]: + @lru_cache + def _get_deployment_configs_benchmarks_data( + self, config_name: str, instance_type: str + ) -> Dict[str, Any]: """Deployment configs benchmark metrics. + Args: + config_name (str): Name of selected deployment config. + instance_type (str): The selected Instance type. 
Returns: Dict[str, List[str]]: Deployment config benchmark data. """ - return get_metrics_from_deployment_configs(self._get_deployment_configs(self.instance_type)) + return get_metrics_from_deployment_configs( + self._get_deployment_configs(config_name, instance_type) + ) @lru_cache def _retrieve_selected_deployment_config( self, config_name: str, instance_type: str ) -> Optional[DeploymentConfigMetadata]: - """Retrieve the deployment config to apply to the model. + """Retrieve the deployment config to apply to `This` model. Args: config_name (str): The name of the deployment config to retrieve. + instance_type (str): The instance type of the deployment config to retrieve. Returns: Optional[Dict[str, Any]]: The retrieved deployment config. """ if config_name is None: return None - for deployment_config in self._get_deployment_configs(instance_type): + for deployment_config in self._get_deployment_configs(config_name, instance_type): if deployment_config.deployment_config_name == config_name: return deployment_config return None @lru_cache def _get_deployment_configs( - self, selected_instance_type: str + self, selected_config_name: str, selected_instance_type: str ) -> List[DeploymentConfigMetadata]: - """Retrieve the deployment configs to apply to the model.""" + """Retrieve deployment configs metadata. + + Args: + selected_config_name (str): The name of the selected deployment config. + selected_instance_type (str): The selected instance type. + """ deployment_configs = [] - should_fetch_instance_rate_metric = True + should_fetch_instance_rate_metric_stat = True for config_name, metadata_config in self._metadata_configs.items(): - benchmark_metrics = {} - for instance_type, benchmark_metric in metadata_config.benchmark_metrics.items(): + final_benchmark_metrics = {} + for instance_type, benchmark_metrics in metadata_config.benchmark_metrics.items(): if not instance_type.startswith("ml."): instance_type = f"ml.{instance_type}" - for metric in benchmark_metric: - if metric.name.lower() == "instance rate": - should_fetch_instance_rate_metric = False + for benchmark_metric_stat in benchmark_metrics: + if benchmark_metric_stat.name.lower() == "instance rate": + should_fetch_instance_rate_metric_stat = False break - if should_fetch_instance_rate_metric: + if should_fetch_instance_rate_metric_stat: instance_type_rate = get_instance_rate_per_hour( instance_type=instance_type, region=self.region ) if instance_type_rate is None: - should_fetch_instance_rate_metric = False - benchmark_metrics[instance_type] = benchmark_metric + should_fetch_instance_rate_metric_stat = False + final_benchmark_metrics[instance_type] = benchmark_metrics else: - benchmark_metric.append(JumpStartBenchmarkStat(instance_type_rate)) - benchmark_metrics[instance_type] = benchmark_metric + benchmark_metrics.append(JumpStartBenchmarkStat(instance_type_rate)) + final_benchmark_metrics[instance_type] = benchmark_metrics else: - benchmark_metrics[instance_type] = benchmark_metric + final_benchmark_metrics[instance_type] = benchmark_metrics + + resolved_config = metadata_config.resolved_config + if selected_config_name == config_name: + instance_type_to_use = selected_instance_type + else: + instance_type_to_use = resolved_config.get("default_inference_instance_type") init_kwargs = get_init_kwargs( model_id=self.model_id, - instance_type=selected_instance_type, + instance_type=instance_type_to_use, sagemaker_session=self.sagemaker_session, ) deploy_kwargs = get_deploy_kwargs( model_id=self.model_id, - 
instance_type=selected_instance_type, + instance_type=instance_type_to_use, sagemaker_session=self.sagemaker_session, ) deployment_config_metadata = DeploymentConfigMetadata( config_name, - benchmark_metrics, - metadata_config.resolved_config, + final_benchmark_metrics, + resolved_config, init_kwargs, deploy_kwargs, ) diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py index 57d3da71e9..e0a0f9bea7 100644 --- a/src/sagemaker/jumpstart/types.py +++ b/src/sagemaker/jumpstart/types.py @@ -2239,7 +2239,13 @@ def to_json(self) -> Dict[str, Any]: return json_obj def _val_to_json(self, val: Any) -> Any: - """Converts ``Any`` to JSON.""" + """Converts the given value to JSON. + + Args: + val (Any): The value to convert. + Returns: + Any: The converted json value. + """ if issubclass(type(val), JumpStartDataHolderType): return val.to_json() if isinstance(val, list): @@ -2259,7 +2265,7 @@ def _val_to_json(self, val: Any) -> Any: class DeploymentArgs(BaseDeploymentConfigDataHolder): - """Dataclass representing a Deployment Config.""" + """Dataclass representing a Deployment Args.""" __slots__ = [ "image_uri", @@ -2273,19 +2279,15 @@ class DeploymentArgs(BaseDeploymentConfigDataHolder): def __init__( self, - init_kwargs: JumpStartModelInitKwargs, - deploy_kwargs: JumpStartModelDeployKwargs, - resolved_config: Dict[str, Any], + init_kwargs: Optional[JumpStartModelInitKwargs] = None, + deploy_kwargs: Optional[JumpStartModelDeployKwargs] = None, + resolved_config: Optional[Dict[str, Any]] = None, ): - """Instantiates DeploymentConfig object.""" + """Instantiates DeploymentArgs object.""" if init_kwargs is not None: self.image_uri = init_kwargs.image_uri self.model_data = init_kwargs.model_data self.instance_type = init_kwargs.instance_type - self.default_instance_type = resolved_config.get("default_inference_instance_type") - self.supported_instance_types = resolved_config.get( - "supported_inference_instance_types" - ) self.environment = init_kwargs.env if init_kwargs.resources is not None: self.compute_resource_requirements = ( @@ -2296,6 +2298,11 @@ def __init__( self.container_startup_health_check_timeout = ( deploy_kwargs.container_startup_health_check_timeout ) + if resolved_config is not None: + self.default_instance_type = resolved_config.get("default_inference_instance_type") + self.supported_instance_types = resolved_config.get( + "supported_inference_instance_types" + ) class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder): @@ -2310,14 +2317,15 @@ class DeploymentConfigMetadata(BaseDeploymentConfigDataHolder): def __init__( self, - config_name: str, - benchmark_metrics: Dict[str, List[JumpStartBenchmarkStat]], - resolved_config: Dict[str, Any], - init_kwargs: JumpStartModelInitKwargs, - deploy_kwargs: JumpStartModelDeployKwargs, + config_name: Optional[str] = None, + benchmark_metrics: Optional[Dict[str, List[JumpStartBenchmarkStat]]] = None, + resolved_config: Optional[Dict[str, Any]] = None, + init_kwargs: Optional[JumpStartModelInitKwargs] = None, + deploy_kwargs: Optional[JumpStartModelDeployKwargs] = None, ): """Instantiates DeploymentConfigMetadata object.""" self.deployment_config_name = config_name self.deployment_args = DeploymentArgs(init_kwargs, deploy_kwargs, resolved_config) - self.acceleration_configs = resolved_config.get("acceleration_configs") self.benchmark_metrics = benchmark_metrics + if resolved_config is not None: + self.acceleration_configs = resolved_config.get("acceleration_configs") diff --git a/src/sagemaker/jumpstart/utils.py 
b/src/sagemaker/jumpstart/utils.py index 2d485a718f..04e6bb8ef7 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1034,20 +1034,23 @@ def get_jumpstart_configs( def get_metrics_from_deployment_configs( deployment_configs: List[DeploymentConfigMetadata], ) -> Dict[str, List[str]]: - """Extracts metrics from deployment configs. + """Extracts benchmark metrics from deployment configs metadata. Args: - deployment_configs (list[dict[str, Any]]): List of deployment configs. + deployment_configs (List[DeploymentConfigMetadata]): List of deployment configs metadata. """ - data = {"Config Name": [], "Instance Type": []} - for index, deployment_config in enumerate(deployment_configs): + for outer_index, deployment_config in enumerate(deployment_configs): if deployment_config.deployment_args is None: continue benchmark_metrics = deployment_config.benchmark_metrics - for current_instance_type, current_instance_type_metrics in benchmark_metrics.items(): + if benchmark_metrics is None: + continue + + for inner_index, current_instance_type in enumerate(benchmark_metrics): + current_instance_type_metrics = benchmark_metrics[current_instance_type] data["Config Name"].append(deployment_config.deployment_config_name) instance_type_to_display = ( @@ -1057,7 +1060,7 @@ def get_metrics_from_deployment_configs( ) data["Instance Type"].append(instance_type_to_display) - if index == 0: + if outer_index == 0 and inner_index == 0: temp_data = {} for metric in current_instance_type_metrics: column_name = f"{metric.name} ({metric.unit})" @@ -1070,16 +1073,5 @@ def get_metrics_from_deployment_configs( for metric in current_instance_type_metrics: column_name = f"{metric.name} ({metric.unit})" if column_name in data: - for _ in range(len(data[column_name]), index): - data[column_name].append(" - ") data[column_name].append(metric.value) - - # Todo: Temp - _len = len(data['Config Name']) - for key in data: - if len(data[key]) < _len: - for _ in range(len(data[key]), _len): - data[key].append(" - ") - print("*********Testing*************") - print(data) return data diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 0e7ea1095b..abfbd27655 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -37,7 +37,6 @@ import boto3 import botocore -from botocore.exceptions import ClientError from botocore.utils import merge_dicts from six.moves.urllib import parse import pandas as pd @@ -1673,7 +1672,7 @@ def get_instance_rate_per_hour( region (str): The region. Returns: Optional[Dict[str, str]]: Instance rate per hour. - Example: {'name': 'Instance Rate', 'unit': 'USD/Hrs', 'value': '1.1250000000'}}. + Example: {'name': 'Instance Rate', 'unit': 'USD/Hrs', 'value': '1.125'}. """ error_message = "Instance rate metrics will be omitted. 
Reason: %s" @@ -1702,8 +1701,8 @@ def get_instance_rate_per_hour( return extract_instance_rate_per_hour(price_data) return None - except ClientError as e: - logger.warning(error_message, e.response["Error"]) + except botocore.exceptions.ClientError as e: + logger.warning(error_message, e.response["Error"]["Message"]) return None except Exception: # pylint: disable=W0703 logger.warning(error_message, "Something went wrong while getting instance rates.") @@ -1724,9 +1723,12 @@ def extract_instance_rate_per_hour(price_data: Dict[str, Any]) -> Optional[Dict[ for dimension in price_dimensions: for price in dimension.get("priceDimensions", {}).values(): for currency in price.get("pricePerUnit", {}).keys(): + value = price.get("pricePerUnit", {}).get(currency) + if value is not None: + value = str(round(float(value), 3)) return { "unit": f"{currency}/{price.get('unit', 'Hrs')}", - "value": price.get("pricePerUnit", {}).get(currency), + "value": value, "name": "Instance Rate", } return None diff --git a/tests/unit/sagemaker/jumpstart/test_types.py b/tests/unit/sagemaker/jumpstart/test_types.py index 5ca01c3c52..c52bf76f4e 100644 --- a/tests/unit/sagemaker/jumpstart/test_types.py +++ b/tests/unit/sagemaker/jumpstart/test_types.py @@ -22,6 +22,8 @@ JumpStartModelSpecs, JumpStartModelHeader, JumpStartConfigComponent, + DeploymentConfigMetadata, + JumpStartModelInitKwargs, ) from tests.unit.sagemaker.jumpstart.constants import ( BASE_SPEC, @@ -29,6 +31,7 @@ INFERENCE_CONFIGS, TRAINING_CONFIG_RANKINGS, TRAINING_CONFIGS, + INIT_KWARGS, ) INSTANCE_TYPE_VARIANT = JumpStartInstanceTypeVariants( @@ -1248,3 +1251,39 @@ def test_set_training_config(): with pytest.raises(ValueError) as error: specs1.set_config("invalid_name", scope="unknown scope") + + +def test_deployment_config_metadata(): + spec = {**BASE_SPEC, **INFERENCE_CONFIGS, **INFERENCE_CONFIG_RANKINGS} + specs = JumpStartModelSpecs(spec) + jumpstart_config = specs.inference_configs.get_top_config_from_ranking() + + deployment_config_metadata = DeploymentConfigMetadata( + jumpstart_config.config_name, + jumpstart_config.benchmark_metrics, + jumpstart_config.resolved_config, + JumpStartModelInitKwargs( + model_id=specs.model_id, + model_data=INIT_KWARGS.get("model_data"), + image_uri=INIT_KWARGS.get("image_uri"), + instance_type=INIT_KWARGS.get("instance_type"), + env=INIT_KWARGS.get("env"), + config_name=jumpstart_config.config_name, + ), + ) + + json_obj = deployment_config_metadata.to_json() + + assert isinstance(json_obj, dict) + assert json_obj["DeploymentConfigName"] == jumpstart_config.config_name + for key in json_obj["BenchmarkMetrics"]: + assert len(json_obj["BenchmarkMetrics"][key]) == len( + jumpstart_config.benchmark_metrics.get(key) + ) + assert json_obj["AccelerationConfigs"] == jumpstart_config.resolved_config.get( + "acceleration_configs" + ) + assert json_obj["DeploymentArgs"]["ImageUri"] == INIT_KWARGS.get("image_uri") + assert json_obj["DeploymentArgs"]["ModelData"] == INIT_KWARGS.get("model_data") + assert json_obj["DeploymentArgs"]["Environment"] == INIT_KWARGS.get("env") + assert json_obj["DeploymentArgs"]["InstanceType"] == INIT_KWARGS.get("instance_type") diff --git a/tests/unit/sagemaker/jumpstart/test_utils.py b/tests/unit/sagemaker/jumpstart/test_utils.py index f576e36185..05ecc240e8 100644 --- a/tests/unit/sagemaker/jumpstart/test_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_utils.py @@ -49,8 +49,7 @@ get_spec_from_base_spec, get_special_model_spec, get_prototype_manifest, - get_base_deployment_configs, - 
get_base_deployment_configs_with_acceleration_configs, + get_base_deployment_configs_metadata, ) from mock import MagicMock @@ -1763,53 +1762,12 @@ def test_get_jumpstart_benchmark_stats_training( } -@pytest.mark.parametrize( - "config_name, configs, expected", - [ - ( - None, - get_base_deployment_configs(), - { - "Config Name": [ - "neuron-inference", - "neuron-inference-budget", - "gpu-inference-budget", - "gpu-inference", - ], - "Instance Type": ["ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge"], - "Selected": ["No", "No", "No", "No"], - "Instance Rate (USD/Hrs)": [ - "0.0083000000", - "0.0083000000", - "0.0083000000", - "0.0083000000", - ], - }, - ), - ( - "neuron-inference", - get_base_deployment_configs_with_acceleration_configs(), - { - "Config Name": [ - "neuron-inference", - "neuron-inference-budget", - "gpu-inference-budget", - "gpu-inference", - ], - "Instance Type": ["ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge", "ml.p2.xlarge"], - "Selected": ["Yes", "No", "No", "No"], - "Accelerated": ["Yes", "No", "No", "No"], - "Instance Rate (USD/Hrs)": [ - "0.0083000000", - "0.0083000000", - "0.0083000000", - "0.0083000000", - ], - }, - ), - ], -) -def test_extract_metrics_from_deployment_configs(config_name, configs, expected): - data = utils.get_metrics_from_deployment_configs(configs, config_name) +def test_extract_metrics_from_deployment_configs(): + configs = get_base_deployment_configs_metadata() + configs[0].benchmark_metrics = None + configs[2].deployment_args = None + + data = utils.get_metrics_from_deployment_configs(configs) - assert data == expected + for key in data: + assert len(data[key]) == (len(configs) - 2) diff --git a/tests/unit/sagemaker/jumpstart/utils.py b/tests/unit/sagemaker/jumpstart/utils.py index 8b814c3d71..672a310fc0 100644 --- a/tests/unit/sagemaker/jumpstart/utils.py +++ b/tests/unit/sagemaker/jumpstart/utils.py @@ -29,6 +29,9 @@ JumpStartS3FileType, JumpStartModelHeader, JumpStartModelInitKwargs, + DeploymentConfigMetadata, + JumpStartModelDeployKwargs, + JumpStartBenchmarkStat, ) from sagemaker.jumpstart.enums import JumpStartModelType from sagemaker.jumpstart.utils import get_formatted_manifest @@ -348,3 +351,33 @@ def get_mock_init_kwargs( resources=ResourceRequirements(), config_name=config_name, ) + + +def get_base_deployment_configs_metadata() -> List[DeploymentConfigMetadata]: + configs = [] + for ( + config_name, + jumpstart_config, + ) in get_base_spec_with_prototype_configs().inference_configs.configs.items(): + benchmark_metrics = jumpstart_config.benchmark_metrics + for instance_type in benchmark_metrics: + benchmark_metrics[instance_type].append( + JumpStartBenchmarkStat( + {"name": "Instance Rate", "unit": "USD/Hrs", "value": "3.76"} + ) + ) + + configs.append( + DeploymentConfigMetadata( + config_name=config_name, + benchmark_metrics=jumpstart_config.benchmark_metrics, + resolved_config=jumpstart_config.resolved_config, + init_kwargs=get_mock_init_kwargs( + get_base_spec_with_prototype_configs().model_id, config_name + ), + deploy_kwargs=JumpStartModelDeployKwargs( + model_id=get_base_spec_with_prototype_configs().model_id, + ), + ) + ) + return configs diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index bf6a7cb09f..41f2b1d964 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -27,6 +27,7 @@ from boto3 import exceptions import botocore import pytest +from botocore.exceptions import ClientError from mock import call, patch, Mock, MagicMock, PropertyMock import sagemaker @@ -1871,42 
+1872,143 @@ def test_deep_override_skip_keys(self): @pytest.mark.parametrize( - "instance, region", + "instance, region, amazon_sagemaker_price_result, expected", [ - ("t4g.nano", "us-west-2"), - ("t4g.nano", "eu-central-1"), - ("t4g.nano", "af-south-1"), - ("t4g.nano", "ap-northeast-2"), - ("t4g.nano", "cn-north-1"), + ( + "ml.t4g.nano", + "us-west-2", + { + "PriceList": [ + { + "terms": { + "OnDemand": { + "3WK7G7WSYVS3K492.JRTCKXETXF": { + "priceDimensions": { + "3WK7G7WSYVS3K492.JRTCKXETXF.6YS6EN2CT7": { + "unit": "Hrs", + "endRange": "Inf", + "description": "$0.9 per Unused Reservation Linux p2.xlarge Instance Hour", + "appliesTo": [], + "rateCode": "3WK7G7WSYVS3K492.JRTCKXETXF.6YS6EN2CT7", + "beginRange": "0", + "pricePerUnit": {"USD": "0.9000000000"}, + } + } + } + } + }, + } + ] + }, + {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.9"}, + ), + ( + "ml.t4g.nano", + "eu-central-1", + { + "PriceList": [ + '{"terms": {"OnDemand": {"22VNQ3N6GZGZMXYM.JRTCKXETXF": {"priceDimensions":{' + '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7": {"unit": "Hrs", "endRange": "Inf", "description": ' + '"$0.0083 per' + "On" + 'Demand Ubuntu Pro t4g.nano Instance Hour", "appliesTo": [], "rateCode": ' + '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7", "beginRange": "0", "pricePerUnit":{"USD": ' + '"0.0083000000"}}},' + '"sku": "22VNQ3N6GZGZMXYM", "effectiveDate": "2024-04-01T00:00:00Z", "offerTermCode": "JRTCKXETXF",' + '"termAttributes": {}}}}}' + ] + }, + {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.008"}, + ), + ( + "ml.t4g.nano", + "af-south-1", + { + "PriceList": [ + '{"terms": {"OnDemand": {"22VNQ3N6GZGZMXYM.JRTCKXETXF": {"priceDimensions":{' + '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7": {"unit": "Hrs", "endRange": "Inf", "description": ' + '"$0.0083 per' + "On" + 'Demand Ubuntu Pro t4g.nano Instance Hour", "appliesTo": [], "rateCode": ' + '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7", "beginRange": "0", "pricePerUnit":{"USD": ' + '"0.0083000000"}}},' + '"sku": "22VNQ3N6GZGZMXYM", "effectiveDate": "2024-04-01T00:00:00Z", "offerTermCode": "JRTCKXETXF",' + '"termAttributes": {}}}}}' + ] + }, + {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.008"}, + ), + ( + "ml.t4g.nano", + "ap-northeast-2", + { + "PriceList": [ + '{"terms": {"OnDemand": {"22VNQ3N6GZGZMXYM.JRTCKXETXF": {"priceDimensions":{' + '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7": {"unit": "Hrs", "endRange": "Inf", "description": ' + '"$0.0083 per' + "On" + 'Demand Ubuntu Pro t4g.nano Instance Hour", "appliesTo": [], "rateCode": ' + '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7", "beginRange": "0", "pricePerUnit":{"USD": ' + '"0.0083000000"}}},' + '"sku": "22VNQ3N6GZGZMXYM", "effectiveDate": "2024-04-01T00:00:00Z", "offerTermCode": "JRTCKXETXF",' + '"termAttributes": {}}}}}' + ] + }, + {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.008"}, + ), + ( + "ml.t4g.nano", + "cn-north-1", + {"PriceList": []}, + None, + ), ], ) @patch("boto3.client") -def test_get_instance_rate_per_hour(mock_client, instance, region): - amazon_sagemaker_price_result = { - "PriceList": [ - '{"terms": {"OnDemand": {"22VNQ3N6GZGZMXYM.JRTCKXETXF": {"priceDimensions":{' - '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7": {"unit": "Hrs", "endRange": "Inf", "description": "$0.0083 per ' - "On" - 'Demand Ubuntu Pro t4g.nano Instance Hour", "appliesTo": [], "rateCode": ' - '"22VNQ3N6GZGZMXYM.JRTCKXETXF.6YS6EN2CT7", "beginRange": "0", "pricePerUnit":{"USD": "0.0083000000"}}}, ' - '"sku": "22VNQ3N6GZGZMXYM", "effectiveDate": "2024-04-01T00:00:00Z", "offerTermCode": 
"JRTCKXETXF", ' - '"termAttributes": {}}}}}' - ] - } +def test_get_instance_rate_per_hour( + mock_client, instance, region, amazon_sagemaker_price_result, expected +): mock_client.return_value.get_products.side_effect = ( lambda *args, **kwargs: amazon_sagemaker_price_result ) instance_rate = get_instance_rate_per_hour(instance_type=instance, region=region) - assert instance_rate == {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.0083000000"} + assert instance_rate == expected + + +@patch("sagemaker.utils.logger") +@patch("boto3.client") +def test_get_instance_rate_per_hour_client_ex(mock_client, mock_logger): + err_msg = ( + "User: arn:aws:sts::123456789123:assumed-role/AmazonSageMaker-ExecutionRole-20230707T131628/SageMaker " + "is not authorized to perform: pricing:GetProducts because no identity-based policy allows the " + "pricing:GetProducts action" + ) + mock_client.return_value.get_products.side_effect = ClientError( + {"Error": {"Message": err_msg, "Code": "AccessDeniedException"}}, + "GetProducts", + ) + + instance_rate = get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") + + mock_logger.warning.assert_called_with( + "Instance rate metrics will be omitted. Reason: %s", err_msg + ) + assert instance_rate is None +@patch("sagemaker.utils.logger") @patch("boto3.client") -def test_get_instance_rate_per_hour_ex(mock_client): - mock_client.return_value.get_products.side_effect = lambda *args, **kwargs: Exception() +def test_get_instance_rate_per_hour_ex(mock_client, mock_logger): + mock_client.return_value.get_products.side_effect = Exception() + instance_rate = get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") + mock_logger.warning.assert_called_with( + "Instance rate metrics will be omitted. Reason: %s", + "Something went wrong while getting instance rates.", + ) assert instance_rate is None @@ -1934,7 +2036,7 @@ def test_get_instance_rate_per_hour_ex(mock_client): } } }, - {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.9000000000"}, + {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.9"}, ), ], ) From 38049622002e29376a288b819fb305a93ea3c43d Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Sat, 27 Apr 2024 12:34:18 -0700 Subject: [PATCH 14/19] Refactoring --- src/sagemaker/jumpstart/model.py | 65 +++++++---- src/sagemaker/jumpstart/utils.py | 4 +- .../serve/builder/jumpstart_builder.py | 13 ++- src/sagemaker/utils.py | 43 +++---- .../sagemaker/jumpstart/model/test_model.py | 110 ++++++++---------- tests/unit/sagemaker/jumpstart/utils.py | 38 +++--- .../serve/builder/test_js_builder.py | 8 +- tests/unit/test_utils.py | 53 ++++----- 8 files changed, 168 insertions(+), 166 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index 1353d73b9f..f85984950b 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -461,9 +461,12 @@ def benchmark_metrics(self) -> pd.DataFrame: Returns: Benchmark Metrics: Pandas DataFrame object. 
""" - return pd.DataFrame( - self._get_deployment_configs_benchmarks_data(self.config_name, self.instance_type) + benchmark_metrics_data = self._get_deployment_configs_benchmarks_data( + self.config_name, self.instance_type ) + keys = list(benchmark_metrics_data.keys()) + df = pd.DataFrame(benchmark_metrics_data).sort_values(by=[keys[0], keys[1]]) + return df def display_benchmark_metrics(self) -> None: """Display deployment configs benchmark metrics.""" @@ -917,32 +920,44 @@ def _get_deployment_configs( selected_instance_type (str): The selected instance type. """ deployment_configs = [] + if self._metadata_configs is None: + return deployment_configs should_fetch_instance_rate_metric_stat = True + err_message = None + for config_name, metadata_config in self._metadata_configs.items(): - final_benchmark_metrics = {} - for instance_type, benchmark_metrics in metadata_config.benchmark_metrics.items(): - if not instance_type.startswith("ml."): - instance_type = f"ml.{instance_type}" - - for benchmark_metric_stat in benchmark_metrics: - if benchmark_metric_stat.name.lower() == "instance rate": - should_fetch_instance_rate_metric_stat = False - break - - if should_fetch_instance_rate_metric_stat: - instance_type_rate = get_instance_rate_per_hour( - instance_type=instance_type, region=self.region - ) - - if instance_type_rate is None: - should_fetch_instance_rate_metric_stat = False - final_benchmark_metrics[instance_type] = benchmark_metrics + final_benchmark_metrics = None + + if metadata_config.benchmark_metrics is not None: + final_benchmark_metrics = {} + for instance_type, benchmark_metrics in metadata_config.benchmark_metrics.items(): + if not instance_type.startswith("ml."): + instance_type = f"ml.{instance_type}" + + if benchmark_metrics is None: + should_fetch_instance_rate_metric_stat = True + else: + for benchmark_metric_stat in benchmark_metrics: + if benchmark_metric_stat.name.lower() == "instance rate": + should_fetch_instance_rate_metric_stat = False + break + + if should_fetch_instance_rate_metric_stat: + try: + instance_type_rate = get_instance_rate_per_hour( + instance_type=instance_type, region=self.region + ) + + benchmark_metrics.append(JumpStartBenchmarkStat(instance_type_rate)) + final_benchmark_metrics[instance_type] = benchmark_metrics + except ClientError as e: + err_message = e.response["Error"]["Message"] + should_fetch_instance_rate_metric_stat = False + except Exception: # pylint: disable=W0703 + should_fetch_instance_rate_metric_stat = True else: - benchmark_metrics.append(JumpStartBenchmarkStat(instance_type_rate)) final_benchmark_metrics[instance_type] = benchmark_metrics - else: - final_benchmark_metrics[instance_type] = benchmark_metrics resolved_config = metadata_config.resolved_config if selected_config_name == config_name: @@ -969,6 +984,10 @@ def _get_deployment_configs( ) deployment_configs.append(deployment_config_metadata) + if err_message is not None: + error_message = "Instance rate metrics will be omitted. 
Reason: %s" + JUMPSTART_LOGGER.warning(error_message, err_message) + return deployment_configs def __str__(self) -> str: diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 04e6bb8ef7..1025931d73 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1063,7 +1063,7 @@ def get_metrics_from_deployment_configs( if outer_index == 0 and inner_index == 0: temp_data = {} for metric in current_instance_type_metrics: - column_name = f"{metric.name} ({metric.unit})" + column_name = f"{metric.name.replace('_', ' ').title()} ({metric.unit})" if metric.name.lower() == "instance rate": data[column_name] = [] else: @@ -1071,7 +1071,7 @@ def get_metrics_from_deployment_configs( data = {**data, **temp_data} for metric in current_instance_type_metrics: - column_name = f"{metric.name} ({metric.unit})" + column_name = f"{metric.name.replace('_', ' ').title()} ({metric.unit})" if column_name in data: data[column_name].append(metric.value) return data diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index d3c2581885..ec987dd9fe 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -431,18 +431,21 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800): sharded_supported=sharded_supported, max_tuning_duration=max_tuning_duration ) - def set_deployment_config(self, config_name: Optional[str]) -> None: + def set_deployment_config(self, config_name: str, instance_type: str) -> None: """Sets the deployment config to apply to the model. Args: - config_name (Optional[str]): - The name of the deployment config. Set to None to unset - any existing config that is applied to the model. + config_name (str): + The name of the deployment config to apply to the model. + Call list_deployment_configs to see the list of config names. + instance_type (str): + The instance_type that the model will use after setting + the config. """ if not hasattr(self, "pysdk_model") or self.pysdk_model is None: raise Exception("Cannot set deployment config to an uninitialized model.") - self.pysdk_model.set_deployment_config(config_name) + self.pysdk_model.set_deployment_config(config_name, instance_type) def get_deployment_config(self) -> Optional[Dict[str, Any]]: """Gets the deployment config to apply to the model. diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index abfbd27655..68e6b9543a 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -1673,9 +1673,11 @@ def get_instance_rate_per_hour( Returns: Optional[Dict[str, str]]: Instance rate per hour. Example: {'name': 'Instance Rate', 'unit': 'USD/Hrs', 'value': '1.125'}. + Raises: + Exception: An exception is raised if + the IAM role is not authorized to perform pricing:GetProducts. + or unexpected event happened. """ - error_message = "Instance rate metrics will be omitted. 
Reason: %s" - region_name = "us-east-1" if region.startswith("eu") or region.startswith("af"): region_name = "eu-central-1" @@ -1683,30 +1685,23 @@ def get_instance_rate_per_hour( region_name = "ap-south-1" pricing_client: boto3.client = boto3.client("pricing", region_name=region_name) - try: - res = pricing_client.get_products( - ServiceCode="AmazonSageMaker", - Filters=[ - {"Type": "TERM_MATCH", "Field": "instanceName", "Value": instance_type}, - {"Type": "TERM_MATCH", "Field": "locationType", "Value": "AWS Region"}, - {"Type": "TERM_MATCH", "Field": "regionCode", "Value": region}, - ], - ) + res = pricing_client.get_products( + ServiceCode="AmazonSageMaker", + Filters=[ + {"Type": "TERM_MATCH", "Field": "instanceName", "Value": instance_type}, + {"Type": "TERM_MATCH", "Field": "locationType", "Value": "AWS Region"}, + {"Type": "TERM_MATCH", "Field": "regionCode", "Value": region}, + ], + ) - price_list = res.get("PriceList", []) - if len(price_list) > 0: - price_data = price_list[0] - if isinstance(price_data, str): - price_data = json.loads(price_data) + price_list = res.get("PriceList", []) + if len(price_list) > 0: + price_data = price_list[0] + if isinstance(price_data, str): + price_data = json.loads(price_data) + return extract_instance_rate_per_hour(price_data) - return extract_instance_rate_per_hour(price_data) - return None - except botocore.exceptions.ClientError as e: - logger.warning(error_message, e.response["Error"]["Message"]) - return None - except Exception: # pylint: disable=W0703 - logger.warning(error_message, "Something went wrong while getting instance rates.") - return None + raise Exception(f"Unable to get instance rate per hour for instance type: {instance_type}.") def extract_instance_rate_per_hour(price_data: Dict[str, Any]) -> Optional[Dict[str, str]]: diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index 5bbc31a5b1..cfad1dc48f 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -15,7 +15,6 @@ from typing import Optional, Set from unittest import mock import unittest -import pandas as pd from mock import MagicMock, Mock import pytest from sagemaker.async_inference.async_inference_config import AsyncInferenceConfig @@ -66,7 +65,6 @@ class ModelTest(unittest.TestCase): - mock_session_empty_config = MagicMock(sagemaker_config={}) @mock.patch( @@ -1718,11 +1716,9 @@ def test_model_set_deployment_config_incompatible_instance_type_or_name( @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @mock.patch("sagemaker.jumpstart.factory.model.Session") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") - @mock.patch("sagemaker.jumpstart.model.Model.deploy") @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) def test_model_list_deployment_configs( self, - mock_model_deploy: mock.Mock, mock_get_model_specs: mock.Mock, mock_session: mock.Mock, mock_get_manifest: mock.Mock, @@ -1739,13 +1735,12 @@ def test_model_list_deployment_configs( mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { "name": "Instance Rate", "unit": "USD/Hrs", - "value": "0.0083000000", + "value": "3.76", } mock_get_model_specs.side_effect = get_prototype_spec_with_configs mock_get_manifest.side_effect = ( lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) ) - mock_model_deploy.return_value = default_predictor 
mock_session.return_value = sagemaker_session @@ -1756,19 +1751,15 @@ def test_model_list_deployment_configs( self.assertEqual(configs, get_base_deployment_configs()) @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") - @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @mock.patch("sagemaker.jumpstart.factory.model.Session") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") - @mock.patch("sagemaker.jumpstart.model.Model.deploy") @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) def test_model_list_deployment_configs_empty( self, - mock_model_deploy: mock.Mock, mock_get_model_specs: mock.Mock, mock_session: mock.Mock, mock_get_manifest: mock.Mock, - mock_get_instance_rate_per_hour: mock.Mock, mock_verify_model_region_and_return_specs: mock.Mock, ): model_id, _ = "pytorch-eqa-bert-base-cased", "*" @@ -1776,16 +1767,10 @@ def test_model_list_deployment_configs_empty( mock_verify_model_region_and_return_specs.side_effect = ( lambda *args, **kwargs: get_special_model_spec(model_id="gemma-model") ) - mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { - "name": "Instance Rate", - "unit": "USD/Hrs", - "value": "0.0083000000", - } mock_get_model_specs.side_effect = get_prototype_spec_with_configs mock_get_manifest.side_effect = ( lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) ) - mock_model_deploy.return_value = default_predictor mock_session.return_value = sagemaker_session @@ -1821,7 +1806,7 @@ def test_model_retrieve_deployment_config( mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { "name": "Instance Rate", "unit": "USD/Hrs", - "value": "0.0083000000", + "value": "0.0083", } mock_get_model_specs.side_effect = get_prototype_spec_with_configs mock_get_manifest.side_effect = ( @@ -1829,7 +1814,7 @@ def test_model_retrieve_deployment_config( ) mock_model_deploy.return_value = default_predictor - expected = get_base_deployment_configs()[0] + expected = get_base_deployment_configs(True)[0] config_name = expected.get("DeploymentConfigName") instance_type = expected.get("InstanceType") mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs( @@ -1853,11 +1838,9 @@ def test_model_retrieve_deployment_config( @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @mock.patch("sagemaker.jumpstart.factory.model.Session") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") - @mock.patch("sagemaker.jumpstart.model.Model.deploy") @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) def test_model_display_benchmark_metrics( self, - mock_model_deploy: mock.Mock, mock_get_model_specs: mock.Mock, mock_session: mock.Mock, mock_get_manifest: mock.Mock, @@ -1874,13 +1857,12 @@ def test_model_display_benchmark_metrics( mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { "name": "Instance Rate", "unit": "USD/Hrs", - "value": "0.0083000000", + "value": "0.0083", } mock_get_model_specs.side_effect = get_prototype_spec_with_configs mock_get_manifest.side_effect = ( lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) ) - mock_model_deploy.return_value = default_predictor mock_session.return_value = sagemaker_session @@ -1888,48 +1870,48 @@ def 
test_model_display_benchmark_metrics( model.display_benchmark_metrics() - @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") - @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") - @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") - @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") - @mock.patch("sagemaker.jumpstart.factory.model.Session") - @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") - @mock.patch("sagemaker.jumpstart.model.Model.deploy") - @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) - def test_model_benchmark_metrics( - self, - mock_model_deploy: mock.Mock, - mock_get_model_specs: mock.Mock, - mock_session: mock.Mock, - mock_get_manifest: mock.Mock, - mock_get_instance_rate_per_hour: mock.Mock, - mock_verify_model_region_and_return_specs: mock.Mock, - mock_get_init_kwargs: mock.Mock, - ): - model_id, _ = "pytorch-eqa-bert-base-cased", "*" - - mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs(model_id) - mock_verify_model_region_and_return_specs.side_effect = ( - lambda *args, **kwargs: get_base_spec_with_prototype_configs() - ) - mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { - "name": "Instance Rate", - "unit": "USD/Hrs", - "value": "0.0083000000", - } - mock_get_model_specs.side_effect = get_prototype_spec_with_configs - mock_get_manifest.side_effect = ( - lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) - ) - mock_model_deploy.return_value = default_predictor - - mock_session.return_value = sagemaker_session - - model = JumpStartModel(model_id=model_id) - - df = model.benchmark_metrics - - self.assertTrue(isinstance(df, pd.DataFrame)) + # @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") + # @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") + # @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") + # @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") + # @mock.patch("sagemaker.jumpstart.factory.model.Session") + # @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") + # @mock.patch("sagemaker.jumpstart.model.Model.deploy") + # @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) + # def test_model_benchmark_metrics( + # self, + # mock_model_deploy: mock.Mock, + # mock_get_model_specs: mock.Mock, + # mock_session: mock.Mock, + # mock_get_manifest: mock.Mock, + # mock_get_instance_rate_per_hour: mock.Mock, + # mock_verify_model_region_and_return_specs: mock.Mock, + # mock_get_init_kwargs: mock.Mock, + # ): + # model_id, _ = "pytorch-eqa-bert-base-cased", "*" + # + # mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs(model_id) + # mock_verify_model_region_and_return_specs.side_effect = ( + # lambda *args, **kwargs: get_base_spec_with_prototype_configs() + # ) + # mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { + # "name": "Instance Rate", + # "unit": "USD/Hrs", + # "value": "0.0083000000", + # } + # mock_get_model_specs.side_effect = get_prototype_spec_with_configs + # mock_get_manifest.side_effect = ( + # lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) + # ) + # mock_model_deploy.return_value = default_predictor + # + # mock_session.return_value = sagemaker_session + # + # model = 
JumpStartModel(model_id=model_id) + # + # df = model.benchmark_metrics + # + # self.assertTrue(isinstance(df, pd.DataFrame)) def test_jumpstart_model_requires_model_id(): diff --git a/tests/unit/sagemaker/jumpstart/utils.py b/tests/unit/sagemaker/jumpstart/utils.py index 672a310fc0..9e1f9f5e14 100644 --- a/tests/unit/sagemaker/jumpstart/utils.py +++ b/tests/unit/sagemaker/jumpstart/utils.py @@ -326,10 +326,6 @@ def overwrite_dictionary( return base_dictionary -def get_base_deployment_configs() -> List[Dict[str, Any]]: - return DEPLOYMENT_CONFIGS - - def get_base_deployment_configs_with_acceleration_configs() -> List[Dict[str, Any]]: configs = copy.deepcopy(DEPLOYMENT_CONFIGS) configs[0]["AccelerationConfigs"] = [ @@ -353,19 +349,25 @@ def get_mock_init_kwargs( ) -def get_base_deployment_configs_metadata() -> List[DeploymentConfigMetadata]: +def get_base_deployment_configs_metadata( + omit_benchmark_metrics: bool = False, +) -> List[DeploymentConfigMetadata]: + specs = ( + get_base_spec_with_prototype_configs_with_missing_benchmarks() + if omit_benchmark_metrics + else get_base_spec_with_prototype_configs() + ) configs = [] - for ( - config_name, - jumpstart_config, - ) in get_base_spec_with_prototype_configs().inference_configs.configs.items(): + for config_name, jumpstart_config in specs.inference_configs.configs.items(): benchmark_metrics = jumpstart_config.benchmark_metrics - for instance_type in benchmark_metrics: - benchmark_metrics[instance_type].append( - JumpStartBenchmarkStat( - {"name": "Instance Rate", "unit": "USD/Hrs", "value": "3.76"} + + if benchmark_metrics: + for instance_type in benchmark_metrics: + benchmark_metrics[instance_type].append( + JumpStartBenchmarkStat( + {"name": "Instance Rate", "unit": "USD/Hrs", "value": "3.76"} + ) ) - ) configs.append( DeploymentConfigMetadata( @@ -381,3 +383,11 @@ def get_base_deployment_configs_metadata() -> List[DeploymentConfigMetadata]: ) ) return configs + + +def get_base_deployment_configs( + omit_benchmark_metrics: bool = False, +) -> List[Dict[str, Any]]: + return [ + config.to_json() for config in get_base_deployment_configs_metadata(omit_benchmark_metrics) + ] diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py index b83b113209..56b01cd9e3 100644 --- a/tests/unit/sagemaker/serve/builder/test_js_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py @@ -752,9 +752,11 @@ def test_set_deployment_config( mock_pre_trained_model.return_value.image_uri = mock_tgi_image_uri builder.build() - builder.set_deployment_config("config-1") + builder.set_deployment_config("config-1", "ml.g5.24xlarge") - mock_pre_trained_model.return_value.set_deployment_config.assert_called_with("config-1") + mock_pre_trained_model.return_value.set_deployment_config.assert_called_with( + "config-1", "ml.g5.24xlarge" + ) @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) @patch( @@ -789,7 +791,7 @@ def test_set_deployment_config_ex( "Cannot set deployment config to an uninitialized model.", lambda: ModelBuilder( model="facebook/galactica-mock-model-id", schema_builder=mock_schema_builder - ).set_deployment_config("config-2"), + ).set_deployment_config("config-2", "ml.g5.24xlarge"), ) @patch("sagemaker.serve.builder.jumpstart_builder._capture_telemetry", side_effect=None) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 41f2b1d964..32444aca60 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ 
-1977,39 +1977,30 @@ def test_get_instance_rate_per_hour( assert instance_rate == expected -@patch("sagemaker.utils.logger") -@patch("boto3.client") -def test_get_instance_rate_per_hour_client_ex(mock_client, mock_logger): - err_msg = ( - "User: arn:aws:sts::123456789123:assumed-role/AmazonSageMaker-ExecutionRole-20230707T131628/SageMaker " - "is not authorized to perform: pricing:GetProducts because no identity-based policy allows the " - "pricing:GetProducts action" - ) - mock_client.return_value.get_products.side_effect = ClientError( - {"Error": {"Message": err_msg, "Code": "AccessDeniedException"}}, - "GetProducts", - ) - - instance_rate = get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") - - mock_logger.warning.assert_called_with( - "Instance rate metrics will be omitted. Reason: %s", err_msg - ) - assert instance_rate is None - - -@patch("sagemaker.utils.logger") -@patch("boto3.client") -def test_get_instance_rate_per_hour_ex(mock_client, mock_logger): - mock_client.return_value.get_products.side_effect = Exception() +# @patch("sagemaker.utils.logger") +# @patch("boto3.client") +# def test_get_instance_rate_per_hour_client_ex(mock_client, mock_logger): +# err_msg = ( +# "User: arn:aws:sts::123456789123:assumed-role/AmazonSageMaker-ExecutionRole-20230707T131628/SageMaker " +# "is not authorized to perform: pricing:GetProducts because no identity-based policy allows the " +# "pricing:GetProducts action" +# ) +# mock_client.return_value.get_products.side_effect = ClientError( +# {"Error": {"Message": err_msg, "Code": "AccessDeniedException"}}, +# "GetProducts", +# ) +# +# instance_rate = get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") +# +# mock_logger.warning.assert_called_with( +# "Instance rate metrics will be omitted. Reason: %s", err_msg +# ) +# assert instance_rate is None - instance_rate = get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") - mock_logger.warning.assert_called_with( - "Instance rate metrics will be omitted. 
Reason: %s", - "Something went wrong while getting instance rates.", - ) - assert instance_rate is None +def test_get_instance_rate_per_hour_ex(): + with pytest.raises(Exception): + get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") @pytest.mark.parametrize( From 57a8898d53193b96c395b19dcc4a141259db2f22 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Sat, 27 Apr 2024 13:02:43 -0700 Subject: [PATCH 15/19] Refactoring and NB testing --- src/sagemaker/jumpstart/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 1025931d73..7f1d35e552 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -45,7 +45,7 @@ ) from sagemaker.session import Session from sagemaker.config import load_sagemaker_config -from sagemaker.utils import resolve_value_from_config, TagsDict +from sagemaker.utils import resolve_value_from_config, TagsDict, get_instance_rate_per_hour from sagemaker.workflow import is_pipeline_variable From 722c2db06f727ffbd1693ff03f7f53d26a8ff872 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Sat, 27 Apr 2024 15:15:15 -0700 Subject: [PATCH 16/19] Testing with NB --- src/sagemaker/jumpstart/model.py | 51 ++------ src/sagemaker/jumpstart/utils.py | 61 +++++++++ src/sagemaker/utils.py | 5 +- .../sagemaker/jumpstart/model/test_model.py | 122 +++++++++--------- tests/unit/sagemaker/jumpstart/test_utils.py | 93 +++++++++++++ tests/unit/sagemaker/jumpstart/utils.py | 13 ++ tests/unit/test_utils.py | 31 +---- 7 files changed, 245 insertions(+), 131 deletions(-) diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py index f85984950b..619af2f7a9 100644 --- a/src/sagemaker/jumpstart/model.py +++ b/src/sagemaker/jumpstart/model.py @@ -41,17 +41,17 @@ from sagemaker.jumpstart.types import ( JumpStartSerializablePayload, DeploymentConfigMetadata, - JumpStartBenchmarkStat, ) from sagemaker.jumpstart.utils import ( validate_model_id_and_get_type, verify_model_region_and_return_specs, get_jumpstart_configs, get_metrics_from_deployment_configs, + add_instance_rate_stats_to_benchmark_metrics, ) from sagemaker.jumpstart.constants import JUMPSTART_LOGGER from sagemaker.jumpstart.enums import JumpStartModelType -from sagemaker.utils import stringify_object, format_tags, Tags, get_instance_rate_per_hour +from sagemaker.utils import stringify_object, format_tags, Tags from sagemaker.model import ( Model, ModelPackage, @@ -923,41 +923,14 @@ def _get_deployment_configs( if self._metadata_configs is None: return deployment_configs - should_fetch_instance_rate_metric_stat = True - err_message = None - + err = None for config_name, metadata_config in self._metadata_configs.items(): - final_benchmark_metrics = None - - if metadata_config.benchmark_metrics is not None: - final_benchmark_metrics = {} - for instance_type, benchmark_metrics in metadata_config.benchmark_metrics.items(): - if not instance_type.startswith("ml."): - instance_type = f"ml.{instance_type}" - - if benchmark_metrics is None: - should_fetch_instance_rate_metric_stat = True - else: - for benchmark_metric_stat in benchmark_metrics: - if benchmark_metric_stat.name.lower() == "instance rate": - should_fetch_instance_rate_metric_stat = False - break - - if should_fetch_instance_rate_metric_stat: - try: - instance_type_rate = get_instance_rate_per_hour( - instance_type=instance_type, region=self.region - ) - - 
benchmark_metrics.append(JumpStartBenchmarkStat(instance_type_rate)) - final_benchmark_metrics[instance_type] = benchmark_metrics - except ClientError as e: - err_message = e.response["Error"]["Message"] - should_fetch_instance_rate_metric_stat = False - except Exception: # pylint: disable=W0703 - should_fetch_instance_rate_metric_stat = True - else: - final_benchmark_metrics[instance_type] = benchmark_metrics + if err is None or "is not authorized to perform: pricing:GetProducts" not in err: + err, metadata_config.benchmark_metrics = ( + add_instance_rate_stats_to_benchmark_metrics( + self.region, metadata_config.benchmark_metrics + ) + ) resolved_config = metadata_config.resolved_config if selected_config_name == config_name: @@ -977,16 +950,16 @@ def _get_deployment_configs( ) deployment_config_metadata = DeploymentConfigMetadata( config_name, - final_benchmark_metrics, + metadata_config.benchmark_metrics, resolved_config, init_kwargs, deploy_kwargs, ) deployment_configs.append(deployment_config_metadata) - if err_message is not None: + if err is not None and "is not authorized to perform: pricing:GetProducts" in err: error_message = "Instance rate metrics will be omitted. Reason: %s" - JUMPSTART_LOGGER.warning(error_message, err_message) + JUMPSTART_LOGGER.warning(error_message, err) return deployment_configs diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 7f1d35e552..0221007afa 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -17,6 +17,7 @@ from typing import Any, Dict, List, Set, Optional, Tuple, Union from urllib.parse import urlparse import boto3 +from botocore.exceptions import ClientError from packaging.version import Version import sagemaker from sagemaker.config.config_schema import ( @@ -1031,6 +1032,66 @@ def get_jumpstart_configs( ) +def add_instance_rate_stats_to_benchmark_metrics( + region: str, + benchmark_metrics: Optional[Dict[str, List[JumpStartBenchmarkStat]]], +) -> Optional[Tuple[str, Dict[str, List[JumpStartBenchmarkStat]]]]: + """Adds instance types metric stats to the given benchmark_metrics dict. + + Args: + region (str): AWS region. + benchmark_metrics (Dict[str, List[JumpStartBenchmarkStat]]): + Returns: + Tuple[str, Dict[str, List[JumpStartBenchmarkStat]]]: + Contains Error message and metrics dict. + """ + + if benchmark_metrics is None: + return None + + err_message = None + for instance_type, benchmark_metric_stats in benchmark_metrics.items(): + if not instance_type.startswith("ml."): + instance_type = f"ml.{instance_type}" + + if not has_instance_rate_stat(benchmark_metric_stats): + try: + instance_type_rate = get_instance_rate_per_hour( + instance_type=instance_type, region=region + ) + + benchmark_metric_stats.append(JumpStartBenchmarkStat(instance_type_rate)) + benchmark_metrics[instance_type] = benchmark_metric_stats + + except ClientError as e: + err_message = e.response["Error"]["Message"] + except Exception: # pylint: disable=W0703 + err_message = ( + f"Unable to get instance rate per hour for instance type: {instance_type}." + ) + + return err_message, benchmark_metrics + + +def has_instance_rate_stat(benchmark_metric_stats: Optional[List[JumpStartBenchmarkStat]]) -> bool: + """Determines whether a benchmark metric stats contains instance rate metric stat. + + Args: + benchmark_metric_stats (Optional[List[JumpStartBenchmarkStat]]): + List of benchmark metric stats. + Returns: + bool: Whether the benchmark metric stats contains instance rate metric stat. 
+ """ + if benchmark_metric_stats is None: + return False + + for benchmark_metric_stat in benchmark_metric_stats: + if benchmark_metric_stat.name.lower() == "instance rate": + return True + + return False + + def get_metrics_from_deployment_configs( deployment_configs: List[DeploymentConfigMetadata], ) -> Dict[str, List[str]]: diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 68e6b9543a..6c9e1b4b16 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -1673,6 +1673,7 @@ def get_instance_rate_per_hour( Returns: Optional[Dict[str, str]]: Instance rate per hour. Example: {'name': 'Instance Rate', 'unit': 'USD/Hrs', 'value': '1.125'}. + Raises: Exception: An exception is raised if the IAM role is not authorized to perform pricing:GetProducts. @@ -1699,8 +1700,10 @@ def get_instance_rate_per_hour( price_data = price_list[0] if isinstance(price_data, str): price_data = json.loads(price_data) - return extract_instance_rate_per_hour(price_data) + instance_rate_per_hour = extract_instance_rate_per_hour(price_data) + if instance_rate_per_hour is not None: + return instance_rate_per_hour raise Exception(f"Unable to get instance rate per hour for instance type: {instance_type}.") diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py index cfad1dc48f..cd11d950d5 100644 --- a/tests/unit/sagemaker/jumpstart/model/test_model.py +++ b/tests/unit/sagemaker/jumpstart/model/test_model.py @@ -15,6 +15,8 @@ from typing import Optional, Set from unittest import mock import unittest + +import pandas as pd from mock import MagicMock, Mock import pytest from sagemaker.async_inference.async_inference_config import AsyncInferenceConfig @@ -51,6 +53,7 @@ get_mock_init_kwargs, get_base_deployment_configs, get_base_spec_with_prototype_configs_with_missing_benchmarks, + append_instance_stat_metrics, ) import boto3 @@ -1712,7 +1715,7 @@ def test_model_set_deployment_config_incompatible_instance_type_or_name( @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") - @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") + @mock.patch("sagemaker.jumpstart.model.add_instance_rate_stats_to_benchmark_metrics") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @mock.patch("sagemaker.jumpstart.factory.model.Session") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") @@ -1722,7 +1725,7 @@ def test_model_list_deployment_configs( mock_get_model_specs: mock.Mock, mock_session: mock.Mock, mock_get_manifest: mock.Mock, - mock_get_instance_rate_per_hour: mock.Mock, + mock_add_instance_rate_stats_to_benchmark_metrics: mock.Mock, mock_verify_model_region_and_return_specs: mock.Mock, mock_get_init_kwargs: mock.Mock, ): @@ -1732,11 +1735,10 @@ def test_model_list_deployment_configs( mock_verify_model_region_and_return_specs.side_effect = ( lambda *args, **kwargs: get_base_spec_with_prototype_configs() ) - mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { - "name": "Instance Rate", - "unit": "USD/Hrs", - "value": "3.76", - } + mock_add_instance_rate_stats_to_benchmark_metrics.side_effect = lambda region, metrics: ( + None, + append_instance_stat_metrics(metrics), + ) mock_get_model_specs.side_effect = get_prototype_spec_with_configs mock_get_manifest.side_effect = ( lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) @@ -1782,7 
+1784,7 @@ def test_model_list_deployment_configs_empty( @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") - @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") + @mock.patch("sagemaker.jumpstart.model.add_instance_rate_stats_to_benchmark_metrics") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @mock.patch("sagemaker.jumpstart.factory.model.Session") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") @@ -1794,7 +1796,7 @@ def test_model_retrieve_deployment_config( mock_get_model_specs: mock.Mock, mock_session: mock.Mock, mock_get_manifest: mock.Mock, - mock_get_instance_rate_per_hour: mock.Mock, + mock_add_instance_rate_stats_to_benchmark_metrics: mock.Mock, mock_verify_model_region_and_return_specs: mock.Mock, mock_get_init_kwargs: mock.Mock, ): @@ -1803,11 +1805,10 @@ def test_model_retrieve_deployment_config( mock_verify_model_region_and_return_specs.side_effect = ( lambda *args, **kwargs: get_base_spec_with_prototype_configs_with_missing_benchmarks() ) - mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { - "name": "Instance Rate", - "unit": "USD/Hrs", - "value": "0.0083", - } + mock_add_instance_rate_stats_to_benchmark_metrics.side_effect = lambda region, metrics: ( + None, + append_instance_stat_metrics(metrics), + ) mock_get_model_specs.side_effect = get_prototype_spec_with_configs mock_get_manifest.side_effect = ( lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) @@ -1834,7 +1835,7 @@ def test_model_retrieve_deployment_config( @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") - @mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") + @mock.patch("sagemaker.jumpstart.model.add_instance_rate_stats_to_benchmark_metrics") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") @mock.patch("sagemaker.jumpstart.factory.model.Session") @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") @@ -1844,7 +1845,7 @@ def test_model_display_benchmark_metrics( mock_get_model_specs: mock.Mock, mock_session: mock.Mock, mock_get_manifest: mock.Mock, - mock_get_instance_rate_per_hour: mock.Mock, + mock_add_instance_rate_stats_to_benchmark_metrics: mock.Mock, mock_verify_model_region_and_return_specs: mock.Mock, mock_get_init_kwargs: mock.Mock, ): @@ -1854,11 +1855,10 @@ def test_model_display_benchmark_metrics( mock_verify_model_region_and_return_specs.side_effect = ( lambda *args, **kwargs: get_base_spec_with_prototype_configs_with_missing_benchmarks() ) - mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { - "name": "Instance Rate", - "unit": "USD/Hrs", - "value": "0.0083", - } + mock_add_instance_rate_stats_to_benchmark_metrics.side_effect = lambda region, metrics: ( + None, + append_instance_stat_metrics(metrics), + ) mock_get_model_specs.side_effect = get_prototype_spec_with_configs mock_get_manifest.side_effect = ( lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) @@ -1870,48 +1870,44 @@ def test_model_display_benchmark_metrics( model.display_benchmark_metrics() - # @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") - # @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") - # 
@mock.patch("sagemaker.jumpstart.model.get_instance_rate_per_hour") - # @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") - # @mock.patch("sagemaker.jumpstart.factory.model.Session") - # @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") - # @mock.patch("sagemaker.jumpstart.model.Model.deploy") - # @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) - # def test_model_benchmark_metrics( - # self, - # mock_model_deploy: mock.Mock, - # mock_get_model_specs: mock.Mock, - # mock_session: mock.Mock, - # mock_get_manifest: mock.Mock, - # mock_get_instance_rate_per_hour: mock.Mock, - # mock_verify_model_region_and_return_specs: mock.Mock, - # mock_get_init_kwargs: mock.Mock, - # ): - # model_id, _ = "pytorch-eqa-bert-base-cased", "*" - # - # mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs(model_id) - # mock_verify_model_region_and_return_specs.side_effect = ( - # lambda *args, **kwargs: get_base_spec_with_prototype_configs() - # ) - # mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { - # "name": "Instance Rate", - # "unit": "USD/Hrs", - # "value": "0.0083000000", - # } - # mock_get_model_specs.side_effect = get_prototype_spec_with_configs - # mock_get_manifest.side_effect = ( - # lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) - # ) - # mock_model_deploy.return_value = default_predictor - # - # mock_session.return_value = sagemaker_session - # - # model = JumpStartModel(model_id=model_id) - # - # df = model.benchmark_metrics - # - # self.assertTrue(isinstance(df, pd.DataFrame)) + @mock.patch("sagemaker.jumpstart.model.get_init_kwargs") + @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs") + @mock.patch("sagemaker.jumpstart.model.add_instance_rate_stats_to_benchmark_metrics") + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest") + @mock.patch("sagemaker.jumpstart.factory.model.Session") + @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs") + @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region) + def test_model_benchmark_metrics( + self, + mock_get_model_specs: mock.Mock, + mock_session: mock.Mock, + mock_get_manifest: mock.Mock, + mock_add_instance_rate_stats_to_benchmark_metrics: mock.Mock, + mock_verify_model_region_and_return_specs: mock.Mock, + mock_get_init_kwargs: mock.Mock, + ): + model_id, _ = "pytorch-eqa-bert-base-cased", "*" + + mock_get_init_kwargs.side_effect = lambda *args, **kwargs: get_mock_init_kwargs(model_id) + mock_verify_model_region_and_return_specs.side_effect = ( + lambda *args, **kwargs: get_base_spec_with_prototype_configs() + ) + mock_add_instance_rate_stats_to_benchmark_metrics.side_effect = lambda region, metrics: ( + None, + append_instance_stat_metrics(metrics), + ) + mock_get_model_specs.side_effect = get_prototype_spec_with_configs + mock_get_manifest.side_effect = ( + lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type) + ) + + mock_session.return_value = sagemaker_session + + model = JumpStartModel(model_id=model_id) + + df = model.benchmark_metrics + + self.assertTrue(isinstance(df, pd.DataFrame)) def test_jumpstart_model_requires_model_id(): diff --git a/tests/unit/sagemaker/jumpstart/test_utils.py b/tests/unit/sagemaker/jumpstart/test_utils.py index 05ecc240e8..f7458a29e9 100644 --- 
a/tests/unit/sagemaker/jumpstart/test_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_utils.py @@ -13,6 +13,8 @@ from __future__ import absolute_import import os from unittest import TestCase + +from botocore.exceptions import ClientError from mock.mock import Mock, patch import pytest import boto3 @@ -1771,3 +1773,94 @@ def test_extract_metrics_from_deployment_configs(): for key in data: assert len(data[key]) == (len(configs) - 2) + + +@patch("sagemaker.jumpstart.utils.get_instance_rate_per_hour") +def test_add_instance_rate_stats_to_benchmark_metrics( + mock_get_instance_rate_per_hour, +): + mock_get_instance_rate_per_hour.side_effect = lambda *args, **kwargs: { + "name": "Instance Rate", + "unit": "USD/Hrs", + "value": "3.76", + } + + err, out = utils.add_instance_rate_stats_to_benchmark_metrics( + "us-west-2", + { + "ml.p2.xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ], + "ml.gd4.xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ], + }, + ) + + assert err is None + for key in out: + assert len(out[key]) == 2 + for metric in out[key]: + if metric.name == "Instance Rate": + assert metric.to_json() == { + "name": "Instance Rate", + "unit": "USD/Hrs", + "value": "3.76", + } + + +@patch("sagemaker.jumpstart.utils.get_instance_rate_per_hour") +def test_add_instance_rate_stats_to_benchmark_metrics_client_ex( + mock_get_instance_rate_per_hour, +): + mock_get_instance_rate_per_hour.side_effect = ClientError( + {"Error": {"Message": "is not authorized to perform: pricing:GetProducts"}}, "GetProducts" + ) + + err, out = utils.add_instance_rate_stats_to_benchmark_metrics( + "us-west-2", + { + "ml.p2.xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ], + }, + ) + + assert err == "is not authorized to perform: pricing:GetProducts" + for key in out: + assert len(out[key]) == 1 + + +@patch("sagemaker.jumpstart.utils.get_instance_rate_per_hour") +def test_add_instance_rate_stats_to_benchmark_metrics_ex( + mock_get_instance_rate_per_hour, +): + mock_get_instance_rate_per_hour.side_effect = Exception() + + err, out = utils.add_instance_rate_stats_to_benchmark_metrics( + "us-west-2", + { + "ml.p2.xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ], + }, + ) + + assert err == "Unable to get instance rate per hour for instance type: ml.p2.xlarge." 
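# Editor's note, not part of the patch: a compact sketch of the contract these tests
# exercise -- add_instance_rate_stats_to_benchmark_metrics returns an (error_message,
# metrics) tuple and, on success, appends an "Instance Rate" stat for each instance
# type. The latency value below is illustrative only.
from sagemaker.jumpstart import utils
from sagemaker.jumpstart.types import JumpStartBenchmarkStat

err, metrics = utils.add_instance_rate_stats_to_benchmark_metrics(
    "us-west-2",
    {"ml.p2.xlarge": [JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"})]},
)
if err is None:
    assert utils.has_instance_rate_stat(metrics["ml.p2.xlarge"])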
+ for key in out: + assert len(out[key]) == 1 + + +@pytest.mark.parametrize( + "stats, expected", + [ + (None, False), + ( + [JumpStartBenchmarkStat({"name": "Instance Rate", "unit": "USD/Hrs", "value": "3.76"})], + True, + ), + ([JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"})], False), + ], +) +def test_has_instance_rate_stat(stats, expected): + assert utils.has_instance_rate_stat(stats) is expected diff --git a/tests/unit/sagemaker/jumpstart/utils.py b/tests/unit/sagemaker/jumpstart/utils.py index 9e1f9f5e14..e8a93dff6c 100644 --- a/tests/unit/sagemaker/jumpstart/utils.py +++ b/tests/unit/sagemaker/jumpstart/utils.py @@ -391,3 +391,16 @@ def get_base_deployment_configs( return [ config.to_json() for config in get_base_deployment_configs_metadata(omit_benchmark_metrics) ] + + +def append_instance_stat_metrics( + metrics: Dict[str, List[JumpStartBenchmarkStat]] +) -> Dict[str, List[JumpStartBenchmarkStat]]: + if metrics is not None: + for key in metrics: + metrics[key].append( + JumpStartBenchmarkStat( + {"name": "Instance Rate", "value": "3.76", "unit": "USD/Hrs"} + ) + ) + return metrics diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 32444aca60..ccb7a5a1c4 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -27,7 +27,6 @@ from boto3 import exceptions import botocore import pytest -from botocore.exceptions import ClientError from mock import call, patch, Mock, MagicMock, PropertyMock import sagemaker @@ -1956,12 +1955,6 @@ def test_deep_override_skip_keys(self): }, {"name": "Instance Rate", "unit": "USD/Hrs", "value": "0.008"}, ), - ( - "ml.t4g.nano", - "cn-north-1", - {"PriceList": []}, - None, - ), ], ) @patch("boto3.client") @@ -1977,28 +1970,10 @@ def test_get_instance_rate_per_hour( assert instance_rate == expected -# @patch("sagemaker.utils.logger") -# @patch("boto3.client") -# def test_get_instance_rate_per_hour_client_ex(mock_client, mock_logger): -# err_msg = ( -# "User: arn:aws:sts::123456789123:assumed-role/AmazonSageMaker-ExecutionRole-20230707T131628/SageMaker " -# "is not authorized to perform: pricing:GetProducts because no identity-based policy allows the " -# "pricing:GetProducts action" -# ) -# mock_client.return_value.get_products.side_effect = ClientError( -# {"Error": {"Message": err_msg, "Code": "AccessDeniedException"}}, -# "GetProducts", -# ) -# -# instance_rate = get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") -# -# mock_logger.warning.assert_called_with( -# "Instance rate metrics will be omitted. 
Reason: %s", err_msg -# ) -# assert instance_rate is None - +@patch("boto3.client") +def test_get_instance_rate_per_hour_ex(mock_client): + mock_client.return_value.get_products.side_effect = lambda *args, **kwargs: {"PriceList": []} -def test_get_instance_rate_per_hour_ex(): with pytest.raises(Exception): get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") From 561565d608d1db838990cc28512079833fd56477 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Sat, 27 Apr 2024 15:40:35 -0700 Subject: [PATCH 17/19] Refactoring --- src/sagemaker/jumpstart/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index 0221007afa..e4486efa31 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1051,13 +1051,14 @@ def add_instance_rate_stats_to_benchmark_metrics( err_message = None for instance_type, benchmark_metric_stats in benchmark_metrics.items(): - if not instance_type.startswith("ml."): - instance_type = f"ml.{instance_type}" + ml_instance_type = ( + instance_type if instance_type.startswith("ml.") else f"ml.{instance_type}" + ) if not has_instance_rate_stat(benchmark_metric_stats): try: instance_type_rate = get_instance_rate_per_hour( - instance_type=instance_type, region=region + instance_type=ml_instance_type, region=region ) benchmark_metric_stats.append(JumpStartBenchmarkStat(instance_type_rate)) From 7c7f51db7d310fd2eff507600bc1210978757ec6 Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Sat, 27 Apr 2024 16:01:38 -0700 Subject: [PATCH 18/19] Prefix instance type with ml --- src/sagemaker/jumpstart/utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py index e4486efa31..a8c4bd7c21 100644 --- a/src/sagemaker/jumpstart/utils.py +++ b/src/sagemaker/jumpstart/utils.py @@ -1049,29 +1049,31 @@ def add_instance_rate_stats_to_benchmark_metrics( if benchmark_metrics is None: return None + final_benchmark_metrics = {} + err_message = None for instance_type, benchmark_metric_stats in benchmark_metrics.items(): - ml_instance_type = ( - instance_type if instance_type.startswith("ml.") else f"ml.{instance_type}" - ) + instance_type = instance_type if instance_type.startswith("ml.") else f"ml.{instance_type}" if not has_instance_rate_stat(benchmark_metric_stats): try: instance_type_rate = get_instance_rate_per_hour( - instance_type=ml_instance_type, region=region + instance_type=instance_type, region=region ) benchmark_metric_stats.append(JumpStartBenchmarkStat(instance_type_rate)) - benchmark_metrics[instance_type] = benchmark_metric_stats + final_benchmark_metrics[instance_type] = benchmark_metric_stats except ClientError as e: + final_benchmark_metrics[instance_type] = benchmark_metric_stats err_message = e.response["Error"]["Message"] except Exception: # pylint: disable=W0703 + final_benchmark_metrics[instance_type] = benchmark_metric_stats err_message = ( f"Unable to get instance rate per hour for instance type: {instance_type}." 
) - return err_message, benchmark_metrics + return err_message, final_benchmark_metrics def has_instance_rate_stat(benchmark_metric_stats: Optional[List[JumpStartBenchmarkStat]]) -> bool: From dbf0279d7cdd0fa1710094e5c90df8787c8d9d5d Mon Sep 17 00:00:00 2001 From: Jonathan Makunga Date: Sat, 27 Apr 2024 16:49:09 -0700 Subject: [PATCH 19/19] Fix unit tests --- tests/unit/test_utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index ccb7a5a1c4..e94f3087ad 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1970,14 +1970,6 @@ def test_get_instance_rate_per_hour( assert instance_rate == expected -@patch("boto3.client") -def test_get_instance_rate_per_hour_ex(mock_client): - mock_client.return_value.get_products.side_effect = lambda *args, **kwargs: {"PriceList": []} - - with pytest.raises(Exception): - get_instance_rate_per_hour(instance_type="ml.t4g.nano", region="us-west-2") - - @pytest.mark.parametrize( "price_data, expected_result", [