Skip to content

Commit

Permalink
Add tei cpu image (aws#4695)
Browse files Browse the repository at this point in the history
* Add tei cpu image

* fix format issue

* fix unit tests

* fix typo

* fix typo
  • Loading branch information
haixiw authored and jiapinw committed Jun 25, 2024
1 parent aa59619 commit 279e0e6
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 5 deletions.
7 changes: 7 additions & 0 deletions src/sagemaker/huggingface/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ def get_huggingface_llm_image_uri(
version=version,
image_scope="inference",
)
if backend == "huggingface-tei-cpu":
return image_uris.retrieve(
"huggingface-tei-cpu",
region=region,
version=version,
image_scope="inference",
)
if backend == "lmi":
version = version or "0.24.0"
return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version)
Expand Down
59 changes: 59 additions & 0 deletions src/sagemaker/image_uri_config/huggingface-tei-cpu.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"inference": {
"processors": [
"cpu"
],
"version_aliases": {
"1.2": "1.2.3"
},
"versions": {
"1.2.3": {
"py_versions": [
"py310"
],
"registries": {
"af-south-1": "510948584623",
"ap-east-1": "651117190479",
"ap-northeast-1": "354813040037",
"ap-northeast-2": "366743142698",
"ap-northeast-3": "867004704886",
"ap-south-1": "720646828776",
"ap-south-2": "628508329040",
"ap-southeast-1": "121021644041",
"ap-southeast-2": "783357654285",
"ap-southeast-3": "951798379941",
"ap-southeast-4": "106583098589",
"ca-central-1": "341280168497",
"ca-west-1": "190319476487",
"cn-north-1": "450853457545",
"cn-northwest-1": "451049120500",
"eu-central-1": "492215442770",
"eu-central-2": "680994064768",
"eu-north-1": "662702820516",
"eu-south-1": "978288397137",
"eu-south-2": "104374241257",
"eu-west-1": "141502667606",
"eu-west-2": "764974769150",
"eu-west-3": "659782779980",
"il-central-1": "898809789911",
"me-central-1": "272398656194",
"me-south-1": "801668240914",
"sa-east-1": "737474898029",
"us-east-1": "683313688378",
"us-east-2": "257758044811",
"us-gov-east-1": "237065988967",
"us-gov-west-1": "414596584902",
"us-iso-east-1": "833128469047",
"us-isob-east-1": "281123927165",
"us-west-1": "746614075791",
"us-west-2": "246618743249"
},
"tag_prefix": "2.0.1-tei1.2.3",
"repository": "tei-cpu",
"container_version": {
"cpu": "ubuntu22.04"
}
}
}
}
}
6 changes: 4 additions & 2 deletions src/sagemaker/image_uris.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
ECR_URI_TEMPLATE = "{registry}.dkr.{hostname}/{repository}"
HUGGING_FACE_FRAMEWORK = "huggingface"
HUGGING_FACE_LLM_FRAMEWORK = "huggingface-llm"
-HUGGING_FACE_TEI_FRAMEWORK = "huggingface-tei"
+HUGGING_FACE_TEI_GPU_FRAMEWORK = "huggingface-tei"
+HUGGING_FACE_TEI_CPU_FRAMEWORK = "huggingface-tei-cpu"
HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx"
XGBOOST_FRAMEWORK = "xgboost"
SKLEARN_FRAMEWORK = "sklearn"
Expand Down Expand Up @@ -478,7 +479,8 @@ def _validate_version_and_set_if_needed(version, config, framework):
if version is None and framework in [
DATA_WRANGLER_FRAMEWORK,
HUGGING_FACE_LLM_FRAMEWORK,
-HUGGING_FACE_TEI_FRAMEWORK,
+HUGGING_FACE_TEI_GPU_FRAMEWORK,
+HUGGING_FACE_TEI_CPU_FRAMEWORK,
HUGGING_FACE_LLM_NEURONX_FRAMEWORK,
STABILITYAI_FRAMEWORK,
]:
Expand Down
12 changes: 9 additions & 3 deletions tests/unit/sagemaker/image_uris/test_huggingface_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
"gpu": {
"1.2.3": "2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04",
},
"cpu": {
"1.2.3": "2.0.1-tei1.2.3-cpu-py310-ubuntu22.04",
},
}
HF_VERSIONS_MAPPING = {
"gpu": {
Expand Down Expand Up @@ -73,17 +76,20 @@ def test_huggingface_uris(load_config):
assert expected == uri


-@pytest.mark.parametrize("load_config", ["huggingface-tei.json"], indirect=True)
+@pytest.mark.parametrize(
+"load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True
+)
def test_huggingface_tei_uris(load_config):
VERSIONS = load_config["inference"]["versions"]
device = load_config["inference"]["processors"][0]
-backend = "huggingface-tei"
+backend = "huggingface-tei" if device == "gpu" else "huggingface-tei-cpu"
+repo = "tei" if device == "gpu" else "tei-cpu"
for version in VERSIONS:
ACCOUNTS = load_config["inference"]["versions"][version]["registries"]
for region in ACCOUNTS.keys():
uri = get_huggingface_llm_image_uri(backend, region=region, version=version)
expected = expected_uris.huggingface_llm_framework_uri(
-"tei",
+repo,
ACCOUNTS[region],
version,
TEI_VERSIONS_MAPPING[device][version],
Expand Down

0 comments on commit 279e0e6

Please sign in to comment.