Multi model deployment #208

Draft · wants to merge 74 commits into base: main

Commits (74)
4eac006
Removing load balancing config
TosinSeg Jun 19, 2023
c68e999
Reformatting tests
TosinSeg Jun 20, 2023
5ce1a92
Fixed the formatting
TosinSeg Jun 20, 2023
fa10e19
Removed print statement
TosinSeg Jun 20, 2023
f9cbd74
Merging main
TosinSeg Jun 26, 2023
8970f4e
Removing unused import
TosinSeg Jun 26, 2023
517bea8
Fixing tests
TosinSeg Jun 26, 2023
58dd2b2
Fixing merge issue
TosinSeg Jun 26, 2023
bb0d551
Creating hostfile when one is not provided
TosinSeg Jun 26, 2023
e2bb9d5
Merge branch 'main' into Always_enable_load_balancing
TosinSeg Jun 26, 2023
3823534
Fixing import statements removed by merge
TosinSeg Jun 26, 2023
6f9b4ad
Removing load_balancing check
TosinSeg Jun 26, 2023
499b9ad
Removing redundant definitions
TosinSeg Jun 26, 2023
5419ef6
Removing hostfile from test
TosinSeg Jun 26, 2023
a70b6de
Removing hostfile from non-persistent test
TosinSeg Jun 26, 2023
eea658b
initial changes
TosinSeg Jun 27, 2023
20f0878
Merge branch 'main' into multi-model-deployment
TosinSeg Jun 27, 2023
c21c31b
Maintaining current behavior
TosinSeg Jun 28, 2023
f525329
Reading from score file
TosinSeg Jun 28, 2023
3c0937f
fixing syntax errors
TosinSeg Jun 28, 2023
156ac83
Fixing more syntax errors
TosinSeg Jun 28, 2023
38e270e
Fixing more syntax issues
TosinSeg Jun 29, 2023
4d4e0d8
initial lb changes
TosinSeg Jun 29, 2023
01c8e59
Merge branch 'main' into multi-model-deployment
TosinSeg Jun 29, 2023
f801b36
More load balancing changes
TosinSeg Jun 29, 2023
fd4e2ed
LB changes and syntax
TosinSeg Jun 30, 2023
0a3b7e5
Refactor client, and unpack request in load balancer
TosinSeg Jun 30, 2023
6523c04
First working queries
TosinSeg Jul 3, 2023
06b40f5
Fixing conversational and q&a args
TosinSeg Jul 3, 2023
96d0dcb
Updates to _allocate_processes and fixing example
TosinSeg Jul 5, 2023
ab41d24
Adding host map for allocating processes and formatting
TosinSeg Jul 5, 2023
8673a9a
Fixing terminate functionality
TosinSeg Jul 5, 2023
8d09b37
Refactored client
TosinSeg Jul 6, 2023
7a136d6
More Refactoring and q/a example
TosinSeg Jul 6, 2023
2c6ec08
Reformatting to maintain previous syntax
TosinSeg Jul 6, 2023
0cb88a9
Removing print/debug statements
TosinSeg Jul 6, 2023
7c0ee12
Fixing non-persistent deployments
TosinSeg Jul 6, 2023
7a956d5
Refactoring Load balancer launch
TosinSeg Jul 7, 2023
f8cfe28
Fixing restful gateway client
TosinSeg Jul 10, 2023
079807d
Fixing replica issue
TosinSeg Jul 10, 2023
ea1e47e
Fixing non persistent client
TosinSeg Jul 10, 2023
98b6129
Adding trust_remote_code support (#203)
msinha251 Jul 11, 2023
daab5e6
Refactoring
TosinSeg Jul 12, 2023
84073f9
Update mii/models/score/generate.py
TosinSeg Jul 12, 2023
3ee3410
Merge branch 'multi-model-deployment' of github.com:TosinSeg/DeepSpee…
Jul 13, 2023
b4edc2b
Refactoring Load Balancer and request_proto
Jul 13, 2023
6346194
Formatting
Jul 13, 2023
94b6699
Fixing the client
Jul 14, 2023
710c20b
Initial partial deployment commit
Jul 21, 2023
c2636b7
More partial deploy updates
Jul 21, 2023
189e75c
Partial deploy started
Jul 21, 2023
adee843
fixing add deploy api queries
Jul 24, 2023
a145be5
Support for empty deployment 'group'
Jul 24, 2023
082c05e
Support for empty deployment 'group'
Jul 24, 2023
3ce77d2
Partial Termination
Jul 25, 2023
b40ecbd
Refactoring
Jul 25, 2023
72dd95c
formatting
Jul 25, 2023
a4e3d56
fixing bug for partial termination
Jul 25, 2023
4b5bb47
Removing comments
Jul 25, 2023
30d2b03
Including GPU index map in score file
Jul 26, 2023
c5d5996
Refactoring deployment
Jul 26, 2023
3ae1781
Refactoring and formatting
Jul 26, 2023
4b8f02f
Refactoring
Jul 28, 2023
c51ce37
Fixing Readme
Jul 28, 2023
43479db
Refactoring GRPC
Jul 28, 2023
e1b6d23
Fixing LB process not terminating
Jul 28, 2023
1675bd8
Adding multi_deployment and partial deploy/terminate unit tests
Jul 31, 2023
8684a61
Removing comments
Jul 31, 2023
56a7fce
Fixing spelling issues
Aug 1, 2023
fb70c3d
Update mii/client.py
TosinSeg Aug 1, 2023
e2cfe8a
Update mii/client.py
TosinSeg Aug 1, 2023
1312738
Removing AML from addDeploy
Aug 1, 2023
b0f0da4
Refactoring MIIConfig and DeploymentConfig
Aug 2, 2023
b78068e
Partial deploy/termination example
Aug 11, 2023
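Several commits above ("Updates to _allocate_processes", "Adding host map for allocating processes", "Including GPU index map in score file") revolve around assigning deployments to free GPU slots per host. A minimal sketch of that idea, where the function name, signature, and greedy strategy are illustrative assumptions rather than the actual MII implementation:

```python
# Hypothetical sketch of GPU-slot allocation across hosts, in the spirit of
# the `_allocate_processes` / host-map commits above. The name, signature,
# and greedy strategy are assumptions, not MII's actual code.

def allocate_processes(host_gpus, deployments):
    """Greedily assign each deployment the number of GPUs it needs.

    host_gpus:   {hostname: [free GPU indices]}
    deployments: [(deployment_name, num_gpus_needed)]
    Returns a GPU index map per deployment: {name: {hostname: [indices]}}.
    """
    allocation = {}
    for name, needed in deployments:
        # Find the first host with enough free GPUs and claim them.
        for host, free in host_gpus.items():
            if len(free) >= needed:
                allocation[name] = {host: free[:needed]}
                host_gpus[host] = free[needed:]
                break
        else:
            raise RuntimeError(f"Not enough free GPUs for {name}")
    return allocation
```

For example, with a single `master` host exposing GPUs `[0, 1, 2, 3]`, a deployment needing two GPUs would claim `[0, 1]` and the next single-GPU deployment would get `[2]`, mirroring the `gpu_index_map` dictionaries used in the example below.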
47 changes: 47 additions & 0 deletions examples/multi_model/deploy.py
@@ -0,0 +1,47 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
import mii

gpu_index_map1 = {'master': [0]}
gpu_index_map2 = {'master': [1]}
gpu_index_map3 = {'master': [0, 1]}

deployments = []

mii_configs1 = {"tensor_parallel": 2, "dtype": "fp16"}
mii_configs2 = {"tensor_parallel": 1}

name = "bigscience/bloom-560m"
deployments.append(
    mii.DeploymentConfig(task='text-generation',
                         model=name,
                         deployment_name=name + "_deployment",
                         GPU_index_map=gpu_index_map3,
                         mii_config=mii.config.MIIConfig(**mii_configs1)))

# gpt2
name = "microsoft/DialogRPT-human-vs-rand"
deployments.append(
    mii.DeploymentConfig(task='text-classification',
                         model=name,
                         deployment_name=name + "_deployment",
                         GPU_index_map=gpu_index_map2))

name = "microsoft/DialoGPT-large"
deployments.append(
    mii.DeploymentConfig(task='conversational',
                         model=name,
                         deployment_name=name + "_deployment",
                         GPU_index_map=gpu_index_map1,
                         mii_config=mii.config.MIIConfig(**mii_configs2)))

name = "deepset/roberta-large-squad2"
deployments.append(
    mii.DeploymentConfig(task="question-answering",
                         model=name,
                         deployment_name=name + "-qa-deployment",
                         GPU_index_map=gpu_index_map2))

mii.deploy(deployment_tag="multi_models", deployments=deployments)
46 changes: 46 additions & 0 deletions examples/multi_model/query.py
@@ -0,0 +1,46 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import mii

results = []
generator = mii.mii_query_handle("multi_models")
result = generator.query(
    {
        "query": ["DeepSpeed is", "Seattle is"],
        "deployment_name": "bigscience/bloom-560m_deployment"
    },
    do_sample=True,
    max_new_tokens=30,
)
results.append(result)

result = generator.query({
    'query': "DeepSpeed is the greatest",
    "deployment_name": "microsoft/DialogRPT-human-vs-rand_deployment"
})
results.append(result)

result = generator.query({
    'text': "DeepSpeed is the greatest",
    'conversation_id': 3,
    'past_user_inputs': [],
    'generated_responses': [],
    "deployment_name": "microsoft/DialoGPT-large_deployment"
})
results.append(result)

result = generator.query({
    'question': "What is the greatest?",
    'context': "DeepSpeed is the greatest",
    "deployment_name": "deepset/roberta-large-squad2" + "-qa-deployment"
})
results.append(result)
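Each query above carries a `"deployment_name"` key that the multi-model handle uses to pick the target model (see the "unpack request in load balancer" commit). A minimal sketch of that dispatch pattern, where `route_query` and the handler table are illustrative assumptions, not MII's actual load balancer:

```python
# Hypothetical sketch of routing a request to the right model by its
# "deployment_name" key, mirroring the query.py example above. The function
# and handler table are assumptions, not MII's actual load balancer.

def route_query(request, handlers):
    """Pop 'deployment_name' from the request and dispatch to its handler."""
    payload = dict(request)  # copy so the caller's dict is not mutated
    name = payload.pop("deployment_name")
    if name not in handlers:
        raise KeyError(f"Unknown deployment: {name}")
    return handlers[name](payload)


# Usage with a stand-in handler instead of a real model:
handlers = {"echo_deployment": lambda p: p["query"].upper()}
result = route_query(
    {"query": "hi", "deployment_name": "echo_deployment"}, handlers)
```

The remaining keys (`query`, `text`, `question`/`context`, and so on) are passed through untouched, which is why each task in query.py can keep its usual input format.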
7 changes: 7 additions & 0 deletions examples/multi_model/shutdown.py
@@ -0,0 +1,7 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
import mii

mii.terminate("multi_models")
2 changes: 1 addition & 1 deletion mii/__init__.py
@@ -10,7 +10,7 @@
from .constants import DeploymentType, Tasks
from .aml_related.utils import aml_output_path

-from .config import MIIConfig, LoadBalancerConfig
+from .config import MIIConfig, LoadBalancerConfig, DeploymentConfig
from .grpc_related.proto import modelresponse_pb2_grpc

__version__ = "0.0.0"