release 0.9.0 #2239

Open · wants to merge 405 commits into base: master
b58720c
[CoreEngine] In order to make the inference logs work, we save the co…
fedml-alex May 29, 2024
f4c49c9
Merge pull request #2135 from FedML-AI/alexleung/dev_branch_latest
fedml-alex May 29, 2024
28e4af4
Merge pull request #2136 from FedML-AI/alexleung/dev_v070_for_refactor
fedml-alex May 30, 2024
38e4453
Merge pull request #2137 from FedML-AI/dev/v0.7.0
fedml-alex May 30, 2024
1377a0d
Merge pull request #2138 from FedML-AI/alexleung/dev_v070_for_refactor
fedml-alex May 30, 2024
7625075
[Deploy] Avoid re-download the same model serving package.
Raphael-Jin May 30, 2024
e70837e
Merge pull request #2139 from FedML-AI/raphael/fix-pkg-download
Raphael-Jin May 30, 2024
9d8b0df
Add inference gateway logs
alaydshah May 30, 2024
c2fd5bd
Merge remote-tracking branch 'origin' into alaydshah/inference_gatewa…
alaydshah May 30, 2024
94757eb
Merge branch 'dev/v0.7.0' into alaydshah/inference_gateway_logging
alaydshah May 31, 2024
3fb45aa
Make Inference Gateway Daemon Process
Jun 3, 2024
8595e0f
Adding fail fast and timeout enforcement per request policies.
fedml-dimitris Jun 3, 2024
9296884
[Deploy] Fix config reading from redis.
Raphael-Jin Jun 3, 2024
b1312e1
Add global env file
alaydshah Jun 4, 2024
19160a2
Nits
alaydshah Jun 4, 2024
1b2eefe
Bug fix
alaydshah Jun 4, 2024
3481aa8
Write it to release by default
alaydshah Jun 4, 2024
b2ea4d0
Nit
alaydshah Jun 4, 2024
21138dd
Nit
alaydshah Jun 4, 2024
1cc1552
Hotfix mqtt timeout inference constant after refactoring.
fedml-dimitris Jun 4, 2024
8e03183
Improving pending requests counter robustness.
fedml-dimitris Jun 4, 2024
b0a55ad
Returning well formatted json messages in the case of errored requests.
fedml-dimitris Jun 4, 2024
2e53536
Fix bug
alaydshah Jun 4, 2024
b5e4c25
Make env variables override system, abstract dotenv api calls into fu…
alaydshah Jun 4, 2024
9a8f307
Merge pull request #2143 from FedML-AI/alaydshah/global/env_file
alaydshah Jun 4, 2024
e1e09c0
Merge branch 'dev/v0.7.0' into alaydshah/inference_gateway_logging
alaydshah Jun 4, 2024
10c5e17
Merge pull request #2142 from FedML-AI/dimitris/fail_fast_policy_merge
fedml-dimitris Jun 5, 2024
e2430fc
Renaming endpoint_id key to end_point_id
fedml-dimitris Jun 5, 2024
826efa8
Merge pull request #2144 from FedML-AI/hotfix/endpoint_metrics_attribute
fedml-dimitris Jun 5, 2024
600905f
[Deploy] Fix multi sub folder issue during deployment.
Raphael-Jin Jun 5, 2024
2ce07f4
Optimize Inference
alaydshah Jun 5, 2024
c1e37af
Nit.
Raphael-Jin Jun 5, 2024
d6799eb
Merge branch 'dev/v0.7.0' into alaydshah/inference_gateway_logging
alaydshah Jun 5, 2024
85c3ad8
Merge pull request #2145 from FedML-AI/raphael/fix-multi-deploy-subfd
fedml-dimitris Jun 5, 2024
a4b8ad2
Pipe in Mqtt config directly instead of deserializing object
alaydshah Jun 5, 2024
0ad81ce
Nits
alaydshah Jun 5, 2024
d713876
Fix bugs
alaydshah Jun 6, 2024
134f63e
Remove info logging added for debugging
alaydshah Jun 6, 2024
fd446b0
Fix
alaydshah Jun 6, 2024
2478350
Merge pull request #2146 from FedML-AI/dev/v0.7.0
fedml-alex Jun 6, 2024
19abac1
[CoreEngine] update the version and dependent libs.
fedml-alex Jun 6, 2024
3e39975
Merge pull request #2148 from FedML-AI/alexleung/dev_v070_for_refactor
fedml-alex Jun 6, 2024
27ad2e7
[CoreEngine] remove the deprecated files in the scheduler.
fedml-alex Jun 6, 2024
2c7d434
Merge pull request #2149 from FedML-AI/alexleung/dev_v070_for_refactor
fedml-alex Jun 6, 2024
f487b12
Merge pull request #2140 from FedML-AI/alaydshah/inference_gateway_lo…
alaydshah Jun 6, 2024
493463e
[Deploy] Recursively find the model serving package folder
Raphael-Jin Jun 6, 2024
6d5f62b
Merge branch 'raphael/fix-multi-subfd' of https://github.com/FedML-AI…
fedml-dimitris Jun 6, 2024
b4cb7c5
Making sure the unzipped file is a directory during initial deployment.
fedml-dimitris Jun 6, 2024
5a05310
Merge pull request #2150 from FedML-AI/raphael/fix-multi-subfd
Raphael-Jin Jun 6, 2024
f76d88e
[Deploy] Hot fix grammar.
Raphael-Jin Jun 6, 2024
8247dd2
Merge pull request #2152 from FedML-AI/raphael/hot-fix-grammar
Raphael-Jin Jun 6, 2024
4b11270
Hot fix to support local debugging
alaydshah Jun 6, 2024
2de8c37
Bug fix
alaydshah Jun 7, 2024
343b940
Merge pull request #2153 from FedML-AI/alaydshah/inference_gateway/ho…
Raphael-Jin Jun 7, 2024
38bc898
Adding sequential uploads & download using presigned URL
bhargav191098 Jun 7, 2024
aa62a94
minor comments and some error handling
bhargav191098 Jun 7, 2024
14bae99
[CoreEngine] 1. fixed the issue that the fork method is not support i…
fedml-alex Jun 7, 2024
28ff0f3
[CoreEngine] add the missed import.
fedml-alex Jun 7, 2024
6b33065
Merge pull request #2155 from FedML-AI/alexleung/dev_v070_for_refactor
fedml-alex Jun 7, 2024
c151831
Adding hash set for counting the number of pending requests per endp…
fedml-dimitris Jun 6, 2024
c29cf1d
[Deploy] Unified timeout key.
Raphael-Jin Jun 10, 2024
e667ded
Merge pull request #2151 from FedML-AI/dimitris/fix_pending_requests_…
Raphael-Jin Jun 10, 2024
5214078
Merge pull request #2154 from FedML-AI/bhargav191098/storage_presigne…
alaydshah Jun 10, 2024
c4a8714
[Deploy] Report worker's connectivity when it finished.
Raphael-Jin Jun 11, 2024
ea03b60
[Deploy] Refactor the quick start example, use public ip as default.
Raphael-Jin Jun 11, 2024
af026fb
Merge pull request #2158 from FedML-AI/raphael/refactor-quick-start
alaydshah Jun 11, 2024
31d8e7c
[CoreEngine] Adjust the design of FedML Python Agent to a decentraliz…
fedml-alex Jun 11, 2024
cc90279
Merge pull request #2159 from FedML-AI/dev/v0.7.0
fedml-alex Jun 11, 2024
9c227bb
Merge pull request #2160 from FedML-AI/dev/v0.7.0
fedml-alex Jun 11, 2024
edd148e
[CoreEngine] Use the fork process on the MacOS and linux to avoid the…
fedml-alex Jun 11, 2024
fd5af7e
[CoreEngine] Use the fork process on the MacOS and linux to avoid the…
fedml-alex Jun 11, 2024
2248621
Merge pull request #2162 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 11, 2024
207b5fb
Merge branch 'raphael/unify-connectivity' of https://github.com/FedML…
fedml-dimitris Jun 11, 2024
4a9622c
Adding default http connectivity type constant. Fixing minor typos an…
fedml-dimitris Jun 11, 2024
653fe66
[CoreEngine] make the multiprocess work on windows, linux and mac.
fedml-alex Jun 11, 2024
a7567ee
Merge pull request #2164 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 11, 2024
34fdba0
Merge pull request #2157 from FedML-AI/raphael/unify-connectivity
Raphael-Jin Jun 11, 2024
23d88fc
[Deploy] Remove unnecessary logic.
Raphael-Jin Jun 11, 2024
e0ad9b5
[Deploy] Remove unnecessary logic; Rename readiness check function; F…
Raphael-Jin Jun 11, 2024
64e8c77
[Deploy] Nit
Raphael-Jin Jun 11, 2024
9194f84
[Deploy] Hide unnecessary log.
Raphael-Jin Jun 11, 2024
8530973
Merge pull request #2165 from FedML-AI/raphael/refactor-container-dep…
fedml-dimitris Jun 11, 2024
243be07
[Deploy] Read port info from env.
Raphael-Jin Jun 12, 2024
0b23499
[CoreEngine] make the status center work in the united agents.
fedml-alex Jun 12, 2024
c27edd0
Merge pull request #2166 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 12, 2024
3a03471
[Deploy] Nit.
Raphael-Jin Jun 12, 2024
f0dd29e
[Deploy] Nit.
Raphael-Jin Jun 12, 2024
21a8a4c
[Deploy] Change few more places relate to gateway port.
Raphael-Jin Jun 12, 2024
e7e974d
[Deploy] Write port info into env file.
Raphael-Jin Jun 12, 2024
9c8ce99
[Deploy] Nit.
Raphael-Jin Jun 12, 2024
bec28a6
Merge pull request #2167 from FedML-AI/raphael/hotfix-inference-port
Raphael-Jin Jun 13, 2024
505103f
removing zip from upload
bhargav191098 Jun 14, 2024
03c58a2
changes in the download to support files
bhargav191098 Jun 14, 2024
cb7da70
print statement removal
bhargav191098 Jun 14, 2024
394906e
name issue
bhargav191098 Jun 14, 2024
2170797
\Adding Enum for data type
bhargav191098 Jun 15, 2024
5fb5ed4
adding user_id to bucket path
bhargav191098 Jun 15, 2024
14a0182
Merge pull request #2168 from FedML-AI/bhargav191098/removing_archive
bhargav191098 Jun 15, 2024
a1af615
[CoreEngine] refactor to support to pass the communication manager, s…
fedml-alex Jun 17, 2024
07ae3a9
Merge pull request #2173 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 17, 2024
6e8788c
[CoreEngine] refactor to support to pass the communication manager, s…
fedml-alex Jun 17, 2024
ca16d2a
Merge pull request #2174 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 17, 2024
78e310c
[CoreEngine] stop the status center, message center and other process…
fedml-alex Jun 17, 2024
7233d62
Merge pull request #2176 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 17, 2024
aecafb8
Fix compatibility by limiting numpy latest version.
Raphael-Jin Jun 17, 2024
87e11f7
Merge pull request #2177 from FedML-AI/raphael/fix-compat
Raphael-Jin Jun 17, 2024
1af78e7
[CoreEngine] replace the queue with the managed queue to avoid the mu…
fedml-alex Jun 18, 2024
1cac911
Merge pull request #2178 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 18, 2024
89219fb
Workaround device mapping inconsistency
alaydshah Jun 18, 2024
4ceba31
Merge pull request #2179 from FedML-AI/alaydshah/qualcomm/workaround/…
alaydshah Jun 18, 2024
1d5a05d
[Deploy][Autoscale] Bug fix: continue the for loop if no scale op.
Raphael-Jin Jun 19, 2024
a388915
Merge pull request #2182 from FedML-AI/raphael/fix-deploy
alaydshah Jun 19, 2024
31c57e0
Polishing the autoscaler real test.
fedml-dimitris Jun 19, 2024
4cb53fe
Replacing e_id.
fedml-dimitris Jun 19, 2024
4cc39fb
Merge pull request #2185 from FedML-AI/feature/autoscaler-real-test
fedml-dimitris Jun 19, 2024
1422fa1
[CoreEngine] check the nil pointer and update the numpy version.
fedml-alex Jun 19, 2024
c088de4
Merge pull request #2186 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 19, 2024
158eb9c
[CoreEngine] remove the deprecated action runners.
fedml-alex Jun 19, 2024
86b3db0
Merge pull request #2187 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 19, 2024
c485282
[CoreEngine] remove the unused files.
fedml-alex Jun 19, 2024
6b9cb03
Merge pull request #2188 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 19, 2024
9f996ab
[CoreEngine] when the deploy master reports finished status, we shoul…
fedml-alex Jun 19, 2024
82ca218
Merge pull request #2189 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 19, 2024
f28adea
[CoreEngine] Fix the stuck issue in the deploy master agent.
fedml-alex Jun 19, 2024
6b5e56f
Merge pull request #2190 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 19, 2024
31b7ae0
[Deploy] Hotfix: job runner context lost when logout.
Raphael-Jin Jun 20, 2024
eb0f207
Merge pull request #2191 from FedML-AI/raphael/hotfix-jobrunner
Raphael-Jin Jun 20, 2024
942b223
[ TEST ]: Initialize a GitHub Actions framework for CI tests
Jun 20, 2024
afe4147
[CoreEngine] in order to debug easily for multiprocessing, add the pr…
fedml-alex Jun 20, 2024
c47f527
Merge pull request #2193 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 20, 2024
fd257b8
[CoreEngine] update the dependant libs.
fedml-alex Jun 20, 2024
29a397f
Merge pull request #2194 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 20, 2024
7ccf195
[Deploy] Support arbitrary container image onboarding.
Raphael-Jin Jun 15, 2024
9ca6ecc
[Deploy] Add LoraX and Triton examples; Add url match pattern.
Raphael-Jin Jun 18, 2024
786718b
[Deploy] Support serverless container.
Raphael-Jin Jun 20, 2024
c0f691c
[Deploy] Nit.
Raphael-Jin Jun 20, 2024
67e93e8
Merge pull request #2195 from FedML-AI/raphael/pr/support-arbitrary-i…
Raphael-Jin Jun 20, 2024
7a0963e
[TEST]: add windows runners tests
Jun 21, 2024
4355c35
[doc]: make sure the workflow documents are more readable.
Jun 21, 2024
be60443
[doc]: make sure the workflow documents are more readable.
Jun 21, 2024
b63d960
[Merge]
Jun 21, 2024
d7481be
[CoreEngine] set the name of all monitor processes, remove the redund…
fedml-alex Jun 21, 2024
aa813a0
[CoreEngine] remove the API key.
fedml-alex Jun 21, 2024
11ef2a5
Merge pull request #2197 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 21, 2024
0491bb7
Merge pull request #2192 from Qigemingziba/github_action
fedml-alex Jun 21, 2024
fd038b5
Merge pull request #2199 from FedML-AI/alexleung/dev_v070_for_refactor
fedml-alex Jun 21, 2024
33fb5b4
[Deploy] Pass down the api key to container.
Raphael-Jin Jun 21, 2024
f412a26
[Deploy] Nit.
Raphael-Jin Jun 21, 2024
6ec7379
Merge pull request #2200 from FedML-AI/raphael/pass-api-key
Raphael-Jin Jun 21, 2024
d6c9411
[Deploy] Remove example.
Raphael-Jin Jun 21, 2024
dcc0845
Merge pull request #2201 from FedML-AI/raphael/remove-example
fedml-dimitris Jun 21, 2024
5ae6904
[CoreEngine] make the job stopping feature work.
fedml-alex Jun 25, 2024
0db8666
Merge pull request #2203 from FedML-AI/alexleung/dev_branch_latest
fedml-alex Jun 25, 2024
fa44ccc
[Deploy] Return custom path other than /predict.
Raphael-Jin Jun 25, 2024
bd89be1
[Deploy] Add sqlite backup for get_all_deployment_result_list.
Raphael-Jin Jun 25, 2024
43f99cf
[Deploy] Nit.
Raphael-Jin Jun 25, 2024
766c52a
[Deploy] Nit.
Raphael-Jin Jun 25, 2024
0c29c49
[Deploy] Hot fix hash exist.
Raphael-Jin Jun 26, 2024
9930d2d
Merge pull request #2204 from FedML-AI/raphael/refactor-inf-service
Raphael-Jin Jun 26, 2024
36378f8
[Deploy] Indicate worker connection type through cli and api.
Raphael-Jin Jun 26, 2024
5097ff2
[Deploy] Nit.
Raphael-Jin Jun 26, 2024
72ed9a3
Merge pull request #2205 from FedML-AI/raphael/hot-fix-hash-exist
Raphael-Jin Jun 26, 2024
7193577
Merge pull request #2206 from FedML-AI/raphael/indicate-connection-type
bhargav191098 Jun 26, 2024
a932082
Merge branch 'dev/v0.7.0' into alexleung/dev_v070_for_refactor
fedml-alex Jun 26, 2024
a5bbcd2
Merge pull request #2163 from FedML-AI/alexleung/dev_v070_for_refactor
fedml-alex Jun 28, 2024
084781f
Add logs in occupy_gpu_ids, and funcs in hardware_utils for debugging
alaydshah Jul 2, 2024
37be694
Revert "Adjust the design of FedML Python Agent to a decentralized ar…
Jul 2, 2024
babf08c
Merge pull request #2208 from FedML-AI/revert-2163-alexleung/dev_v070…
Raphael-Jin Jul 2, 2024
62c4bb8
Merge pull request #2207 from FedML-AI/alay_and_raphael/debug/deploym…
alaydshah Jul 2, 2024
f5ad35b
[Deploy] Fix round-robin algorithm; Format code.
Raphael-Jin Jul 8, 2024
c8e5755
[Deploy] Use terminology expose_subdomains.
Raphael-Jin Jul 22, 2024
7d47d27
Merge pull request #2214 from FedML-AI/raphael/change-terminology
Raphael-Jin Jul 24, 2024
281f8c0
Add marketplace_type, price_per_hour as optional login parameters
alaydshah Aug 1, 2024
24e4ce4
Fixes
alaydshah Aug 5, 2024
b692734
Nits
alaydshah Aug 5, 2024
06f70a4
Merge pull request #2210 from FedML-AI/raphael/refac-round-robin
alaydshah Aug 5, 2024
043fa6e
Bugfix
alaydshah Aug 6, 2024
682a6c4
Merge pull request #2216 from FedML-AI/alaydshah/update/provider_login
alaydshah Aug 7, 2024
bbf2493
Adding validation and price range restriction
alaydshah Aug 8, 2024
d9b4b8a
Merge pull request #2217 from FedML-AI/alaydshah/fix/provider_login
alaydshah Aug 8, 2024
a990a60
[Deploy] Automatically mount the workspace to container in the defaul…
Raphael-Jin Aug 12, 2024
dfd8308
[Deploy] Support bootstrap and CMD be indicated together.
Raphael-Jin Aug 12, 2024
1a09b0e
[Deploy] Nit.
Raphael-Jin Aug 12, 2024
7e5f6a1
[Deploy] Add example.
Raphael-Jin Aug 13, 2024
0d75918
Merge pull request #2218 from FedML-AI/raphael/refactor-mount-logic
Raphael-Jin Aug 16, 2024
47efcde
feat: Add name parameter to the bindingEdge method
alaydshah Aug 22, 2024
135c55b
Pass name into login
alaydshah Aug 22, 2024
4cf8066
Fixing grpc and trpc ipconfig from 127.0.0.0 to 0.0.0.0
Sep 4, 2024
be6196e
Merge pull request #2221 from FedML-AI/alaydshah/render/login/name
alaydshah Sep 4, 2024
277f4ca
Remove if condition, add log
alaydshah Sep 5, 2024
c7bfa63
Merge pull request #2222 from FedML-AI/alaydshah/fix/name
alaydshah Sep 5, 2024
17dd2b7
Stringify name
alaydshah Sep 12, 2024
4a198eb
Set name arg required to True
alaydshah Sep 12, 2024
16417d5
Making name optional
alaydshah Sep 12, 2024
03f37b8
Merge pull request #2223 from FedML-AI/alaydshah/bugfix/name
alaydshah Sep 12, 2024
53aead3
add the new certs.
fedml-alex Sep 13, 2024
f46cd1e
update new certs.
fedml-alex Sep 13, 2024
fa11d0b
Merge pull request #2224 from FedML-AI/alexleung/dev_v0700_for_merge
fedml-alex Sep 13, 2024
e046f5b
Fixing grpc compatibility with the fedml.ai platform and simplifying …
Sep 13, 2024
87ae30a
Merge branch 'dev/v0.7.0' into dimitris/grpc_fix
Sep 14, 2024
303f29b
Removing empty line.
Sep 14, 2024
1420898
[CoreEngine] set the cuda visible id into the docker container when t…
fedml-alex Sep 16, 2024
d0826d9
Merge pull request #2226 from FedML-AI/alexleung/dev_v0700_for_merge
fedml-alex Sep 16, 2024
30cfe02
set the gpu ids when training.
fedml-alex Sep 18, 2024
99903b2
Merge pull request #2227 from FedML-AI/alexleung/dev_v0700_for_merge
ASCE1885 Sep 18, 2024
f6b8c44
Merge pull request #2225 from FedML-AI/dimitris/grpc_fix
fedml-alex Sep 20, 2024
16a79d9
Adding simple local env docker client checker.
Oct 16, 2024
f299a8e
Adding more docker client existence checkpoints.
Oct 16, 2024
3349667
Fixing grpc readme file.
Oct 17, 2024
d2484fa
Remove circular dependency.
Oct 17, 2024
a959802
Extending grpc support to also consider docker container ips.
Oct 17, 2024
aa69122
Fixing notation and attribute names in grpc config files.
Oct 17, 2024
c302749
testing with ingress ip.
Oct 18, 2024
292bfb3
Polishing grpc + docker examples.
Oct 18, 2024
c6d4daf
Merge pull request #2229 from FedML-AI/dimitris/grpc_with_docker
fedml-alex Oct 24, 2024
55ff447
Parameterizing deploy host, port.
fedml-dimitris Nov 4, 2024
9fc5b4d
Merge pull request #2230 from FedML-AI/inference_runner_custom_host_port
fedml-alex Nov 6, 2024
a108a8a
[Deploy] Edge Case Handling.
Raphael-Jin Nov 11, 2024
98e084a
Merge pull request #2232 from FedML-AI/raphael/quick-fix-error-catch
fedml-alex Nov 11, 2024
698e95e
[fixbug]
charlieyl Dec 2, 2024
56f6059
Merge pull request #2233 from FedML-AI/charlie/dev/v0.7.0
charlieyl Dec 2, 2024
757e5f0
add log in get_available_gpu_ids[hardware_utils.py]
charlieyl Dec 10, 2024
c5c22c8
add logs
charlieyl Dec 10, 2024
ffa54a3
add logs
charlieyl Dec 10, 2024
589dc47
add logs
charlieyl Dec 10, 2024
40735d9
add logs
charlieyl Dec 10, 2024
90c1191
[bugfix]Enhance GPU management(need compare the readtime availabe gpu…
charlieyl Dec 11, 2024
30e1f70
[debug]add logs
charlieyl Dec 11, 2024
21a374c
[bugfix] Enhance GPU cache management by setting initial available GP…
charlieyl Dec 11, 2024
02b87f4
[bugfix]calculate the difference between realtime_available_gpu_ids a…
charlieyl Dec 11, 2024
9ff4a56
[bugfix]set shm_size to 8G if not specified
charlieyl Dec 11, 2024
fee49a4
Merge pull request #2234 from FedML-AI/dev/v0.7.0
charlieyl Dec 17, 2024
d5831b9
Revert "Merge pull request #2233 from FedML-AI/charlie/dev/v0.7.0"
charlieyl Dec 17, 2024
9fa8499
Merge pull request #2235 from FedML-AI/revert-2234-dev/v0.7.0
charlieyl Dec 17, 2024
2055d68
Merge pull request #2237 from FedML-AI/charlie/dev/v0.7.0
charlieyl Dec 17, 2024
bb31c93
remove debug logs
charlieyl Dec 18, 2024
cb489f8
Merge pull request #2238 from FedML-AI/charlie/dev/v0.7.0
charlieyl Dec 18, 2024
e27b830
check the gpu avaiablity using the random api to adapte the rental gpus.
Dec 20, 2024
181621a
Merge pull request #2240 from FedML-AI/alexleung/dev_v0700_4_sync
charlieyl Dec 20, 2024
77c6906
[bugfix]Adapt log method(in transformers/trainer.py) parameters
charlieyl Dec 20, 2024
038faaf
Merge branch 'dev/v0.7.0' into charlie/dev/v0.7.0
charlieyl Dec 20, 2024
a5708ea
Merge branch 'dev/v0.7.0' into charlie/dev/v0.7.0
charlieyl Dec 20, 2024
74803fe
[bugfix]Add the. zip suffix to the s3 key of the model card
charlieyl Dec 20, 2024
63f2110
Merge pull request #2241 from FedML-AI/charlie/dev/v0.7.0
fedml-alex Dec 20, 2024
93f9760
[update]Upgrade official website address: https://tensoropera.ai , an…
charlieyl Dec 20, 2024
ca5e764
Merge pull request #2242 from FedML-AI/charlie/dev/v0.7.0
fedml-alex Dec 20, 2024
4acb0f0
undo "Welcome to FedML.ai!"
charlieyl Dec 20, 2024
24469d2
Merge pull request #2243 from FedML-AI/charlie/dev/v0.7.0
fedml-alex Dec 20, 2024
07ae5ec
[bugfix]start_job_perf on execute_job_task
charlieyl Dec 25, 2024
f08a1ab
Merge pull request #2244 from FedML-AI/charlie/dev/v0.7.0
charlieyl Dec 25, 2024
4 changes: 2 additions & 2 deletions python/README.md
@@ -43,5 +43,5 @@ Other low-level APIs related to security and privacy are also supported. All alg

**utils**: Common utilities shared by other modules.

## About FedML, Inc.
https://FedML.ai
## About TensorOpera, Inc.
https://tensoropera.ai
14 changes: 7 additions & 7 deletions python/examples/README.md
@@ -2,14 +2,14 @@
# FEDML Examples (Including Prebuilt Jobs in Jobs Store)

- `FedML/python/examples` -- examples for training, deployment, and federated learning
- `FedML/python/examples/launch` -- examples for FEDML®Launch
- `FedML/python/examples/serving` -- examples for FEDML®Deploy
- `FedML/python/examples/train` -- examples for FEDML®Train
- `FedML/python/examples/cross_cloud` -- examples for FEDML®Train cross-cloud distributed training
- `FedML/python/examples/launch` -- examples for TensorOpera®Launch
- `FedML/python/examples/serving` -- examples for TensorOpera®Deploy
- `FedML/python/examples/train` -- examples for TensorOpera®Train
- `FedML/python/examples/cross_cloud` -- examples for TensorOpera®Train cross-cloud distributed training
- `FedML/python/examples/federate/prebuilt_jobs` -- examples for federated learning prebuilt jobs (FedCV, FedNLP, FedGraphNN, Healthcare, etc.)
- `FedML/python/examples/federate/cross_silo` -- examples for cross-silo federated learning
- `FedML/python/examples/federate/cross_device` -- examples for cross-device federated learning
- `FedML/python/examples/federate/simulation` -- examples for federated learning simulation
- `FedML/python/examples/federate/security` -- examples for FEDML®Federate security related features
- `FedML/python/examples/federate/privacy` -- examples for FEDML®Federate privacy related features
- `FedML/python/examples/federate/federated_analytics` -- examples for FEDML®Federate federated analytics (FA)
- `FedML/python/examples/federate/security` -- examples for TensorOpera®Federate security related features
- `FedML/python/examples/federate/privacy` -- examples for TensorOpera®Federate privacy related features
- `FedML/python/examples/federate/federated_analytics` -- examples for TensorOpera®Federate federated analytics (FA)
2 changes: 1 addition & 1 deletion python/examples/deploy/complex_example/README.md
@@ -16,7 +16,7 @@ Use -cf to indicate the configuration file.
curl -XPOST localhost:2345/predict -d '{"text": "Hello"}'
```

## Option 2: Deploy to the Cloud (Using fedml®launch platform)
## Option 2: Deploy to the Cloud (Using TensorOpera®launch platform)
- Uncomment the following line in config.yaml

For information about the configuration, please refer to fedml ® launch.
2 changes: 1 addition & 1 deletion python/examples/deploy/complex_example/config.yaml
@@ -15,7 +15,7 @@ environment_variables:
LOCAL_RANK: "0"

# If you do not have any GPU resource but want to serve the model
# Try FedML® Nexus AI Platform, and Uncomment the following lines.
# Try TensorOpera® Nexus AI Platform, and Uncomment the following lines.
# ------------------------------------------------------------
computing:
minimum_num_gpus: 1 # minimum # of GPUs to provision
48 changes: 0 additions & 48 deletions python/examples/deploy/custom_inference_image/README.md

This file was deleted.

This file was deleted.

@@ -0,0 +1,12 @@
workspace: "."
inference_image: "your_docker_hub_repo/your_image_name"

workspace_mount_path: "/my_workspace" # Default is "/home/fedml/models_serving"

container_run_command: "echo hello && python3 /my_workspace/main_entry.py"

# If you want to install some packages
# Please write the command in the bootstrap.sh
bootstrap: |
  echo "Install some packages..."
  echo "Install finished!"
@@ -0,0 +1,28 @@
from fedml.serving import FedMLPredictor
from fedml.serving import FedMLInferenceRunner
import uuid


class Bot(FedMLPredictor):  # Inherit FedMLPredictor
    def __init__(self):
        super().__init__()

        # --- Your model initialization code here; here is an example ---
        self.uuid = uuid.uuid4()
        # -------------------------------------------

    def predict(self, request: dict):
        input_dict = request
        question: str = input_dict.get("text", "").strip()

        # --- Your model inference code here ---
        response = f"I am a replica, my id is {self.uuid}"
        # ---------------------------------------

        return {"v1_generated_text": f"V1: The answer to your question {question} is: {response}"}


if __name__ == "__main__":
    chatbot = Bot()
    fedml_inference_runner = FedMLInferenceRunner(chatbot)
    fedml_inference_runner.run()
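The predictor above follows a simple dict-in/dict-out contract. A dependency-free stand-in (illustrative only — `EchoBot` is not part of the FedML SDK; the key names mirror the example) sketches the same flow:

```python
import uuid


class EchoBot:
    """Illustrative stand-in for the FedMLPredictor subclass above
    (no fedml dependency; key names mirror the example)."""

    def __init__(self):
        # "Model initialization": just a replica id, as in the example.
        self.uuid = uuid.uuid4()

    def predict(self, request: dict) -> dict:
        question = request.get("text", "").strip()
        response = f"I am a replica, my id is {self.uuid}"
        return {"v1_generated_text": f"V1: The answer to your question {question} is: {response}"}


result = EchoBot().predict({"text": "Hello"})
print(result["v1_generated_text"])
```

Each replica answers with its own id, which is what makes this example useful for verifying load balancing across deployed workers.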
16 changes: 0 additions & 16 deletions python/examples/deploy/custom_inference_image/serve_main.py

This file was deleted.

22 changes: 22 additions & 0 deletions python/examples/deploy/custom_inference_image/template.yaml
@@ -0,0 +1,22 @@
# Required
workspace: "./"  # We will package all the files in the workspace directory
expose_subdomains: true  # For a customized image, if you want to route all the subdomains, set to true. e.g. localhost:2345/{all-subdomain}
inference_image_name: ""  # Container image name
container_run_command: ""  # str or list, similar to CMD in the Dockerfile
port: 80  # Service port; currently you can only indicate one arbitrary port

# Optional, these are the default values
readiness_probe:  # Probe for checking whether a container is ready for inference
  httpGet:
    path: ""
environment_variables: {}  # Environment variables inside the container
volumes:  # Volumes to mount to the container
  - workspace_path: ""  # Path to the volume in the workspace
    mount_path: ""  # Path to mount the volume inside the container
deploy_timeout_sec: 900  # Maximum time waiting for deployment to finish (does not include the time to pull the image)
request_input_example: {}  # Example of an input request; will be shown in the UI
registry_specs:  # Registry information for pulling the image
  registry_name: ""
  registry_provider: "DockerHub"
  registry_user_name: ""
  registry_user_password: ""
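A config like this can be sanity-checked before it is handed to `fedml model create`; the helper below is purely illustrative (not part of the FedML SDK) and only verifies the keys the template lists under `# Required`:

```python
# Illustrative validator; the key set follows the template's "# Required"
# section, but this helper is an assumption, not part of the FedML SDK.
REQUIRED_KEYS = {"workspace", "expose_subdomains", "inference_image_name",
                 "container_run_command", "port"}


def missing_required_keys(config: dict) -> set:
    """Return the required keys absent from a custom-image deploy config."""
    return REQUIRED_KEYS - set(config)


cfg = {
    "workspace": "./",
    "expose_subdomains": True,
    "inference_image_name": "nvcr.io/nvidia/tritonserver:24.05-py3",
    "container_run_command": "tritonserver --model-repository=/repo_inside_container",
    "port": 8000,
}
print(missing_required_keys(cfg))  # set() when everything required is present
```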
@@ -0,0 +1,17 @@
workspace: "./"

expose_subdomains: true
inference_image_name: "fedml/llama3-8b-tensorrtllm"

# If you put the model repository in $workspace/model_repository, it will be mounted to /home/fedml/models_serving/model_repository
container_run_command: ["sh", "-c", "cd / && huggingface-cli login --token $your_hf_token && pip install sentencepiece protobuf && python3 tensorrtllm_backend/scripts/launch_triton_server.py --model_repo tensorrtllm_backend/all_models/inflight_batcher_llm --world_size 1 && tail -f /dev/null"]

readiness_probe:
  httpGet:
    path: "/v2/health/ready"

port: 8000

deploy_timeout_sec: 1600


@@ -0,0 +1,20 @@
workspace: "./"

expose_subdomains: true
inference_image_name: "nvcr.io/nvidia/tritonserver:24.05-py3"

volumes:
  - workspace_path: "./model_repository"
    mount_path: "/repo_inside_container"

container_run_command: "tritonserver --model-repository=/repo_inside_container"

readiness_probe:
  httpGet:
    path: "/v2/health/ready"

port: 8000

deploy_timeout_sec: 1600

request_input_example: {"text_input": "Hello"}
@@ -0,0 +1,25 @@
import json
import numpy as np
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    def initialize(self, args):
        self.model_name = args['model_name']

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        auto_complete_model_config.add_input({"name": "text_input", "data_type": "TYPE_STRING", "dims": [-1]})
        auto_complete_model_config.add_output({"name": "text_output", "data_type": "TYPE_STRING", "dims": [-1]})
        auto_complete_model_config.set_max_batch_size(0)
        return auto_complete_model_config

    def execute(self, requests):
        responses = []
        for request in requests:
            in_numpy = pb_utils.get_input_tensor_by_name(request, "text_input").as_numpy()
            assert np.object_ == in_numpy.dtype, 'in this demo, triton passes in a numpy array of size 1 with object_ dtype; this dtype encapsulates a python bytes-array'
            print('in this demo len(in_numpy) is 1:', len(in_numpy.tolist()))
            out_numpy = np.array([(self.model_name + ': ' + python_byte_array.decode('utf-8') + ' World').encode('utf-8') for python_byte_array in in_numpy.tolist()], dtype=np.object_)
            out_pb = pb_utils.Tensor("text_output", out_numpy)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_pb]))
        return responses
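Stripped of the Triton plumbing, `execute` just decodes each input byte string, prepends the model name, and appends " World". A dependency-free sketch of that transform (the `transform` helper is illustrative; real inputs arrive as a numpy `object_` array):

```python
def transform(model_name: str, inputs: list) -> list:
    """Mirror the byte-string transform in TritonPythonModel.execute
    (illustrative; plain Python list instead of a numpy object_ array)."""
    return [
        (model_name + ': ' + raw.decode('utf-8') + ' World').encode('utf-8')
        for raw in inputs
    ]


out = transform("demo_model", [b"Hello"])
print(out)  # [b'demo_model: Hello World']
```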
@@ -0,0 +1,22 @@
workspace: "./"

inference_image_name: "fedml/trt-llm-openai"

# The image has its own self-contained CMD; no need to override the command
container_run_command: null

port: 3000

readiness_probe:
  httpGet:
    path: "/health_check"

# If you do not use serverless container mode, and you want to indicate another resource path,
# e.g. localhost:3000/v1/chat/completions, you can set the following uri:
service:
  httpPost:
    path: "/v1/chat/completions"

deploy_timeout_sec: 1600

endpoint_api_type: "text2text_llm_openai_chat_completions"
10 changes: 10 additions & 0 deletions python/examples/deploy/debug/inference_timeout/config.yaml
@@ -0,0 +1,10 @@
workspace: "./src"
entry_point: "serve_main.py"
bootstrap: |
  echo "Bootstrap start..."
  sleep 5
  echo "Bootstrap finished"
auto_detect_public_ip: true
use_gpu: true

request_timeout_sec: 10
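The `request_timeout_sec` knob above pairs with the fail-fast and per-request timeout policies added earlier in this PR. Client-side, the effect can be sketched with a generic timeout wrapper (illustrative only — this is not FedML's implementation; `slow_predict` is a stand-in):

```python
import concurrent.futures
import time


def call_with_timeout(fn, timeout_sec, *args):
    """Run fn(*args), raising TimeoutError if it exceeds timeout_sec."""
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        future = pool.submit(fn, *args)
        return future.result(timeout=timeout_sec)


def slow_predict(request):
    time.sleep(0.5)  # stand-in for a long-running inference call
    return {"echo": request}


try:
    call_with_timeout(slow_predict, 0.05, {"text": "Hello"})
    timed_out = False
except concurrent.futures.TimeoutError:
    timed_out = True
print(timed_out)  # True: the 0.05 s budget was exceeded
```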
32 changes: 32 additions & 0 deletions python/examples/deploy/debug/inference_timeout/src/serve_main.py
@@ -0,0 +1,32 @@
from fedml.serving import FedMLPredictor
from fedml.serving import FedMLInferenceRunner
import uuid
import torch

# Calculate the number of elements
num_elements = 1_073_741_824 // 4 # using integer division for whole elements


class DummyPredictor(FedMLPredictor):
    def __init__(self):
        super().__init__()
        # Create a tensor with this many elements
        tensor = torch.empty(num_elements, dtype=torch.float32)

        # Move the tensor to GPU
        tensor_gpu = tensor.cuda()

        # for debug
        with open("/tmp/dummy_gpu_occupier.txt", "w") as f:
            f.write("GPU is occupied")

        self.worker_id = uuid.uuid4()

    def predict(self, request):
        return {f"AlohaV0From{self.worker_id}": request}


if __name__ == "__main__":
    predictor = DummyPredictor()
    fedml_inference_runner = FedMLInferenceRunner(predictor)
    fedml_inference_runner.run()
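The `num_elements` constant above is 1 GiB expressed in float32 elements; a quick check of that arithmetic:

```python
GIB = 1_073_741_824   # bytes in 1 GiB (2**30)
FLOAT32_BYTES = 4     # element size of torch.float32

num_elements = GIB // FLOAT32_BYTES
print(num_elements)                         # 268435456 elements
print(num_elements * FLOAT32_BYTES == GIB)  # True: the tensor occupies exactly 1 GiB
```

Moving a tensor of that size to the GPU deliberately pins roughly 1 GiB of device memory, which is what this debug example uses to exercise the timeout path.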
4 changes: 2 additions & 2 deletions python/examples/deploy/mnist/README.md
@@ -11,9 +11,9 @@ curl -XPOST localhost:2345/predict -d '{"arr":[$DATA]}'
#For $DATA, please check the request_input_example, it is a 28*28=784 float array
#Output:{"generated_text":"tensor([0.2333, 0.5296, 0.4350, 0.4537, 0.5424, 0.4583, 0.4803, 0.2862, 0.5507,\n 0.8683], grad_fn=<SigmoidBackward0>)"}
```
## Option 2: Deploy to the Cloud (Using fedml® launch platform)
## Option 2: Deploy to the Cloud (Using TensorOpera® launch platform)
Uncomment the following line in mnist.yaml,
for infomation about the configuration, please refer to fedml® launch.
for information about the configuration, please refer to TensorOpera® launch.
```yaml
# computing:
# minimum_num_gpus: 1
2 changes: 1 addition & 1 deletion python/examples/deploy/mnist/mnist.yaml
@@ -5,7 +5,7 @@ data_cache_dir: ""
bootstrap: ""

# If you do not have any GPU resource but want to serve the model
# Try FedML® Nexus AI Platform, and Uncomment the following lines.
# Try TensorOpera® Nexus AI Platform, and Uncomment the following lines.
# ------------------------------------------------------------
computing:
minimum_num_gpus: 1 # minimum # of GPUs to provision
6 changes: 3 additions & 3 deletions python/examples/deploy/multi_service/README.md
@@ -15,7 +15,7 @@ fedml model create --name $model_name --config_file config.yaml
```

## On-premise Deploy
Register an account on FedML website: https://fedml.ai
Register an account on TensorOpera website: https://tensoropera.ai

You will have a user id and api key, which can be found in the profile page.

@@ -44,8 +44,8 @@ You will have a user id and api key, which can be found in the profile page.
```
- Result

See the deployment result in https://fedml.ai
See the deployment result in https://tensoropera.ai

- OPT2: Deploy - UI

Follow the instructions on https://fedml.ai
Follow the instructions on https://tensoropera.ai
2 changes: 1 addition & 1 deletion python/examples/deploy/quick_start/README.md
@@ -16,7 +16,7 @@ Use -cf to indicate the configuration file.
curl -XPOST localhost:2345/predict -d '{"text": "Hello"}'
```

## Option 2: Deploy to the Cloud (Using fedml®launch platform)
## Option 2: Deploy to the Cloud (Using TensorOpera®launch platform)
- Uncomment the following line in config.yaml

For information about the configuration, please refer to fedml ® launch.