
fix README for QA
dsikka committed Oct 10, 2023
1 parent c13832b commit 18913d4
Showing 4 changed files with 45 additions and 27 deletions.
21 changes: 8 additions & 13 deletions src/deepsparse/server/README.md
@@ -81,20 +81,20 @@ For example: If previously the following route `/pruned/model_1` was provided,
the following endpoint would be available:

```
-http://localhost:<port>/puned/model_1
+http://localhost:5543/pruned/model_1
```

Now, the following endpoints are available:

```
-http://localhost:<port>/v2/models/puned/model_1/infer
-http://localhost:<port>/v2/models/puned/model_1/ready
-http://localhost:<port>/v2/models/puned/model_1
+http://localhost:5543/v2/models/pruned/model_1/infer
+http://localhost:5543/v2/models/pruned/model_1/ready
+http://localhost:5543/v2/models/pruned/model_1
```

The same can be expected when a name is provided in the config file instead of a route.
When neither a name nor a route is provided, a name will be generated for the endpoint,
-using the task provided (e.g question_answering will create question_answering-0)
+using the task provided.
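As a quick sketch (assuming a server is already running on `localhost:5543` with a `question_answering` endpoint, so the generated endpoint name is simply the task name), the new `/ready` route can be exercised with `requests`:

```python
import requests

# Illustrative only: assumes a running server on localhost:5543 with a
# question_answering endpoint; its generated name is the task name itself.
ready = requests.get("http://localhost:5543/v2/models/question_answering/ready")
print(ready.status_code)  # expected to be 200 once the model is loaded
```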

---

@@ -113,7 +113,7 @@ To make a request to your server, use the `requests` library and pass the reques
```python
import requests

url = "http://localhost:5543/v2/models/question_answering-0/infer"
url = "http://localhost:5543/v2/models/question_answering/infer"

obj = {
"question": "Who is Mark?",
@@ -127,7 +127,7 @@ In addition, you can make a request with a `curl` command from terminal:

```bash
curl -X POST \
-'http://localhost:5543/v2/models/question_answering-0/infer' \
+'http://localhost:5543/v2/models/question_answering/infer' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
@@ -153,11 +153,6 @@ endpoints:
model: zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned80_quant-none-vnni
batch_size: 1
```
-You can now run the server with the config file path using the `config` sub command:
-
-```bash
-deepsparse.server config config.yaml
-```
You can send requests to a specific model by appending the model's `alias` from the `config.yaml` to the end of the request url. For example, to call the second model, you can send a request to its configured route:
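For illustration only: assuming the second endpoint in `config.yaml` were given a hypothetical alias such as `question_answering_aliased`, that aliases follow the same `/v2/models/<name>/infer` route scheme shown above, and an illustrative question/context payload, such a request might look like:

```python
import requests

# Hypothetical alias; substitute the alias actually set in config.yaml.
# Assumes the same /v2/models/<name>/infer route scheme described above.
url = "http://localhost:5543/v2/models/question_answering_aliased/infer"

obj = {"question": "Who is Mark?", "context": "Mark is batman."}

response = requests.post(url, json=obj)
print(response.text)
```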

@@ -180,5 +175,5 @@ All you need is to add `/docs` at the end of your host URL:

localhost:5543/docs

-![alt text](./img/swagger_ui.png)
+![alt text](./img/endpoints.png)

51 changes: 37 additions & 14 deletions src/deepsparse/server/cli.py
@@ -209,20 +209,6 @@ def main(
...
```
"""

-    def _fetch_server(integration: str, config_path: str):
-        if integration == "local":
-            return DeepsparseServer(server_config=config_path)
-        elif integration == "sagemaker":
-            return SagemakerServer(server_config=config_path)
-        elif integration == "openai":
-            return OpenAIServer(server_config=config_path)
-        else:
-            raise ValueError(
-                f"{integration} is not a supported integration. Must be "
-                f"one of {SUPPORTED_INTEGRATIONS}."
-            )

    if ctx.invoked_subcommand is not None:
        return

@@ -344,6 +330,43 @@ def task(
        category=DeprecationWarning,
    )

+    cfg = ServerConfig(
+        num_cores=num_cores,
+        num_workers=num_workers,
+        integration=integration,
+        endpoints=[
+            EndpointConfig(
+                task=task,
+                name=f"{task}",
+                model=model_path,
+                batch_size=batch_size,
+            )
+        ],
+        loggers={},
+    )

+    with TemporaryDirectory() as tmp_dir:
+        config_path = os.path.join(tmp_dir, "server-config.yaml")
+        with open(config_path, "w") as fp:
+            yaml.dump(cfg.dict(), fp)

+        server = _fetch_server(integration=integration, config_path=config_path)
+        server.start_server(host, port, log_level, hot_reload_config=hot_reload_config)


+def _fetch_server(integration: str, config_path: str):
+    if integration == "local":
+        return DeepsparseServer(server_config=config_path)
+    elif integration == "sagemaker":
+        return SagemakerServer(server_config=config_path)
+    elif integration == "openai":
+        return OpenAIServer(server_config=config_path)
+    else:
+        raise ValueError(
+            f"{integration} is not a supported integration. Must be "
+            f"one of {SUPPORTED_INTEGRATIONS}."
+        )

if __name__ == "__main__":
    main()
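Because `_fetch_server` now lives at module scope, it can also be imported and driven directly, for example from a test or a small launcher script. A minimal sketch, assuming a DeepSparse install, the `local` integration, and a `server-config.yaml` already on disk:

```python
# Minimal illustrative sketch; host, port, and log level are placeholder values.
from deepsparse.server.cli import _fetch_server

server = _fetch_server(integration="local", config_path="server-config.yaml")
server.start_server("0.0.0.0", 5543, "info", hot_reload_config=False)
```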
Binary file added src/deepsparse/server/img/endpoints.png
Binary file removed src/deepsparse/server/img/swagger_ui.png
