batch size arg
vertyco committed Jul 4, 2023
1 parent 3c487d7 commit 2fd76ec
Showing 6 changed files with 26 additions and 29 deletions.
30 changes: 21 additions & 9 deletions README.md
@@ -90,10 +90,18 @@ LOGS_PATH =
 # GPT4All quantized model
 MODEL_NAME = orca-mini-3b.ggmlv3.q4_0.bin
-# Must be a huggingface model for tokenizing
-TOKENIZER = deepset/roberta-base-squad2
-# Recommended to set this value to the number of physical CPU cores your system has (as opposed to the number of logical cores)
-THREADS = 1
+# Lowering prompt-batch-size reduces RAM usage during processing. However, this can increase the processing time as a trade-off
+BATCH_SIZE = 2048
+THREADS = 1
 MAX_TOKENS = 750
+# Must be a huggingface model for tokenizing
+TOKENIZER = deepset/roberta-base-squad2
 # huggingface embeddings model
 EMBED_MODEL = all-MiniLM-L12-v2
 ```
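The RAM/speed trade-off behind the new `BATCH_SIZE` setting can be illustrated with a small sketch: the prompt's tokens are consumed in chunks of at most `n_batch`, so a larger batch means fewer passes but a bigger working set per pass. The helper below is hypothetical, for illustration only; the real batching happens inside GPT4All's backend.

```python
# Illustration of the prompt-batch-size trade-off (hypothetical helper, not
# part of the repo): split a token sequence into chunks of at most n_batch.
def batch_prompt(token_ids: list[int], n_batch: int) -> list[list[int]]:
    """Return consecutive chunks of token_ids, each at most n_batch long."""
    if n_batch < 1:
        raise ValueError("n_batch must be >= 1")
    return [token_ids[i:i + n_batch] for i in range(0, len(token_ids), n_batch)]

tokens = list(range(5000))           # stand-in for a 5000-token prompt
large = batch_prompt(tokens, 2048)   # README default: 3 larger passes
small = batch_prompt(tokens, 512)    # lower RAM per pass: 10 smaller passes
print(len(large), len(small))        # → 3 10
```

Fewer, larger chunks mean more data resident in memory at each step, which is why lowering `BATCH_SIZE` reduces RAM usage at the cost of more passes.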
@@ -144,11 +152,17 @@ sudo systemctl enable gptapi
sudo systemctl start gptapi
```

-# Deploying on Portainer with docker-compose
+# Deploying with Docker
 
-If using portainer's env variables, use `stack.env` for the `env_file` arg, otherwise specify the path to your env file.
-## Pulling from docker images
+### Building from source
+
+1. `git clone https://github.com/vertyco/gpt-api.git`
+2. `cd gpt-api`
+3. `docker compose -f docker-compose.local.yml up`
+
+## Portainer + pulling from image
+
+If running in Portainer, use `stack.env` for the `env_file` arg, otherwise specify the path to your env file.

```yml
version: "3.8"
@@ -160,12 +174,10 @@ services:
    ports:
      - 8100:8100
    env_file:
-     - stack.env
+     - ./.env
```
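Since a wrong or incomplete env file only surfaces at container start-up, a quick pre-deploy sanity check can help. The helper and the required-key list below are assumptions for illustration, not part of the repo; the keys mirror the README's config example.

```python
# Minimal .env sanity check before `docker compose up` (hypothetical helper,
# not part of the repo). Parses KEY=VALUE lines and verifies a few keys the
# README documents are present.
def parse_env(text: str) -> dict[str, str]:
    env = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # skip blank lines and comments
        key, _, value = line.partition("=")
        env[key.strip()] = value.strip()
    return env

REQUIRED = ("MODEL_NAME", "BATCH_SIZE", "THREADS", "MAX_TOKENS")

sample = """
# GPT4All quantized model
MODEL_NAME = orca-mini-3b.ggmlv3.q4_0.bin
BATCH_SIZE = 2048
THREADS = 1
MAX_TOKENS = 750
"""
env = parse_env(sample)
missing = [k for k in REQUIRED if k not in env]
assert not missing, f"missing keys: {missing}"
```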
 ## Building from repo
-The repo's docker-compose file can be used with the `Repository` option in Portainers stack UI which will build the image from source.
+The repo's docker-compose file can be used with the `Repository` option in Portainer's stack UI, which will build the image from source. Just specify `docker-compose.portainer.yml` for the compose filename.

# NOTES

2 changes: 1 addition & 1 deletion docker-compose.image.yml
@@ -7,4 +7,4 @@ services:
    ports:
      - 8000:8000
    env_file:
-     - .env
+     - ./.env
File renamed without changes.
18 changes: 0 additions & 18 deletions scratches/gpt4all-orca copy.py

This file was deleted.

2 changes: 2 additions & 0 deletions src/api.py
@@ -87,6 +87,7 @@ def _run() -> dict:
     max_tokens=max_tokens,
     temp=payload.temperature,
     top_p=payload.top_p,
+    n_batch=config.BATCH_SIZE,
 )
 
 log.debug(f"Output: {output}")
@@ -118,6 +119,7 @@ def _run() -> dict:
     max_tokens=max_tokens,
     temp=payload.temperature,
     top_p=payload.top_p,
+    n_batch=config.BATCH_SIZE,
 )
 
 log.debug(f"Output: {output}")
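Both hunks in `src/api.py` make the same change: the generate call now forwards the configured batch size. A sketch of the resulting keyword shape follows; the `payload` and `config` stand-ins are hypothetical, but the key names mirror the diff.

```python
# Sketch of the generate() kwargs after this commit: the only new field is
# n_batch, sourced from config.BATCH_SIZE. The stand-ins below replace the
# repo's real config module and request payload.
from types import SimpleNamespace

config = SimpleNamespace(BATCH_SIZE=2048)              # stand-in for src/config.py
payload = SimpleNamespace(temperature=0.7, top_p=0.9)  # stand-in for the request body

def generate_kwargs(payload, max_tokens: int) -> dict:
    """Build the kwargs passed to the model's generate call."""
    return {
        "max_tokens": max_tokens,
        "temp": payload.temperature,
        "top_p": payload.top_p,
        "n_batch": config.BATCH_SIZE,  # new in this commit
    }

kwargs = generate_kwargs(payload, max_tokens=750)
```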
3 changes: 2 additions & 1 deletion src/config.py
@@ -11,9 +11,10 @@
 LOGS_PATH = config("LOGS_PATH", default="")
 # GPT4All quantized model
 MODEL_NAME = config("MODEL_NAME", default="orca-mini-3b.ggmlv3.q4_0.bin")
+BATCH_SIZE = config("BATCH_SIZE", default=2048, cast=int)
 # Must be a huggingface model for tokenizing
 TOKENIZER = config("TOKENZIER", default="deepset/tinyroberta-squad2")
 
 # Set to
 THREADS = config("THREADS", default=None)
 MAX_TOKENS = config("MAX_TOKENS", default=750, cast=int)
 # embeddings
