diff --git a/README.md b/README.md
index 34cb243..3464584 100644
--- a/README.md
+++ b/README.md
@@ -90,10 +90,16 @@ LOGS_PATH =
 
 # GPT4All quantized model
 MODEL_NAME = orca-mini-3b.ggmlv3.q4_0.bin
-# Must be a huggingface model for tokenizing
-TOKENIZER = deepset/roberta-base-squad2
-THREADS = 1
+
+# Recommended: set this to the number of physical CPU cores your system has (as opposed to the number of logical cores)
+THREADS = 1
+
+# Lowering the prompt batch size reduces RAM usage during processing, at the cost of increased processing time
+BATCH_SIZE = 2048
 MAX_TOKENS = 750
+
+# Must be a huggingface model for tokenizing
+TOKENIZER = deepset/roberta-base-squad2
 # huggingface embeddings model
 EMBED_MODEL = all-MiniLM-L12-v2
 ```
@@ -144,11 +152,17 @@ sudo systemctl enable gptapi
 sudo systemctl start gptapi
 ```
 
-# Deploying on Portainer with docker-compose
+# Deploying with Docker
 
-If using portainer's env variables, use `stack.env` for the `env_file` arg, otherwise specify the path to your env file.
+## Building from source
 
-## Pulling from docker images
+1. `git clone https://github.com/vertyco/gpt-api.git`
+2. `cd gpt-api`
+3. `docker compose -f docker-compose.local.yml up`
+
+## Portainer + pulling from image
+
+If running in Portainer, use `stack.env` for the `env_file` arg; otherwise, specify the path to your env file.
 
 ```yml
 version: "3.8"
@@ -160,12 +174,10 @@ services:
   api:
     image: vertyco/gpt-api:latest
     ports:
       - 8100:8100
     env_file:
-      - stack.env
+      - ./.env
 ```
 
-## Building from repo
-
-The repo's docker-compose file can be used with the `Repository` option in Portainers stack UI which will build the image from source.
+The repo's docker-compose file can be used with the `Repository` option in Portainer's stack UI, which will build the image from source. Just specify `docker-compose.portainer.yml` for the compose filename.
 # NOTES
diff --git a/docker-compose.image.yml b/docker-compose.image.yml
index 5ccc519..b1d5022 100644
--- a/docker-compose.image.yml
+++ b/docker-compose.image.yml
@@ -7,4 +7,4 @@ services:
     ports:
       - 8000:8000
     env_file:
-      - .env
+      - ./.env
diff --git a/docker-compose.build.yml b/docker-compose.portainer.yml
similarity index 100%
rename from docker-compose.build.yml
rename to docker-compose.portainer.yml
diff --git a/scratches/gpt4all-orca copy.py b/scratches/gpt4all-orca copy.py
deleted file mode 100644
index c996e9f..0000000
--- a/scratches/gpt4all-orca copy.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import asyncio
-
-from gpt4all import GPT4All
-from sentence_transformers import SentenceTransformer
-
-model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")
-
-
-async def run():
-    # Simplest invocation
-    output = model.generate("The capital of France is ", max_tokens=3)
-    print(output)
-    embedder = SentenceTransformer("all-MiniLM-L12-v2")
-    embed = embedder.encode(output)
-    print("type", type(embed))
-
-
-asyncio.run(run())
diff --git a/src/api.py b/src/api.py
index 12ee473..125d1d9 100644
--- a/src/api.py
+++ b/src/api.py
@@ -87,6 +87,7 @@ def _run() -> dict:
             max_tokens=max_tokens,
             temp=payload.temperature,
             top_p=payload.top_p,
+            n_batch=config.BATCH_SIZE,
         )
 
         log.debug(f"Output: {output}")
@@ -118,6 +119,7 @@ def _run() -> dict:
             max_tokens=max_tokens,
             temp=payload.temperature,
             top_p=payload.top_p,
+            n_batch=config.BATCH_SIZE,
         )
 
         log.debug(f"Output: {output}")
diff --git a/src/config.py b/src/config.py
index 01a1e4a..7e1fcd6 100644
--- a/src/config.py
+++ b/src/config.py
@@ -11,9 +11,10 @@ LOGS_PATH = config("LOGS_PATH", default="")
 # GPT4All quantized model
 MODEL_NAME = config("MODEL_NAME", default="orca-mini-3b.ggmlv3.q4_0.bin")
+BATCH_SIZE = config("BATCH_SIZE", default=2048, cast=int)
 # Must be a huggingface model for tokenizing
 TOKENIZER = config("TOKENZIER", default="deepset/tinyroberta-squad2")
-
+# Set to the number of physical CPU cores your system has (not logical cores)
 THREADS = config("THREADS", default=None)
 MAX_TOKENS = config("MAX_TOKENS", default=750, cast=int)
 
 # embeddings
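For context, this is roughly what the new `n_batch` plumbing amounts to at the gpt4all call site. The sketch below is illustrative only and not part of this change; it follows the shape of the deleted `scratches/gpt4all-orca copy.py` script, with the `MAX_TOKENS` and `BATCH_SIZE` defaults from `src/config.py` inlined:

```python
# sketch.py - illustrative only, not part of this diff.
# Shows the effect of the new BATCH_SIZE setting: it is forwarded to the
# gpt4all bindings as n_batch, the prompt batch size.
from gpt4all import GPT4All

model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")

# A smaller n_batch lowers peak RAM while the prompt is being processed,
# at the cost of slower prompt ingestion (the trade-off noted in the README).
output = model.generate(
    "The capital of France is ",
    max_tokens=750,  # MAX_TOKENS default
    n_batch=2048,    # BATCH_SIZE default
)
print(output)
```

Since `n_batch` mainly affects how the prompt is ingested, the 2048 default favors speed; deployments that hit RAM limits can dial `BATCH_SIZE` down through the env file.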