diff --git a/README.md b/README.md
index 34cb243..3464584 100644
--- a/README.md
+++ b/README.md
@@ -90,10 +90,16 @@ LOGS_PATH =
 
 # GPT4All quantized model
 MODEL_NAME = orca-mini-3b.ggmlv3.q4_0.bin
-# Must be a huggingface model for tokenizing
-TOKENIZER = deepset/roberta-base-squad2
-THREADS = 1
+
+# Recommended: set this to the number of physical CPU cores your system has (as opposed to the number of logical cores)
+THREADS = 1
+
+# Lowering the prompt batch size reduces RAM usage during processing, at the cost of increased processing time
+BATCH_SIZE = 2048
 MAX_TOKENS = 750
+
+# Must be a huggingface model for tokenizing
+TOKENIZER = deepset/roberta-base-squad2
 # huggingface embeddings model
 EMBED_MODEL = all-MiniLM-L12-v2
 ```
@@ -144,11 +152,17 @@ sudo systemctl enable gptapi
 sudo systemctl start gptapi
 ```
 
-# Deploying on Portainer with docker-compose
+# Deploying with Docker
 
-If using portainer's env variables, use `stack.env` for the `env_file` arg, otherwise specify the path to your env file.
+## Building from source
 
-## Pulling from docker images
+1. `git clone https://github.com/vertyco/gpt-api.git`
+2. `cd gpt-api`
+3. `docker compose -f docker-compose.local.yml up`
+
+## Portainer + pulling from image
+
+If running in Portainer, use `stack.env` for the `env_file` arg; otherwise, specify the path to your env file.
 
 ```yml
 version: "3.8"
@@ -160,12 +174,10 @@ services:
   api:
     image: vertyco/gpt-api:latest
     ports:
       - 8100:8100
     env_file:
-      - stack.env
+      - ./.env
 ```
 
-## Building from repo
-
-The repo's docker-compose file can be used with the `Repository` option in Portainers stack UI which will build the image from source.
+The repo's docker-compose file can be used with the `Repository` option in Portainer's stack UI, which will build the image from source. Just specify `docker-compose.portainer.yml` for the compose filename.
 # NOTES
diff --git a/docker-compose.image.yml b/docker-compose.image.yml
index 5ccc519..b1d5022 100644
--- a/docker-compose.image.yml
+++ b/docker-compose.image.yml
@@ -7,4 +7,4 @@ services:
     ports:
       - 8000:8000
     env_file:
-      - .env
+      - ./.env
diff --git a/docker-compose.build.yml b/docker-compose.portainer.yml
similarity index 100%
rename from docker-compose.build.yml
rename to docker-compose.portainer.yml
diff --git a/scratches/gpt4all-orca copy.py b/scratches/gpt4all-orca copy.py
deleted file mode 100644
index c996e9f..0000000
--- a/scratches/gpt4all-orca copy.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import asyncio
-
-from gpt4all import GPT4All
-from sentence_transformers import SentenceTransformer
-
-model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")
-
-
-async def run():
-    # Simplest invocation
-    output = model.generate("The capital of France is ", max_tokens=3)
-    print(output)
-    embedder = SentenceTransformer("all-MiniLM-L12-v2")
-    embed = embedder.encode(output)
-    print("type", type(embed))
-
-
-asyncio.run(run())
diff --git a/src/api.py b/src/api.py
index 12ee473..125d1d9 100644
--- a/src/api.py
+++ b/src/api.py
@@ -87,6 +87,7 @@ def _run() -> dict:
             max_tokens=max_tokens,
             temp=payload.temperature,
             top_p=payload.top_p,
+            n_batch=config.BATCH_SIZE,
         )
 
         log.debug(f"Output: {output}")
@@ -118,6 +119,7 @@ def _run() -> dict:
             max_tokens=max_tokens,
             temp=payload.temperature,
             top_p=payload.top_p,
+            n_batch=config.BATCH_SIZE,
         )
 
         log.debug(f"Output: {output}")
diff --git a/src/config.py b/src/config.py
index 01a1e4a..7e1fcd6 100644
--- a/src/config.py
+++ b/src/config.py
@@ -11,9 +11,10 @@ LOGS_PATH = config("LOGS_PATH", default="")
 # GPT4All quantized model
 MODEL_NAME = config("MODEL_NAME", default="orca-mini-3b.ggmlv3.q4_0.bin")
+BATCH_SIZE = config("BATCH_SIZE", default=2048, cast=int)
 # Must be a huggingface model for tokenizing
 TOKENIZER = config("TOKENZIER", default="deepset/tinyroberta-squad2")
-
+# Set to the number of physical CPU cores your system has (not logical cores)
 THREADS = config("THREADS", default=None)
 MAX_TOKENS = config("MAX_TOKENS", default=750, cast=int)
 
 # embeddings
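For context, this is roughly what the new `n_batch` plumbing amounts to at the gpt4all call site. The sketch below is illustrative only and not part of this change; it follows the shape of the deleted `scratches/gpt4all-orca copy.py` script, with the `MAX_TOKENS` and `BATCH_SIZE` defaults from `src/config.py` inlined:

```python
# sketch.py - illustrative only, not part of this diff.
# Shows the effect of the new BATCH_SIZE setting: it is forwarded to the
# gpt4all bindings as n_batch, the prompt batch size.
from gpt4all import GPT4All

model = GPT4All(model_name="orca-mini-3b.ggmlv3.q4_0.bin")

# A smaller n_batch lowers peak RAM while the prompt is being processed,
# at the cost of slower prompt ingestion (the trade-off noted in the README).
output = model.generate(
    "The capital of France is ",
    max_tokens=750,  # MAX_TOKENS default
    n_batch=2048,    # BATCH_SIZE default
)
print(output)
```

Since `n_batch` mainly affects how the prompt is ingested, the 2048 default favors speed; deployments that hit RAM limits can dial `BATCH_SIZE` down through the env file.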