From f26524cf9cda711db653b92d4f83f390d148748b Mon Sep 17 00:00:00 2001
From: sd109
Date: Sat, 2 Nov 2024 16:15:20 +0000
Subject: [PATCH] Reinstate base chart schema

---
 charts/azimuth-llm/azimuth-ui.schema.yaml |  33 ++++++
 charts/azimuth-llm/values.schema.json     | 128 ++++++++++++++++++++++
 2 files changed, 161 insertions(+)
 create mode 100644 charts/azimuth-llm/azimuth-ui.schema.yaml
 create mode 100644 charts/azimuth-llm/values.schema.json

diff --git a/charts/azimuth-llm/azimuth-ui.schema.yaml b/charts/azimuth-llm/azimuth-ui.schema.yaml
new file mode 100644
index 0000000..9989cc1
--- /dev/null
+++ b/charts/azimuth-llm/azimuth-ui.schema.yaml
@@ -0,0 +1,33 @@
+controls:
+  /huggingface/model:
+    type: TextControl
+    required: true
+  /huggingface/token:
+    type: TextControl
+    secret: true
+  # Use a MirrorControl to mimic the YAML anchor in the base Helm chart
+  /ui/appSettings/model_name:
+    type: MirrorControl
+    path: /huggingface/model
+    visuallyHidden: true
+  # The Azimuth UI doesn't handle the JSON type ["integer","null"]
+  # properly, so we allow any type in the JSON schema and then
+  # constrain it to an (optional) integer here.
+  /api/modelMaxContextLength:
+    type: IntegerControl
+    minimum: 100
+    required: false
+
+sortOrder:
+  - /huggingface/model
+  - /huggingface/token
+  - /ui/appSettings/model_instruction
+  - /ui/appSettings/page_title
+  - /api/image/version
+  - /ui/appSettings/llm_params/temperature
+  - /ui/appSettings/llm_params/max_tokens
+  - /ui/appSettings/llm_params/frequency_penalty
+  - /ui/appSettings/llm_params/presence_penalty
+  - /ui/appSettings/llm_params/top_p
+  - /ui/appSettings/llm_params/top_k
+  - /api/modelMaxContextLength
diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json
new file mode 100644
index 0000000..eeac476
--- /dev/null
+++ b/charts/azimuth-llm/values.schema.json
@@ -0,0 +1,128 @@
+{
+  "type": "object",
+  "properties": {
+    "huggingface": {
+      "type": "object",
+      "properties": {
+        "model": {
+          "type": "string",
+          "title": "Model",
+          "description": "The [HuggingFace model](https://huggingface.co/models) to deploy (see [here](https://github.com/stackhpc/azimuth-llm?tab=readme-ov-file#tested-models) for a list of tested models).",
+          "default": "microsoft/Phi-3.5-mini-instruct"
+        },
+        "token": {
+          "type": [
+            "string",
+            "null"
+          ],
+          "title": "Access Token",
+          "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)."
+        }
+      },
+      "required": [
+        "model"
+      ]
+    },
+    "api": {
+      "type": "object",
+      "properties": {
+        "modelMaxContextLength": {
+          "title": "Model Context Length",
+          "description": "An override for the maximum context length to allow, if the model's default is not suitable."
+        },
+        "image": {
+          "type": "object",
+          "properties": {
+            "version": {
+              "type": "string",
+              "title": "Backend vLLM Version",
+              "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags).",
+              "default": "v0.6.3"
+            }
+          }
+        }
+      }
+    },
+    "ui": {
+      "type": "object",
+      "properties": {
+        "appSettings": {
+          "type": "object",
+          "properties": {
+            "model_name": {
+              "type": "string",
+              "title": "Model Name",
+              "description": "The model name supplied to the OpenAI client in the frontend web app. Should match huggingface.model above."
+            },
+            "model_instruction": {
+              "type": "string",
+              "title": "Instruction",
+              "description": "The initial system prompt (i.e. the hidden instruction) to use when generating responses.",
+              "default": "You are a helpful AI assistant. Please respond appropriately."
+            },
+            "page_title": {
+              "type": "string",
+              "title": "Page Title",
+              "description": "The title to display at the top of the chat interface.",
+              "default": "Large Language Model"
+            },
+            "llm_params": {
+              "type": "object",
+              "properties": {
+                "max_tokens": {
+                  "type": "integer",
+                  "title": "Max Tokens",
+                  "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM response.",
+                  "default": 1000
+                },
+                "temperature": {
+                  "type": "number",
+                  "title": "LLM Temperature",
+                  "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.",
+                  "default": 0,
+                  "minimum": 0,
+                  "maximum": 2
+                },
+                "top_p": {
+                  "type": "number",
+                  "title": "LLM Top P",
+                  "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.",
+                  "default": 1,
+                  "exclusiveMinimum": 0,
+                  "maximum": 1
+                },
+                "top_k": {
+                  "type": "integer",
+                  "title": "LLM Top K",
+                  "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).",
+                  "default": -1,
+                  "minimum": -1
+                },
+                "presence_penalty": {
+                  "type": "number",
+                  "title": "LLM Presence Penalty",
+                  "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.",
+                  "default": 0,
+                  "minimum": -2,
+                  "maximum": 2
+                },
+                "frequency_penalty": {
+                  "type": "number",
+                  "title": "LLM Frequency Penalty",
+                  "description": "The [frequency penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.",
+                  "default": 0,
+                  "minimum": -2,
+                  "maximum": 2
+                }
+              }
+            }
+          },
+          "required": [
+            "model_name",
+            "model_instruction"
+          ]
+        }
+      }
+    }
+  }
}
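
Note: once values.schema.json is reinstated, Helm validates user-supplied
values against it automatically on install, upgrade, template and lint.
A minimal values file conforming to the schema might look like the sketch
below; the model name and vLLM version simply echo the chart defaults and
are illustrative, not recommendations:

    huggingface:
      model: microsoft/Phi-3.5-mini-instruct
      token: null  # only needed for gated models
    api:
      image:
        version: v0.6.3
    ui:
      appSettings:
        # model_name and model_instruction are required by the schema
        # whenever appSettings is set
        model_name: microsoft/Phi-3.5-mini-instruct
        model_instruction: You are a helpful AI assistant.
        llm_params:
          temperature: 0
          max_tokens: 1000

    # Check values against the schema without deploying:
    #   helm template charts/azimuth-llm -f values.yaml > /dev/null

Since ui.appSettings.model_name must match huggingface.model, the
MirrorControl in azimuth-ui.schema.yaml hides the duplicate field in the
Azimuth UI and copies the value across, mimicking the YAML anchor used in
the base chart's default values.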