diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 286d7c8..e8ca6c1 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -1,7 +1,6 @@ # Adapted from https://github.com/stackhpc/azimuth/blob/master/.github/workflows/build-push-artifacts.yaml name: Publish artifacts # Run the tasks on every push -# TODO: Add path filtering to only run on relevant changes on: push jobs: # Job to run change detection @@ -24,9 +23,10 @@ jobs: id: filter with: base: ${{ github.ref_name }} + # TODO: Make image filters more granular filters: | images: - - 'images/**' + - 'web-apps/**' chart: - 'chart/**' @@ -35,12 +35,11 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: - # - component: api - - component: ui + - component: chat-interface steps: - name: Check out the repository uses: actions/checkout@v4 @@ -56,7 +55,7 @@ jobs: id: image-meta uses: docker/metadata-action@v5 with: - images: ghcr.io/stackhpc/azimuth-llm-${{ matrix.component }}-base + images: ghcr.io/stackhpc/azimuth-llm-${{ matrix.component }} # Produce the branch name or tag and the SHA as tags tags: | type=ref,event=branch @@ -64,38 +63,37 @@ jobs: type=sha,prefix= - name: Build and push image - uses: stackhpc/github-actions/docker-multiarch-build-push@allow-continue-after-scan + uses: azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action with: - cache-key: ${{ matrix.component }}-base - context: ./images/${{ matrix.component }}-base + cache-key: ${{ matrix.component }} + context: ./web-apps/${{ matrix.component }} platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.image-meta.outputs.tags }} labels: ${{ steps.image-meta.outputs.labels }} - fail_on_high_severity_cve: false 
- # Job to build and publish Helm chart - build_push_chart: - name: Build and push Helm chart - runs-on: ubuntu-latest - # Only build and push the chart if chart files have changed - needs: [changes] - if: ${{ needs.changes.outputs.chart == 'true' || github.ref_type == 'tag' }} - steps: - - name: Check out the repository - uses: actions/checkout@v4 - with: - # This is important for the semver action to work correctly - # when determining the number of commits since the last tag - fetch-depth: 0 + # # Job to build and publish Helm chart + # build_push_chart: + # name: Build and push Helm chart + # runs-on: ubuntu-latest + # # Only build and push the chart if chart files have changed + # needs: [changes] + # if: ${{ needs.changes.outputs.chart == 'true' || github.ref_type == 'tag' }} + # steps: + # - name: Check out the repository + # uses: actions/checkout@v4 + # with: + # # This is important for the semver action to work correctly + # # when determining the number of commits since the last tag + # fetch-depth: 0 - - name: Get SemVer version for current commit - id: semver - uses: stackhpc/github-actions/semver@master + # - name: Get SemVer version for current commit + # id: semver + # uses: azimuth-cloud/github-actions/semver@master - - name: Publish Helm charts - uses: stackhpc/github-actions/helm-publish@master - with: - token: ${{ secrets.GITHUB_TOKEN }} - version: ${{ steps.semver.outputs.version }} - app-version: ${{ steps.semver.outputs.short-sha }} + # - name: Publish Helm charts + # uses: azimuth-cloud/github-actions/helm-publish@master + # with: + # token: ${{ secrets.GITHUB_TOKEN }} + # version: ${{ steps.semver.outputs.version }} + # app-version: ${{ steps.semver.outputs.short-sha }} diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml new file mode 100644 index 0000000..eaf9354 --- /dev/null +++ b/.github/workflows/test-pr.yml @@ -0,0 +1,50 @@ +name: Test pull request +on: + pull_request: +jobs: + chart_validation: + runs-on: 
ubuntu-latest + env: + CLUSTER_NAME: chart-testing + RELEASE_NAME: ci-test + steps: + - name: Checkout the repo + uses: actions/checkout@v4 + + - name: Install Helm + uses: azure/setup-helm@v4 + + - name: Set up chart testing + uses: helm/chart-testing-action@v2 + + - name: Run chart linting + run: ct lint --config ct.yaml + + - name: Run helm template with default values + run: helm template ci-test . + working-directory: chart + + - name: Build all web app images + run: ./build.sh + working-directory: web-apps + + - name: Create Kind Cluster + uses: helm/kind-action@v1 + with: + cluster-name: ${{ env.CLUSTER_NAME }} + + - name: Load local docker images into kind cluster + run: | + IMAGE_NAMES=$(docker image ls | grep ghcr.io/stackhpc/azimuth-llm- | awk '{print $1}') + kind load docker-image $IMAGE_NAMES -n ${{ env.CLUSTER_NAME }} + + - name: Add Helm repos for dependencies + run: | + helm repo add stakater https://stakater.github.io/stakater-charts + + # https://github.com/helm/charts/blob/master/test/README.md#providing-custom-test-values + # Each chart/ci/*-values.yaml file will be treated as a separate test case with its + # own helm install/test process.
+ - name: Run chart install and test + run: ct install --config ct.yaml + diff --git a/.gitignore b/.gitignore index e916d76..7d21b1b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,11 +8,8 @@ __pycache__/ # Ignore local dev helpers test-values.y[a]ml -chart/web-app/settings.yml -gradio-client-test.py **venv*/ - # Helm chart stuff chart/Chart.lock -chart/charts \ No newline at end of file +chart/charts diff --git a/chart/.helmignore b/chart/.helmignore index b4962b3..21fa195 100644 --- a/chart/.helmignore +++ b/chart/.helmignore @@ -30,7 +30,7 @@ venv/ __pycache__/ images/ .hf-token.secret -hu-poc/ test-values.yaml web-app/settings.yml -web-app/example-settings.yml \ No newline at end of file +web-app/example-settings.yml +ci/ diff --git a/chart/Chart.yaml b/chart/Chart.yaml index b60c5c1..3a73007 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -1,6 +1,10 @@ apiVersion: v2 name: azimuth-llm description: HuggingFace LLM model serving along with a simple web interface. +maintainers: + - name: "Scott Davidson" + email: scott@stackhpc.com + url: https://github.com/sd109 # A chart can be either an 'application' or a 'library' chart. 
#
@@ -32,4 +36,4 @@ dependencies:
   - name: reloader
     version: 1.0.63
     repository: https://stakater.github.io/stakater-charts
-    condition: ui.enabled
\ No newline at end of file
+    condition: ui.enabled
diff --git a/chart/ci/web-apps-only-values.yaml b/chart/ci/web-apps-only-values.yaml
new file mode 100644
index 0000000..03554e2
--- /dev/null
+++ b/chart/ci/web-apps-only-values.yaml
@@ -0,0 +1,16 @@
+# CI values for chart-testing: API component disabled, web app only.
+api:
+  enabled: false
+ui:
+  image:
+    repository: ghcr.io/stackhpc/azimuth-llm-chat-interface
+    # The UI deployment template reads ui.image.version (there is no `tag` key)
+    version: latest
+    # The image is loaded into the kind cluster by the PR workflow, never pulled
+    imagePullPolicy: Never
+  service:
+    zenith:
+      enabled: false
+  appSettings:
+    hf_model_name: AMead10/c4ai-command-r-08-2024-awq
+    backend_url: https://llm3-compute.cms.hu-berlin.de
diff --git a/chart/templates/test/end-to-end.yml b/chart/templates/test/end-to-end.yml
new file mode 100644
index 0000000..b751239
--- /dev/null
+++ b/chart/templates/test/end-to-end.yml
@@ -0,0 +1,23 @@
+{{- if .Values.api.enabled -}}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: gradio-client-response
+  annotations:
+    "helm.sh/hook": test
+spec:
+  template:
+    spec:
+      containers:
+        - name: gradio-client-test
+          image: ghcr.io/stackhpc/azimuth-llm-chat-interface
+          imagePullPolicy: IfNotPresent
+          command:
+            - python
+            - gradio-client-test.py
+            - http://{{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc
+      restartPolicy: Never
+  # Allow plenty of retries since downloading
+  # model weights can take a long time.
+ backoffLimit: 10 +{{- end -}} diff --git a/chart/templates/test/web-app.yml b/chart/templates/test/web-app.yml new file mode 100644 index 0000000..aa23e3b --- /dev/null +++ b/chart/templates/test/web-app.yml @@ -0,0 +1,22 @@ +{{- if not .Values.api.enabled -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: web-app-liveness + annotations: + "helm.sh/hook": test +spec: + template: + spec: + containers: + - name: web-app-test + image: nicolaka/netshoot:v0.12 + imagePullPolicy: IfNotPresent + command: + - nc + - -vz + - {{ .Values.ui.service.name }}.{{ .Release.Namespace }}.svc + - "80" + restartPolicy: Never + backoffLimit: 3 +{{- end -}} diff --git a/chart/templates/ui/app-config-map.yml b/chart/templates/ui/app-config-map.yml index f4af425..e77da18 100644 --- a/chart/templates/ui/app-config-map.yml +++ b/chart/templates/ui/app-config-map.yml @@ -6,7 +6,6 @@ metadata: labels: {{- include "azimuth-llm.labels" . | nindent 4 }} data: -{{ (.Files.Glob "web-app/*").AsConfig | nindent 2 }} - settings.yml: | + overrides.yml: | {{- .Values.ui.appSettings | toYaml | nindent 4 }} -{{- end -}} \ No newline at end of file +{{- end -}} diff --git a/chart/templates/ui/deployment.yml b/chart/templates/ui/deployment.yml index 0e6c5e5..4a3e400 100644 --- a/chart/templates/ui/deployment.yml +++ b/chart/templates/ui/deployment.yml @@ -23,25 +23,20 @@ spec: containers: - name: {{ .Release.Name }}-ui image: {{ printf "%s:%s" .Values.ui.image.repository .Values.ui.image.version }} + imagePullPolicy: {{ .Values.ui.image.imagePullPolicy }} ports: - name: ui - containerPort: 7680 - workingDir: /etc/web-app + containerPort: 7860 volumeMounts: - name: app mountPath: /etc/web-app - command: - - python - args: - - {{ .Values.ui.entrypoint }} - - {{ .Values.huggingface.model }} env: - name: PYTHONUNBUFFERED value: "1" tty: true # Make stdout from python visible in k8s logs readinessProbe: tcpSocket: - port: 7680 + port: 7860 periodSeconds: 5 volumes: - name: app diff --git 
a/chart/values.yaml b/chart/values.yaml index 9c6cf44..446ec49 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -100,8 +100,11 @@ ui: - Arial # Container image config image: - repository: ghcr.io/stackhpc/azimuth-llm-ui-base - version: "0.3.0" + # repository: ghcr.io/stackhpc/azimuth-llm-ui-base + # version: "0.3.0" + repository: azimuth-llm-chat-interface + version: latest + imagePullPolicy: IfNotPresent # Service config service: name: web-app diff --git a/chart/web-app/config.py b/chart/web-app/config.py deleted file mode 100644 index 89818ac..0000000 --- a/chart/web-app/config.py +++ /dev/null @@ -1,73 +0,0 @@ -import yaml -from pydantic import Field, HttpUrl -from pydantic_settings import BaseSettings, SettingsConfigDict - -from typing import Optional, Union, List - - -def get_k8s_namespace(): - namespace_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" - try: - current_k8s_namespace = open(namespace_file_path).read() - except: - current_k8s_namespace = "default" - print( - f"Failed to detect current k8s namespace in {namespace_file_path} - falling back to value '{current_k8s_namespace}'." - ) - return current_k8s_namespace - - -class AppSettings(BaseSettings): - """ - Settings object for the UI example app. - """ - - # Allow settings to be overwritten by LLM_UI_ env vars - model_config = SettingsConfigDict(env_prefix="llm_ui_") - - # General settings - hf_model_name: str = Field( - description="The model to use when constructing the LLM Chat client. This should match the model name running on the vLLM backend", - ) - backend_url: HttpUrl = Field( - default_factory=lambda: f"http://llm-backend.{get_k8s_namespace()}.svc" - ) - page_title: str = Field(default="Large Language Model") - page_description: Optional[str] = Field(default=None) - hf_model_instruction: str = Field( - default="You are a helpful and cheerful AI assistant. Please respond appropriately." 
- ) - - # Model settings - # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html - # which is based on https://platform.openai.com/docs/api-reference/completions/create - llm_max_tokens: int = Field(default=500) - llm_temperature: float = Field(default=0.5) - llm_top_p: float = Field(default=1) - llm_top_k: float = Field(default=-1) - llm_presence_penalty: float = Field(default=0, ge=-2, le=2) - llm_frequency_penalty: float = Field(default=0, ge=-2, le=2) - - # UI theming - - # Variables explicitly passed to gradio.theme.Default() - # For example: - # {"primary_hue": "red"} - theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict) - # Overrides for theme.body_background_fill property - theme_background_colour: Optional[str] = Field(default=None) - # Provides arbitrary CSS and JS overrides to the UI, - # see https://www.gradio.app/guides/custom-CSS-and-JS - css_overrides: Optional[str] = Field(default=None) - custom_javascript: Optional[str] = Field(default=None) - - # Method for loading settings file - @staticmethod - def load(file_path: str): - try: - with open(file_path, "r") as file: - settings = yaml.safe_load(file) - except Exception as e: - print(f"Failed to read config file at: {file_path}\nException was:") - raise e - return AppSettings(**settings) diff --git a/chart/web-app/example-settings.yml b/chart/web-app/example-settings.yml deleted file mode 100644 index 34a3ae9..0000000 --- a/chart/web-app/example-settings.yml +++ /dev/null @@ -1,40 +0,0 @@ -backend_url: http://localhost:8081 -hf_model_name: ise-uiuc/Magicoder-S-DS-6.7B - -# model_instruction: You are a helpful and cheerful AI assistant. Please respond appropriately. 
-# llm_max_tokens: -# llm_temperature: -# llm_top_p: -# llm_frequency_penalty: -# llm_presence_penalty: - -page_description: "[Custom Markdown](https://google.com)" - -# UI theming tweaks -theme_background_colour: "#00376c" -theme_params: - # primary_hue: blue - # Use local system fonts rather than Google fonts API - font: - - sans-serif - font_mono: - - sans-serif - -# Customise page title colour -css_overrides: | - h1 { - color: white; - padding-top: 1em; - } - -# Example of a custom JS function which adds a -# privacy statement link to the page footer -custom_javascript: | - function addPrivacyStatement() { - var footer = document.querySelector('footer'); - footer.appendChild(footer.children[1].cloneNode(deep=true)); - var item = footer.children[2].cloneNode(); - item.href = 'https://gdpr.eu/eu-gdpr-personal-data/'; - item.textContent = 'Privacy Statement'; - footer.appendChild(item); - } diff --git a/ct.yaml b/ct.yaml new file mode 100644 index 0000000..866e08c --- /dev/null +++ b/ct.yaml @@ -0,0 +1,2 @@ +validate-maintainers: false +charts: chart/ diff --git a/images/ui-base/requirements.txt b/images/ui-base/requirements.txt deleted file mode 100644 index 2079632..0000000 --- a/images/ui-base/requirements.txt +++ /dev/null @@ -1,88 +0,0 @@ -aiofiles==23.2.1 -aiohttp==3.9.1 -aiosignal==1.3.1 -altair==5.2.0 -annotated-types==0.6.0 -anyio==4.2.0 -attrs==23.2.0 -certifi==2023.11.17 -charset-normalizer==3.3.2 -click==8.1.7 -colorama==0.4.6 -contourpy==1.2.0 -cycler==0.12.1 -dataclasses-json==0.6.3 -distro==1.9.0 -fastapi==0.109.0 -ffmpy==0.3.1 -filelock==3.13.1 -fonttools==4.47.2 -frozenlist==1.4.1 -fsspec==2023.12.2 -gradio==4.21.0 -gradio_client==0.12.0 -h11==0.14.0 -httpcore==1.0.2 -httpx==0.26.0 -huggingface-hub==0.20.3 -idna==3.6 -importlib-resources==6.1.1 -Jinja2==3.1.3 -jsonpatch==1.33 -jsonpointer==2.4 -jsonschema==4.21.1 -jsonschema-specifications==2023.12.1 -kiwisolver==1.4.5 -langchain==0.1.1 -langchain-community==0.0.13 -langchain-core==0.1.13 
-langchain-openai==0.0.3 -langsmith==0.0.83 -markdown-it-py==3.0.0 -MarkupSafe==2.1.4 -marshmallow==3.20.2 -matplotlib==3.8.2 -mdurl==0.1.2 -multidict==6.0.4 -mypy-extensions==1.0.0 -numpy==1.26.3 -openai==1.9.0 -orjson==3.9.12 -packaging==23.2 -pandas==2.2.0 -pillow==10.2.0 -pydantic==2.5.3 -pydantic-settings==2.1.0 -pydantic_core==2.14.6 -pydub==0.25.1 -Pygments==2.17.2 -pyparsing==3.1.1 -python-dateutil==2.8.2 -python-dotenv==1.0.0 -python-multipart==0.0.9 -pytz==2023.3.post1 -PyYAML==6.0.1 -referencing==0.32.1 -regex==2023.12.25 -requests==2.31.0 -rpds-py==0.17.1 -ruff==0.3.3 -semantic-version==2.10.0 -shellingham==1.5.4 -six==1.16.0 -sniffio==1.3.0 -SQLAlchemy==2.0.25 -starlette==0.35.1 -tenacity==8.2.3 -tiktoken==0.5.2 -tomlkit==0.12.0 -toolz==0.12.0 -tqdm==4.66.1 -typer==0.9.0 -typing-inspect==0.9.0 -typing_extensions==4.9.0 -tzdata==2023.4 -urllib3==2.1.0 -uvicorn==0.26.0 -websockets==11.0.3 -yarl==1.9.4 diff --git a/web-apps/build.sh b/web-apps/build.sh new file mode 100755 index 0000000..5fe3c98 --- /dev/null +++ b/web-apps/build.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -e + +build() { + pushd $1 > /dev/null + if [[ -f Dockerfile ]]; then + echo Building $1 docker image + docker build . -t ghcr.io/stackhpc/azimuth-llm-$1 + else + echo No Dockerfile found for $1 + fi + popd > /dev/null +} + +# If a single app is provided as a +# script arg then just build that image, +# otherwise try building all images. +if [[ ! 
-z $1 ]]; then + build $1 +else + for item in $(ls); do + if [[ -d $item ]]; then + build $item + fi + done +fi diff --git a/images/ui-base/Dockerfile b/web-apps/chat-interface/Dockerfile similarity index 65% rename from images/ui-base/Dockerfile rename to web-apps/chat-interface/Dockerfile index 222de27..803d58f 100644 --- a/images/ui-base/Dockerfile +++ b/web-apps/chat-interface/Dockerfile @@ -1,9 +1,16 @@ FROM python:3.11-slim -ENV GRADIO_SERVER_PORT=7680 - COPY requirements.txt requirements.txt RUN pip install --no-cache-dir -r requirements.txt COPY purge-google-fonts.sh purge-google-fonts.sh RUN bash purge-google-fonts.sh + +WORKDIR /app + +COPY *.py . + +COPY defaults.yml . +# COPY overrides.yml . + +ENTRYPOINT ["python3", "app.py"] diff --git a/chart/web-app/app.py b/web-apps/chat-interface/app.py similarity index 88% rename from chart/web-app/app.py rename to web-apps/chat-interface/app.py index bef9a91..3ead467 100644 --- a/chart/web-app/app.py +++ b/web-apps/chat-interface/app.py @@ -14,7 +14,7 @@ logger.info("Starting app") -settings = AppSettings.load("./settings.yml") +settings = AppSettings.load() if len(sys.argv) > 1: settings.hf_model_name = sys.argv[1] logger.info("App settings: %s", settings) @@ -40,15 +40,22 @@ class PossibleSystemPromptException(Exception): openai_api_key="required-but-not-used", temperature=settings.llm_temperature, max_tokens=settings.llm_max_tokens, - model_kwargs={ - "top_p": settings.llm_top_p, - "frequency_penalty": settings.llm_frequency_penalty, - "presence_penalty": settings.llm_presence_penalty, - # Additional parameters supported by vLLM but not OpenAI API - # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters - "extra_body": { - "top_k": settings.llm_top_k, - } + # model_kwargs={ + # "top_p": settings.llm_top_p, + # "frequency_penalty": settings.llm_frequency_penalty, + # "presence_penalty": settings.llm_presence_penalty, + # # Additional parameters supported by vLLM but not OpenAI 
API
+    #     # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters
+    #     "extra_body": {
+    #         "top_k": settings.llm_top_k,
+    #     }
+    top_p=settings.llm_top_p,
+    frequency_penalty=settings.llm_frequency_penalty,
+    presence_penalty=settings.llm_presence_penalty,
+    # Additional parameters supported by vLLM but not OpenAI API
+    # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters
+    extra_body={
+        "top_k": settings.llm_top_k,
     },
     streaming=True,
 )
diff --git a/web-apps/chat-interface/config.py b/web-apps/chat-interface/config.py
new file mode 100644
index 0000000..8592884
--- /dev/null
+++ b/web-apps/chat-interface/config.py
@@ -0,0 +1,103 @@
+import logging
+import yaml
+from pydantic import Field, HttpUrl
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from typing import Optional, Union, List
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+NAMESPACE_FILE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
+def get_k8s_namespace():
+    """Return the namespace read from NAMESPACE_FILE_PATH, or None if the file is absent."""
+    try:
+        # The context manager closes the file handle; strip() guards against a
+        # trailing newline leaking into the generated backend URL.
+        with open(NAMESPACE_FILE_PATH) as namespace_file:
+            return namespace_file.read().strip()
+    except FileNotFoundError:
+        return None
+
+def default_backend():
+    """Return the in-cluster backend URL, or None when no namespace can be detected."""
+    k8s_ns = get_k8s_namespace()
+    if k8s_ns:
+        return f"http://llm-backend.{k8s_ns}.svc"
+    logger.warning('Failed to determine k8s namespace from %s - assuming non-kubernetes environment.', NAMESPACE_FILE_PATH)
+    return None
+
+
+class AppSettings(BaseSettings):
+    """
+    Settings object for the UI example app.
+    """
+
+    # # Allow settings to be overwritten by LLM_UI_ env vars
+    # model_config = SettingsConfigDict(env_prefix="llm_ui_")
+
+    # General settings
+    hf_model_name: str = Field(
+        description="The model to use when constructing the LLM Chat client. This should match the model name running on the vLLM backend",
+    )
+    backend_url: HttpUrl = Field(
+        description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)"
+    )
+    page_title: str = Field(default="Large Language Model")
+    page_description: Optional[str] = Field(default=None)
+    hf_model_instruction: str = Field(
+        default="You are a helpful and cheerful AI assistant. Please respond appropriately."
+    )
+
+    # Model settings
+
+    # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html
+    # which is based on https://platform.openai.com/docs/api-reference/completions/create
+    llm_max_tokens: int = Field(default=500)
+    llm_temperature: float = Field(default=0)
+    llm_top_p: float = Field(default=1)
+    llm_top_k: float = Field(default=-1)
+    llm_presence_penalty: float = Field(default=0, ge=-2, le=2)
+    llm_frequency_penalty: float = Field(default=0, ge=-2, le=2)
+
+    # UI theming
+
+    # Variables explicitly passed to gradio.theme.Default()
+    # For example:
+    # {"primary_hue": "red"}
+    theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict)
+    # Overrides for theme.body_background_fill property
+    theme_background_colour: Optional[str] = Field(default=None)
+    # Provides arbitrary CSS and JS overrides to the UI,
+    # see https://www.gradio.app/guides/custom-CSS-and-JS
+    css_overrides: Optional[str] = Field(default=None)
+    custom_javascript: Optional[str] = Field(default=None)
+
+
+    # Method for loading settings from files
+    @staticmethod
+    def _load_yaml(file_path: str):
+        """Parse the YAML file at file_path, treating an empty document as {}."""
+        with open(file_path, "r") as file:
+            content = yaml.safe_load(file) or {}
+        return content
+
+    @staticmethod
+    def load():
+        """Merge ./defaults.yml with the optional /etc/web-app/overrides.yml (overrides win) and build the settings."""
+        defaults = AppSettings._load_yaml('./defaults.yml')
+        overrides = {}
+        try:
+            overrides = AppSettings._load_yaml('/etc/web-app/overrides.yml')
+        except FileNotFoundError:
+            pass
+        settings = {**defaults, **overrides}
+        # Sanity checks on settings
+        if 'backend_url' not in settings:
+            in_cluster_backend = default_backend()
+            if not in_cluster_backend:
+                raise Exception('Backend URL must be provided in settings when running this app outside of Kubernetes')
+            settings['backend_url'] = in_cluster_backend
+        return AppSettings(**settings)
diff --git a/web-apps/chat-interface/defaults.yml b/web-apps/chat-interface/defaults.yml
new file mode 100644
index 0000000..9520b39
--- /dev/null
+++ b/web-apps/chat-interface/defaults.yml
@@ -0,0 +1,36 @@
+
+hf_model_name: "microsoft/Phi-3.5-mini-instruct"
+hf_model_instruction: "You are a pirate"
+
+# UI theming tweaks
+# css_overrides: |
+#   h1 {
+#     color: white;
+#     padding-top: 1em;
+#   }
+#   a {
+#     color: yellow;
+#   }
+# theme_background_colour: "#00376c"
+# theme_params:
+#   # primary_hue: blue
+#   font:
+#     - sans-serif
+#   font_mono:
+#     - sans-serif
+
+# custom_javascript: |
+#   function addPrivacyStatement() {
+#     var footer = document.querySelector('footer');
+#     footer.appendChild(footer.children[1].cloneNode(deep=true));
+#     var item = footer.children[2].cloneNode();
+#     item.href = 'https://google.com';
+#     item.textContent = 'Privacy Statement';
+#     footer.appendChild(item);
+#   }
+
+# llm_max_tokens:
+# llm_temperature:
+# llm_top_p:
+# llm_frequency_penalty:
+# llm_presence_penalty:
diff --git a/web-apps/chat-interface/gradio-client-test.py b/web-apps/chat-interface/gradio-client-test.py
new file mode 100644
index 0000000..ddf245c
--- /dev/null
+++ b/web-apps/chat-interface/gradio-client-test.py
@@ -0,0 +1,7 @@
+import sys
+from gradio_client import Client
+
+gradio_host = sys.argv[1]
+client = Client(gradio_host)
+result = client.predict("Hi", api_name="/chat")
+print(result)
diff --git a/images/ui-base/purge-google-fonts.sh b/web-apps/chat-interface/purge-google-fonts.sh
similarity index 100%
rename from images/ui-base/purge-google-fonts.sh
rename to web-apps/chat-interface/purge-google-fonts.sh
diff --git a/web-apps/chat-interface/requirements.txt b/web-apps/chat-interface/requirements.txt
new file mode 100644
index 0000000..3f34151
--- /dev/null
+++ b/web-apps/chat-interface/requirements.txt
@@ -0,0 +1,7 @@
+gradio<5
+gradio_client
+openai
+langchain
+langchain_openai
+pydantic
+pydantic_settings
diff --git a/web-apps/run.sh b/web-apps/run.sh
new file mode 100755
index 0000000..5baa0c6
--- /dev/null
+++ b/web-apps/run.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Run a web app image locally on port 7860, building it first if no local
+# image exists. Usage: ./run.sh <app-directory-name>
+set -e
+
+# Must match the tag applied by build.sh (ghcr.io/stackhpc/azimuth-llm-<app>)
+IMAGE_TAG="ghcr.io/stackhpc/azimuth-llm-$1"
+
+# Print a message and abort
+error() {
+    echo "$1"
+    exit 1
+}
+
+if [[ -z $1 ]]; then
+    error "App name is required as script arg"
+elif [[ ! -d $1 ]]; then
+    error "App $1 not found"
+elif [[ -z $(docker image ls -q "$IMAGE_TAG") ]]; then
+    ./build.sh "$1"
+else
+    echo "Found local $IMAGE_TAG docker image"
+fi
+
+docker run -p 7860:7860 "$IMAGE_TAG"