From 3c02faee55532577bd606c98ddb8e757e3ca2306 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 12:46:09 +0000 Subject: [PATCH 01/14] Update comment --- charts/flux-image-gen/values.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/flux-image-gen/values.yaml b/charts/flux-image-gen/values.yaml index b5d97b4..06303d2 100644 --- a/charts/flux-image-gen/values.yaml +++ b/charts/flux-image-gen/values.yaml @@ -85,13 +85,13 @@ api: deploymentStrategy: type: Recreate - # Downloading 100GB+ of model weights can take a long time so - # it's difficult to give these probes sensible default values... + # Downloading model weights can take a long time so it's + # difficult to give these probes sensible default values + # Is 30 minutes long enough...? startupProbe: # httpGet: # path: / # port: http - # Is 30 minutes long enough...? # failureThreshold: 180 # periodSeconds: 10 livenessProbe: From df66542b828379a8e73505a862ba4adbd26bcbe3 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 12:46:25 +0000 Subject: [PATCH 02/14] Trim newline --- charts/flux-image-gen/templates/ui/configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/flux-image-gen/templates/ui/configmap.yaml b/charts/flux-image-gen/templates/ui/configmap.yaml index 4773ef9..c03bca7 100644 --- a/charts/flux-image-gen/templates/ui/configmap.yaml +++ b/charts/flux-image-gen/templates/ui/configmap.yaml @@ -13,4 +13,4 @@ data: address: {{ printf "http://%s.%s.svc:%v" ( printf "%s-%s-api" (include "flux-image-gen.fullname" $) . ) $.Release.Namespace $.Values.api.service.port }} {{- end }} example_prompt: | - {{- .Values.examplePrompt | nindent 6 -}} + {{- .Values.examplePrompt | trimSuffix "\n" | nindent 6 -}} From 8f89589f3d1deced765c786b046a3d8a8e4b4d28 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 13:36:13 +0000 Subject: [PATCH 03/14] Add image clean step to workaround storage capacity issue --- web-apps/kind-images.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index 4a63498..32beadd 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -17,4 +17,6 @@ for image in $(find_images .); do docker pull $full_name:$REMOTE_TAG docker image tag $full_name:$REMOTE_TAG $full_name:$KIND_TAG kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG + # Clean up images to save on disk space + docker image rm $(docker image ls $full_name -q) done From 133708e0cca9a990c817de04b7f114221320a88c Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 14:21:51 +0000 Subject: [PATCH 04/14] Force image cleanup --- web-apps/kind-images.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index 32beadd..843bdbc 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -18,5 +18,5 @@ for image in $(find_images .); do docker image tag $full_name:$REMOTE_TAG $full_name:$KIND_TAG kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG # Clean up images to save on disk space - docker image rm $(docker image ls $full_name -q) + docker image rm -f $(docker image ls $full_name -q) done From 8a38a867f0756beb2383d0dfaacd52f1bcf06b20 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 14:37:46 +0000 Subject: [PATCH 05/14] Try a full docker system prune --- web-apps/kind-images.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index 843bdbc..6a28392 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -18,5 +18,6 @@ for image in $(find_images .); do docker image tag $full_name:$REMOTE_TAG $full_name:$KIND_TAG kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG # Clean up images to save on disk space - docker image rm -f $(docker image ls $full_name -q) + # docker image rm -f $(docker image ls $full_name -q) + docker system prune -af done From 2cc0484477e77028515166d625973c0dae46639b Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 14:39:28 +0000 Subject: [PATCH 06/14] Add health check endpoint --- web-apps/flux-image-gen/api_server.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web-apps/flux-image-gen/api_server.py b/web-apps/flux-image-gen/api_server.py index 777857a..1d89e3f 100644 --- a/web-apps/flux-image-gen/api_server.py +++ b/web-apps/flux-image-gen/api_server.py @@ -31,6 +31,9 @@ class ImageGenInput(BaseModel): prompt: str add_sampling_metadata: bool +@app.get("/") +def health_check(): + return "Server is running" @app.get("/model") async def get_model(): From de0d856ec17e55b7b9f92ecef6f83d0331ebb568 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 15:28:22 +0000 Subject: [PATCH 07/14] Try saving archive outside of /tmp --- web-apps/kind-images.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index 6a28392..da6aac1 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -16,8 +16,11 @@ for image in $(find_images .); do echo $full_name docker pull $full_name:$REMOTE_TAG docker image tag $full_name:$REMOTE_TAG $full_name:$KIND_TAG - kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG - # Clean up images to save on disk space - # docker image rm -f $(docker image ls $full_name -q) - docker system prune -af + # NOTE(scott): The 'load docker-image' command saves the + # intermediate tar archive to /tmp which has limited space + # inside a GH runner so do each step manually here instead. + # kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG + docker image save -o ./image.tar $full_name:$KIND_TAG + kind load image-archive -n $CLUSTER_NAME ./image.tar + rm image.tar done From 7704ce8934792199af6bb10a50c10b806003f124 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 15:43:36 +0000 Subject: [PATCH 08/14] Add debug statements --- web-apps/chat/requirements.txt | 2 +- web-apps/image-analysis/requirements.txt | 2 +- web-apps/kind-images.sh | 12 ++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/web-apps/chat/requirements.txt b/web-apps/chat/requirements.txt index 9e080e4..18dbfd7 100644 --- a/web-apps/chat/requirements.txt +++ b/web-apps/chat/requirements.txt @@ -1,4 +1,4 @@ -gradio<5 +gradio==4.21 # v4.41 breaks custom JS query param parsing gradio_client openai langchain diff --git a/web-apps/image-analysis/requirements.txt b/web-apps/image-analysis/requirements.txt index a54cba5..dd2933d 100644 --- a/web-apps/image-analysis/requirements.txt +++ b/web-apps/image-analysis/requirements.txt @@ -1,6 +1,6 @@ pillow requests -gradio<5 +gradio==4.21 # v4.41 breaks custom JS query param parsing gradio_client pydantic structlog diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index da6aac1..7e62273 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -7,6 +7,18 @@ if [[ -z $1 ]]; then exit 1 fi +# Clean up some storage capacity in CI runner +if [[ $CI == "true" ]]; then + docker system prune -af +fi + +# Debug storage issues +set -x +docker system df +lsblk +df -h +set +x + REMOTE_TAG=$1 CLUSTER_NAME=${2:-kind} echo Kind cluster name: $CLUSTER_NAME From 50ef3c5031e3f789ea067340489f42efc77cf350 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 15:58:43 +0000 Subject: [PATCH 09/14] Try using extra storage at /mnt --- web-apps/kind-images.sh | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index 7e62273..071dcb9 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -9,15 +9,16 @@ fi # Clean up some storage capacity in CI runner if [[ $CI == "true" ]]; then + # Debug storage issues + set -x + docker system df docker system prune -af + docker system df + lsblk + df -h + set +x fi -# Debug storage issues -set -x -docker system df -lsblk -df -h -set +x REMOTE_TAG=$1 CLUSTER_NAME=${2:-kind} @@ -25,14 +26,17 @@ echo Kind cluster name: $CLUSTER_NAME KIND_TAG=local for image in $(find_images .); do full_name=ghcr.io/stackhpc/azimuth-llm-$image-ui - echo $full_name + echo $full_name:{$REMOTE_TAG,$KIND_TAG} docker pull $full_name:$REMOTE_TAG docker image tag $full_name:$REMOTE_TAG $full_name:$KIND_TAG # NOTE(scott): The 'load docker-image' command saves the # intermediate tar archive to /tmp which has limited space # inside a GH runner so do each step manually here instead. # kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG - docker image save -o ./image.tar $full_name:$KIND_TAG - kind load image-archive -n $CLUSTER_NAME ./image.tar + # Apparently there's a separate 75G disk at /mnt so try using it. + TAR_PATH=/mnt/image.tar + docker image save -o $TAR_PATH $full_name:$KIND_TAG + docker rm $full_name:{$REMOTE_TAG,$KIND_TAG} + kind load image-archive -n $CLUSTER_NAME $TAR_PATH rm image.tar done From 1892f687ecd46424beb2fdfcd4e0e7807fddd866 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 16:12:10 +0000 Subject: [PATCH 10/14] Try fixing file permissions --- web-apps/kind-images.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index 071dcb9..f31e568 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -34,7 +34,14 @@ for image in $(find_images .); do # inside a GH runner so do each step manually here instead. # kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG # Apparently there's a separate 75G disk at /mnt so try using it. - TAR_PATH=/mnt/image.tar + if [[ $CI == "true" ]]; then + DIR=/mnt/gimme-more-space + sudo mkdir $DIR + sudo chown -R $USER:$USER $DIR + TAR_PATH=$DIR/image.tar + else + TAR_PATH="./image.tar" + fi docker image save -o $TAR_PATH $full_name:$KIND_TAG docker rm $full_name:{$REMOTE_TAG,$KIND_TAG} kind load image-archive -n $CLUSTER_NAME $TAR_PATH From f456ca5323cc43302090bdd51043f5e79247c53e Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 16:12:41 +0000 Subject: [PATCH 11/14] Add page title setting --- web-apps/flux-image-gen/gradio_ui.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/web-apps/flux-image-gen/gradio_ui.py b/web-apps/flux-image-gen/gradio_ui.py index 95f09e4..dcc1305 100644 --- a/web-apps/flux-image-gen/gradio_ui.py +++ b/web-apps/flux-image-gen/gradio_ui.py @@ -18,7 +18,9 @@ class Model(BaseModel): class AppSettings(BaseModel): models: List[Model] - example_prompt: str + example_prompt: str = "Yoda riding a skateboard." + title = "Flux Image Generation Demo" + settings_path = pathlib.Path("/etc/gradio-app/gradio_config.yaml") @@ -93,9 +95,8 @@ async def generate_image( return image, seed, filename, None - -with gr.Blocks() as demo: - gr.Markdown("# Flux Image Generation Demo") +with gr.Blocks(title=settings.title) as demo: + gr.Markdown(f"# {settings.title}") with gr.Row(): with gr.Column(): From 7975e0e5f8d02a69bda60fcff392bf98de8dd8a2 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 16:24:52 +0000 Subject: [PATCH 12/14] Fix typo --- web-apps/kind-images.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index f31e568..bbb2ea1 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -43,7 +43,7 @@ for image in $(find_images .); do TAR_PATH="./image.tar" fi docker image save -o $TAR_PATH $full_name:$KIND_TAG - docker rm $full_name:{$REMOTE_TAG,$KIND_TAG} + docker image rm $full_name:{$REMOTE_TAG,$KIND_TAG} kind load image-archive -n $CLUSTER_NAME $TAR_PATH - rm image.tar + rm $TAR_PATH done From 05c49a71c1d7ed2d3ccaa5638478bff3aa2ff003 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 16:38:50 +0000 Subject: [PATCH 13/14] Clean up debug tasks --- web-apps/kind-images.sh | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index bbb2ea1..6749320 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -7,19 +7,19 @@ if [[ -z $1 ]]; then exit 1 fi -# Clean up some storage capacity in CI runner +# Work around storage limits in GH runners if [[ $CI == "true" ]]; then - # Debug storage issues - set -x - docker system df - docker system prune -af - docker system df - lsblk - df -h - set +x + DIR=/mnt/gimme-more-space + sudo mkdir -p $DIR + sudo chown -R $USER:$USER $DIR + TAR_PATH=$DIR/image.tar +else + TAR_PATH="./image.tar" fi + + REMOTE_TAG=$1 CLUSTER_NAME=${2:-kind} echo Kind cluster name: $CLUSTER_NAME @@ -34,14 +34,6 @@ for image in $(find_images .); do # inside a GH runner so do each step manually here instead. # kind load docker-image -n $CLUSTER_NAME $full_name:$KIND_TAG # Apparently there's a separate 75G disk at /mnt so try using it. - if [[ $CI == "true" ]]; then - DIR=/mnt/gimme-more-space - sudo mkdir $DIR - sudo chown -R $USER:$USER $DIR - TAR_PATH=$DIR/image.tar - else - TAR_PATH="./image.tar" - fi docker image save -o $TAR_PATH $full_name:$KIND_TAG docker image rm $full_name:{$REMOTE_TAG,$KIND_TAG} kind load image-archive -n $CLUSTER_NAME $TAR_PATH From ee82539fec670410358fc5dfdb0872db7b935441 Mon Sep 17 00:00:00 2001 From: sd109 Date: Tue, 12 Nov 2024 16:59:14 +0000 Subject: [PATCH 14/14] Add missing type annotation --- web-apps/flux-image-gen/gradio_ui.py | 2 +- web-apps/kind-images.sh | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/web-apps/flux-image-gen/gradio_ui.py b/web-apps/flux-image-gen/gradio_ui.py index dcc1305..658b5e4 100644 --- a/web-apps/flux-image-gen/gradio_ui.py +++ b/web-apps/flux-image-gen/gradio_ui.py @@ -19,7 +19,7 @@ class Model(BaseModel): class AppSettings(BaseModel): models: List[Model] example_prompt: str = "Yoda riding a skateboard." - title = "Flux Image Generation Demo" + title: str = "Flux Image Generation Demo" diff --git a/web-apps/kind-images.sh b/web-apps/kind-images.sh index 6749320..388f382 100755 --- a/web-apps/kind-images.sh +++ b/web-apps/kind-images.sh @@ -17,9 +17,6 @@ else TAR_PATH="./image.tar" fi - - - REMOTE_TAG=$1 CLUSTER_NAME=${2:-kind} echo Kind cluster name: $CLUSTER_NAME