Deploy ChatGLM-6B on Akash network by Fogmeta Labs (akash-network#403)
Normalnoise authored Jul 12, 2023
1 parent 36123d2 commit b395a98
Showing 5 changed files with 174 additions and 0 deletions.
14 changes: 14 additions & 0 deletions ChatGLM-6B/Dockerfile
@@ -0,0 +1,14 @@
# ChatGLM-6B (THUDM) web demo image
FROM library/ubuntu:20.04

WORKDIR /

RUN apt update && apt install -y git python3 python3-pip && git clone https://github.com/THUDM/ChatGLM-6B.git && rm ChatGLM-6B/web_demo.py && pip3 install -r ChatGLM-6B/requirements.txt

WORKDIR ChatGLM-6B

ADD web_demo.py .

EXPOSE 7860

ENTRYPOINT ["python3", "web_demo.py"]
21 changes: 21 additions & 0 deletions ChatGLM-6B/README-THUDM.md
@@ -0,0 +1,21 @@
# ChatGLM-6B on Akash Network

The original code repository is [here](https://github.com/THUDM/ChatGLM-6B).

## Introduction

ChatGLM-6B is an open bilingual language model based on the [General Language Model (GLM)](https://github.com/THUDM/GLM) framework, with 6.2 billion parameters. With quantization, users can deploy it locally on consumer-grade graphics cards (only 6 GB of GPU memory is required at the INT4 quantization level).

ChatGLM-6B uses technology similar to ChatGPT, optimized for Chinese Q&A and dialogue. The model was trained on about 1T tokens of Chinese and English corpus, supplemented by supervised fine-tuning, feedback bootstrapping, and reinforcement learning from human feedback. With only about 6.2 billion parameters, the model is able to generate answers that align with human preferences.

To make it easier for downstream developers to customize the model for their own application scenarios, we also implement a parameter-efficient tuning method based on [P-Tuning v2](https://github.com/THUDM/P-tuning-v2) [(guidelines)](ptuning/README_en.md). Tuning requires at least 7 GB of GPU memory at the INT4 quantization level.
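As a rough sanity check on those memory figures, the weight-only arithmetic works out as follows (this is a back-of-the-envelope sketch; activations, KV cache, and framework overhead add more on top, which is why the quoted requirements are higher than the raw weight sizes):

```python
# Back-of-the-envelope weight memory for a 6.2B-parameter model at
# several precisions. Weights only -- runtime overhead is not counted.
PARAMS = 6.2e9

def weight_gib(bytes_per_param):
    """Raw weight size in GiB at the given bytes-per-parameter width."""
    return PARAMS * bytes_per_param / 2**30

for name, width in [("FP16", 2), ("INT8", 1), ("INT4", 0.5)]:
    print(f"{name}: ~{weight_gib(width):.1f} GiB of weights")
```

At INT4 the weights alone come to roughly 2.9 GiB, consistent with the model fitting in a 6 GB consumer GPU once runtime overhead is included.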

Try the [online demo](https://huggingface.co/spaces/ysharma/ChatGLM-6b_Gradio_Streaming) on Hugging Face Spaces.

## Web UI
![image](https://github.com/Normalnoise/awesome-akash/assets/102578774/149d62bd-39ec-4a5b-9a28-30a3b84ef9ee)


## Demo Video
https://github.com/Normalnoise/awesome-akash/assets/102578774/f4079b2d-a813-4ca6-b8ce-2a4523510fc4

36 changes: 36 additions & 0 deletions ChatGLM-6B/deploy.yaml
@@ -0,0 +1,36 @@
version: '2.0'
services:
gpu-app:
image: sxk1633/gpu-chat-thudm:6b
expose:
- port: 7860
as: 80
to:
- global: true
profiles:
compute:
gpu-app:
resources:
cpu:
units: 1
memory:
size: 20Gi
gpu:
units: 1
attributes:
vendor:
nvidia:
- model: 3080
storage:
- size: 30Gi
placement:
westcoast:
pricing:
gpu-app:
denom: uakt
amount: 100000
deployment:
gpu-app:
westcoast:
profile: gpu-app
count: 1
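In an Akash SDL file like the one above, the same service name (`gpu-app` here) must line up across `services`, `profiles.compute`, the placement `pricing` map, and `deployment`, and each deployment entry must reference an existing placement and compute profile; mismatched names are a common cause of rejected manifests. A toy consistency check, mirroring the manifest above as a plain Python dict (this is an illustrative sketch, not an official SDL validator):

```python
# Minimal cross-reference check for the SDL above, represented as a dict.
sdl = {
    "services": {"gpu-app": {"expose": [{"port": 7860, "as": 80}]}},
    "profiles": {
        "compute": {"gpu-app": {"gpu": {"units": 1}}},
        "placement": {
            "westcoast": {"pricing": {"gpu-app": {"denom": "uakt", "amount": 100000}}}
        },
    },
    "deployment": {"gpu-app": {"westcoast": {"profile": "gpu-app", "count": 1}}},
}

def check_sdl(sdl):
    """Return a list of name mismatches; an empty list means consistent."""
    problems = []
    for svc, placements in sdl["deployment"].items():
        if svc not in sdl["services"]:
            problems.append(f"deployment references unknown service {svc!r}")
        for placement, spec in placements.items():
            if placement not in sdl["profiles"]["placement"]:
                problems.append(f"unknown placement {placement!r}")
            if spec["profile"] not in sdl["profiles"]["compute"]:
                problems.append(f"unknown compute profile {spec['profile']!r}")
    return problems

print(check_sdl(sdl))  # [] -- the names in this manifest line up
```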
102 changes: 102 additions & 0 deletions ChatGLM-6B/web_demo.py
@@ -0,0 +1,102 @@
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import mdtex2html

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).quantize(8).half().cuda()
#model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
model = model.eval()

"""Override Chatbot.postprocessafdadfafasd"""


def postprocess(self, y):
if y is None:
return []
for i, (message, response) in enumerate(y):
y[i] = (
None if message is None else mdtex2html.convert(message),
None if response is None else mdtex2html.convert(response),
)
return y


gr.Chatbot.postprocess = postprocess


def parse_text(text):
"""copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
lines = text.split("\n")
lines = [line for line in lines if line != ""]
count = 0
for i, line in enumerate(lines):
if "```" in line:
count += 1
items = line.split('`')
if count % 2 == 1:
lines[i] = f'<pre><code class="language-{items[-1]}">'
else:
lines[i] = f'<br></code></pre>'
else:
if i > 0:
if count % 2 == 1:
line = line.replace("`", "\`")
line = line.replace("<", "&lt;")
line = line.replace(">", "&gt;")
line = line.replace(" ", "&nbsp;")
line = line.replace("*", "&ast;")
line = line.replace("_", "&lowbar;")
line = line.replace("-", "&#45;")
line = line.replace(".", "&#46;")
line = line.replace("!", "&#33;")
line = line.replace("(", "&#40;")
line = line.replace(")", "&#41;")
line = line.replace("$", "&#36;")
lines[i] = "<br>"+line
text = "".join(lines)
return text


def predict(input, chatbot, max_length, top_p, temperature, history):
chatbot.append((parse_text(input), ""))
for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
temperature=temperature):
chatbot[-1] = (parse_text(input), parse_text(response))

yield chatbot, history


def reset_user_input():
return gr.update(value='')


def reset_state():
return [], []


with gr.Blocks() as demo:
gr.HTML("""<h1 align="center">Hi, buddy</h1>""")

chatbot = gr.Chatbot()
with gr.Row():
with gr.Column(scale=4):
with gr.Column(scale=12):
user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
container=False)
with gr.Column(min_width=32, scale=1):
submitBtn = gr.Button("Submit", variant="primary")
with gr.Column(scale=1):
emptyBtn = gr.Button("Clear History")
max_length = gr.Slider(0, 4096, value=2048, step=1.0, label="Maximum length", interactive=True)
top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)

history = gr.State([])

submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history],
show_progress=True)
submitBtn.click(reset_user_input, [], [user_input])

emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)

demo.queue().launch(server_name='0.0.0.0', share=False, inbrowser=True)
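Outside code fences, `parse_text` escapes markdown-significant characters into HTML entities so the chatbot widget renders them literally. A trimmed, standalone copy of just that escaping step (extracted here for illustration) behaves like this:

```python
# Standalone copy of the character-escaping step from parse_text,
# applied to a single line that is outside a code fence.
def escape_line(line):
    replacements = [
        ("<", "&lt;"), (">", "&gt;"), (" ", "&nbsp;"),
        ("*", "&ast;"), ("_", "&lowbar;"), ("-", "&#45;"),
        (".", "&#46;"), ("!", "&#33;"), ("(", "&#40;"),
        (")", "&#41;"), ("$", "&#36;"),
    ]
    for old, new in replacements:
        line = line.replace(old, new)
    return line

print(escape_line("a *b*"))  # a&nbsp;&ast;b&ast;
```

Note that `<` and `>` are escaped first, so later substitutions cannot accidentally produce markup that the widget would interpret as HTML.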
1 change: 1 addition & 0 deletions README.md
@@ -21,6 +21,7 @@ Also, follow [@akashnet\_](https://twitter.com/akashnet_) to stay in the loop wi
- [BabyAGI](babyagi)
- [BabyAGI-UI](babyagi-ui)
- [ChatChat](chatchat)
- [ChatGLM-6B](ChatGLM-6B)
- [ChatGPT Self-Hosted Chat](ai-chat-app)
- [Daila](daila)
- [Dolly-v2-12b](dolly-v2-12b)
