forked from akash-network/awesome-akash
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Deploy ChatGLM-6B on Akash network by Fogmeta Labs (akash-network#403)
- Loading branch information
1 parent
36123d2
commit b395a98
Showing
5 changed files
with
174 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
##thudm | ||
## ChatGLM-6B (THUDM) demo image for Akash deployment
FROM library/ubuntu:20.04

WORKDIR /

# Install git + Python, clone ChatGLM-6B, remove the upstream web_demo.py
# (it is replaced by our customized copy via ADD below) and install the
# Python dependencies. NOTE: the original RUN removed web_demo.py twice
# (`rm` and a trailing `rm -rf` on the same file) — deduplicated here.
RUN apt update && apt install git python3 python3-pip -y && git clone https://github.com/THUDM/ChatGLM-6B.git && rm -f ChatGLM-6B/web_demo.py && pip3 install -r ChatGLM-6B/requirements.txt

WORKDIR ChatGLM-6B

# Custom Gradio demo entry point (binds 0.0.0.0 so it works in a container).
ADD web_demo.py .

# Gradio default port.
EXPOSE 7860

ENTRYPOINT ["python3", "web_demo.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# ChatGLM-6B on Akash Network | ||
|
||
The original code repository is [here](https://github.com/THUDM/ChatGLM-6B). | ||
|
||
## Introduction | ||
|
||
ChatGLM-6B is an open bilingual language model based on [General Language Model (GLM)](https://github.com/THUDM/GLM) framework, with 6.2 billion parameters. With the quantization technique, users can deploy locally on consumer-grade graphics cards (only 6GB of GPU memory is required at the INT4 quantization level). | ||
|
||
ChatGLM-6B uses technology similar to ChatGPT, optimized for Chinese QA and dialogue. The model is trained for about 1T tokens of Chinese and English corpus, supplemented by supervised fine-tuning, feedback bootstrap, and reinforcement learning with human feedback. With only about 6.2 billion parameters, the model is able to generate answers that are in line with human preference. | ||
|
||
In order to facilitate downstream developers customizing the model for their own application scenarios, we also implement a parameter-efficient tuning method based on [P-Tuning v2](https://github.com/THUDM/P-tuning-v2) [(Guidelines)](ptuning/README_en.md). Tuning requires at least 7GB of GPU memory at the INT4 quantization level. | ||
|
||
Try the [online demo](https://huggingface.co/spaces/ysharma/ChatGLM-6b_Gradio_Streaming) on Huggingface Spaces. | ||
|
||
## Web UI | ||
 | ||
|
||
|
||
## Demo Video | ||
https://github.com/Normalnoise/awesome-akash/assets/102578774/f4079b2d-a813-4ca6-b8ce-2a4523510fc4 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Akash SDL manifest: deploys the ChatGLM-6B Gradio demo on a GPU provider.
version: '2.0'
services:
  gpu-app:
    image: sxk1633/gpu-chat-thudm:6b  # prebuilt image from the Dockerfile in this directory
    expose:
      - port: 7860  # Gradio listens on 7860 inside the container
        as: 80      # published externally as port 80
        to:
          - global: true  # reachable from the public internet
profiles:
  compute:
    gpu-app:
      resources:
        cpu:
          units: 1
        memory:
          size: 20Gi  # model weights + runtime; generous headroom for the 6B model
        gpu:
          units: 1
          attributes:
            vendor:
              nvidia:
                - model: 3080  # request an RTX 3080-class provider
        storage:
          - size: 30Gi  # repo clone + downloaded Hugging Face weights
  placement:
    westcoast:
      pricing:
        gpu-app:
          denom: uakt
          amount: 100000  # max bid price in uakt
deployment:
  gpu-app:
    westcoast:
      profile: gpu-app
      count: 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from transformers import AutoModel, AutoTokenizer | ||
import gradio as gr | ||
import mdtex2html | ||
|
||
# Load the ChatGLM-6B tokenizer and model from the Hugging Face hub.
# trust_remote_code=True is required because ChatGLM ships custom modeling code;
# NOTE(review): this executes code downloaded from the hub — only safe for a trusted repo.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
# INT8 quantization + fp16, moved to the GPU — requires a CUDA device at startup.
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).quantize(8).half().cuda()
#model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
model = model.eval()  # inference mode: disables dropout etc.
|
||
"""Override Chatbot.postprocessafdadfafasd""" | ||
|
||
|
||
def postprocess(self, y): | ||
if y is None: | ||
return [] | ||
for i, (message, response) in enumerate(y): | ||
y[i] = ( | ||
None if message is None else mdtex2html.convert((message)), | ||
None if response is None else mdtex2html.convert(response), | ||
) | ||
return y | ||
|
||
|
||
gr.Chatbot.postprocess = postprocess | ||
|
||
|
||
def parse_text(text):
    """Convert model output *text* to HTML for the Gradio chatbot.

    Fenced code blocks (``` ... ```) become <pre><code> blocks, and inside a
    code block every markdown-/HTML-significant character is escaped to an
    HTML entity so the code renders verbatim. Newlines become <br> tags.
    Adapted from https://github.com/GaiZhenbiao/ChuanhuChatGPT/.

    Bug fixed: the escape table had degenerated into no-ops such as
    ``line.replace("<", "<")`` — the HTML entities were lost (evidently
    rendered away when the code was copied). Restored the entity targets.
    """
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]  # drop blank lines
    count = 0  # number of ``` fences seen; odd => currently inside a code block
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                # Opening fence: the last split item is the language tag (may be "").
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                lines[i] = '<br></code></pre>'
        else:
            if i > 0:
                if count % 2 == 1:
                    # Inside a code block: escape characters markdown/HTML
                    # would otherwise interpret, so code displays literally.
                    line = line.replace("`", "\\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>" + line
    text = "".join(lines)
    return text
|
||
|
||
def predict(input, chatbot, max_length, top_p, temperature, history):
    """Stream a ChatGLM response for *input*, yielding (chatbot, history) updates.

    Appends the user message to the chatbot transcript, then iterates the
    model's streaming generator, rewriting the last transcript entry with each
    partial response so the Gradio UI updates incrementally.
    NOTE(review): `input` shadows the builtin; kept for caller compatibility.
    """
    chatbot.append((parse_text(input), ""))
    for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
                                               temperature=temperature):
        chatbot[-1] = (parse_text(input), parse_text(response))

        # Yield inside the loop so each partial response is rendered as it
        # streams in (original indentation was mangled in this copy — this
        # placement matches the streaming intent; confirm against upstream).
        yield chatbot, history
|
||
|
||
def reset_user_input():
    """Clear the input textbox: returns a Gradio update setting its value to ''."""
    return gr.update(value='')
|
||
|
||
def reset_state():
    """Reset the conversation: fresh empty chatbot transcript and model history."""
    cleared_transcript = []
    cleared_history = []
    return cleared_transcript, cleared_history
|
||
|
||
# Build the Gradio UI: chat transcript on top, input box + generation controls below.
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Hi, buddy</h1>""")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            with gr.Column(scale=12):
                # NOTE(review): .style(container=False) is the legacy Gradio 3.x API;
                # newer Gradio passes container= directly to the constructor.
                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
                    container=False)
            with gr.Column(min_width=32, scale=1):
                submitBtn = gr.Button("Submit", variant="primary")
        with gr.Column(scale=1):
            emptyBtn = gr.Button("Clear History")
            max_length = gr.Slider(0, 4096, value=2048, step=1.0, label="Maximum length", interactive=True)
            top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)

    # Per-session conversation history passed to/from the model.
    history = gr.State([])

    # Submit: stream model output into the chatbot, then clear the input box.
    submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history],
                    show_progress=True)
    submitBtn.click(reset_user_input, [], [user_input])

    # Clear History: empty both the visible transcript and the model history.
    emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)

# Bind to all interfaces so the server is reachable from outside the container
# (Gradio's default port 7860, matching the Dockerfile EXPOSE).
demo.queue().launch(server_name='0.0.0.0',share=False, inbrowser=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters