vikhyat · nie3e · Jan 30, 2024 · Jan 30, 2024 · Jan 30, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 .venv
-__pycache__
+__pycache__
+.idea/
diff --git a/gradio_demo.py b/gradio_demo.py
@@ -17,12 +17,15 @@
 text_model = TextModel(model_path).to(device=device, dtype=dtype)
 
 
-def moondream(img, prompt):
+def moondream(img, prompt, max_tokens):
     image_embeds = vision_encoder(img)
     streamer = TextIteratorStreamer(text_model.tokenizer, skip_special_tokens=True)
     thread = Thread(
         target=text_model.answer_question,
-        kwargs={"image_embeds": image_embeds, "question": prompt, "streamer": streamer},
+        kwargs={
+            "image_embeds": image_embeds, "question": prompt,
+            "streamer": streamer, "max_new_tokens": max_tokens
+        },
     )
     thread.start()
 
@@ -41,12 +44,15 @@ def moondream(img, prompt):
         """
     )
     with gr.Row():
-        prompt = gr.Textbox(label="Input Prompt", placeholder="Type here...", scale=4)
+        with gr.Column(scale=4):
+            prompt = gr.Textbox(label="Input Prompt", placeholder="Type here...")
+            max_tokens = gr.Slider(label="Max tokens", minimum=128,
+                                   maximum=2048, value=128)
         submit = gr.Button("Submit")
     with gr.Row():
         img = gr.Image(type="pil", label="Upload an Image")
         output = gr.TextArea(label="Response", info="Please wait for a few seconds..")
-    submit.click(moondream, [img, prompt], output)
-    prompt.submit(moondream, [img, prompt], output)
+    submit.click(moondream, [img, prompt, max_tokens], output)
+    prompt.submit(moondream, [img, prompt, max_tokens], output)
 
 demo.queue().launch(debug=True)
diff --git a/moondream/text_model.py b/moondream/text_model.py
@@ -78,14 +78,15 @@ def generate(
         return self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)
 
     def answer_question(
-        self, image_embeds, question, chat_history="", result_queue=None, **kwargs
+        self, image_embeds, question, chat_history="", result_queue=None,
+        max_new_tokens=128, **kwargs
     ):
         prompt = f"<image>\n\n{chat_history}Question: {question}\n\nAnswer:"
         answer = self.generate(
             image_embeds,
             prompt,
             eos_text="<END>",
-            max_new_tokens=128,
+            max_new_tokens=max_new_tokens,
             **kwargs,
         )[0]
         cleaned_answer = re.sub("<$", "", re.sub("END$", "", answer)).strip()