Add support for returning multiple messages from `gr.ChatInterface` chat function (#10197)

* multiple messages
* filepath
* add changeset
* changes
* changes
* changes
* changes
* changes
* changes
* changes
* changes
* add test
* add changeset
* changes
* add a lot more tests
* changes
* chat
* change
* changes
* chat
* changes
* change demo
* remove test
* changes
* format
* fix

---------

Co-authored-by: gradio-pr-bot <[email protected]>
abidlabs and gradio-pr-bot authored Dec 17, 2024
1 parent 2700d18 commit a95f8ef
Showing 8 changed files with 121 additions and 109 deletions.
5 changes: 5 additions & 0 deletions .changeset/famous-shoes-lose.md
@@ -0,0 +1,5 @@
---
"gradio": minor
---

feat:Add support for returning multiple messages from `gr.ChatInterface` chat function
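
As a quick illustration of the feature this changeset describes: a chat function may now return a `list` of messages, mixing strings, Gradio components, and openai-style dicts. A minimal sketch (the file path and wording are hypothetical):

```py
import gradio as gr

def chat(message, history):
    # Each list element is rendered as its own assistant message.
    return [
        "Here is what you asked for:",
        gr.Audio(value="sample.wav"),  # hypothetical local audio file
        {"role": "assistant", "content": "Anything else?"},
    ]

demo = gr.ChatInterface(chat, type="messages")

if __name__ == "__main__":
    demo.launch()
```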
1 change: 1 addition & 0 deletions demo/chatinterface_echo_multimodal/run.ipynb
@@ -0,0 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatinterface_echo_multimodal"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "def echo_multimodal(message, history):\n", " response = []\n", " response.append(\"You wrote: '\" + message[\"text\"] + \"' and uploaded:\")\n", " if message.get(\"files\"):\n", " for file in message[\"files\"]:\n", " response.append(gr.File(value=file))\n", " return response\n", "\n", "demo = gr.ChatInterface(\n", " echo_multimodal,\n", " type=\"messages\",\n", " multimodal=True,\n", " textbox=gr.MultimodalTextbox(file_count=\"multiple\"),\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
19 changes: 19 additions & 0 deletions demo/chatinterface_echo_multimodal/run.py
@@ -0,0 +1,19 @@
import gradio as gr

def echo_multimodal(message, history):
    response = []
    response.append("You wrote: '" + message["text"] + "' and uploaded:")
    if message.get("files"):
        for file in message["files"]:
            response.append(gr.File(value=file))
    return response

demo = gr.ChatInterface(
    echo_multimodal,
    type="messages",
    multimodal=True,
    textbox=gr.MultimodalTextbox(file_count="multiple"),
)

if __name__ == "__main__":
    demo.launch()
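
Called directly, the function above returns a plain Python list, which `gr.ChatInterface` splits into separate assistant messages (the input values here are hypothetical):

```py
# Hypothetical direct call; "cat.png" is a stand-in path.
print(echo_multimodal({"text": "hi", "files": ["cat.png"]}, []))
# -> ["You wrote: 'hi' and uploaded:", <gr.File wrapping cat.png>]
```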
2 changes: 1 addition & 1 deletion demo/chatinterface_options/run.ipynb
@@ -1 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatinterface_options"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "example_code = \"\"\"\n", "Here's the code I generated:\n", "\n", "```python\n", "def greet(x):\n", " return f\"Hello, {x}!\"\n", "```\n", "\n", "Is this correct?\n", "\"\"\"\n", "\n", "def chat(message, history):\n", " if message == \"Yes, that's correct.\":\n", " return \"Great!\"\n", " else:\n", " return {\n", " \"role\": \"assistant\",\n", " \"content\": example_code,\n", " \"options\": [\n", " {\"value\": \"Yes, that's correct.\", \"label\": \"Yes\"},\n", " {\"value\": \"No\"}\n", " ]\n", " }\n", "\n", "demo = gr.ChatInterface(\n", " chat,\n", " type=\"messages\",\n", " examples=[\"Write a Python function that takes a string and returns a greeting.\"]\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatinterface_options"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "\n", "example_code = \"\"\"\n", "Here's an example Python lambda function:\n", "\n", "lambda x: x + {}\n", "\n", "Is this correct?\n", "\"\"\"\n", "\n", "def chat(message, history):\n", " if message == \"Yes, that's correct.\":\n", " return \"Great!\"\n", " else:\n", " return {\n", " \"role\": \"assistant\",\n", " \"content\": example_code.format(random.randint(1, 100)),\n", " \"options\": [\n", " {\"value\": \"Yes, that's correct.\", \"label\": \"Yes\"},\n", " {\"value\": \"No\"}\n", " ]\n", " }\n", "\n", "demo = gr.ChatInterface(\n", " chat,\n", " type=\"messages\",\n", " examples=[\"Write an example Python lambda function.\"]\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
12 changes: 5 additions & 7 deletions demo/chatinterface_options/run.py
@@ -1,12 +1,10 @@
 import gradio as gr
+import random
 
 example_code = """
-Here's the code I generated:
+Here's an example Python lambda function:
 
-```python
-def greet(x):
-    return f"Hello, {x}!"
-```
+lambda x: x + {}
 
 Is this correct?
 """
@@ -17,7 +15,7 @@ def chat(message, history):
     else:
         return {
             "role": "assistant",
-            "content": example_code,
+            "content": example_code.format(random.randint(1, 100)),
             "options": [
                 {"value": "Yes, that's correct.", "label": "Yes"},
                 {"value": "No"}
@@ -27,7 +25,7 @@ def chat(message, history):
 demo = gr.ChatInterface(
     chat,
     type="messages",
-    examples=["Write a Python function that takes a string and returns a greeting."]
+    examples=["Write an example Python lambda function."]
 )
 
 if __name__ == "__main__":
79 changes: 44 additions & 35 deletions gradio/chat_interface.py
@@ -103,7 +103,7 @@ def __init__(
     ):
         """
         Parameters:
-            fn: the function to wrap the chat interface around. In the default case (assuming `type` is set to "messages"), the function should accept two parameters: a `str` input message and `list` of openai-style dictionary {"role": "user" | "assistant", "content": `str` | {"path": `str`} | `gr.Component`} representing the chat history, and return/yield a `str` (if a simple message) or `dict` (for a complete openai-style message) response.
+            fn: the function to wrap the chat interface around. In the default case (assuming `type` is set to "messages"), the function should accept two parameters: a `str` input message and `list` of openai-style dictionary {"role": "user" | "assistant", "content": `str` | {"path": `str`} | `gr.Component`} representing the chat history, and the function should return/yield a `str` (if a simple message), a supported Gradio component (to return a file), a `dict` (for a complete openai-style message response), or a `list` of such messages.
             multimodal: if True, the chat interface will use a `gr.MultimodalTextbox` component for the input, which allows for the uploading of multimedia files. If False, the chat interface will use a gr.Textbox component for the input. If this is True, the first argument of `fn` should accept not a `str` message but a `dict` message with keys "text" and "files"
             type: The format of the messages passed into the chat history parameter of `fn`. If "messages", passes the history as a list of dictionaries with openai-style "role" and "content" keys. The "content" key's value should be one of the following - (1) strings in valid Markdown (2) a dictionary with a "path" key and value corresponding to the file to display or (3) an instance of a Gradio component: at the moment gr.Image, gr.Plot, gr.Video, gr.Gallery, gr.Audio, and gr.HTML are supported. The "role" key should be one of 'user' or 'assistant'. Any other roles will not be displayed in the output. If this parameter is 'tuples' (deprecated), passes the chat history as a `list[list[str | None | tuple]]`, i.e. a list of lists. The inner list should have 2 elements: the user message and the response message.
             chatbot: an instance of the gr.Chatbot component to use for the chat interface, if you would like to customize the chatbot properties. If not provided, a default gr.Chatbot component will be created.
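
For orientation, under `type="messages"` the `history` argument described above is a list of openai-style dicts; a sketch with hypothetical values:

```py
history = [
    {"role": "user", "content": "Play something by Bach"},
    {"role": "assistant", "content": {"path": "bach.wav"}},  # hypothetical file
]
```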
@@ -564,43 +564,52 @@ def _tuples_to_messages(history_tuples: TupleFormat) -> list[MessageDict]:
 
     def _append_message_to_history(
         self,
-        message: MultimodalPostprocess | str | MessageDict,
+        message: MessageDict | Message | str | Component | MultimodalPostprocess | list,
         history: list[MessageDict] | TupleFormat,
         role: Literal["user", "assistant"] = "user",
     ) -> list[MessageDict] | TupleFormat:
-        if isinstance(message, str):
-            message = {"text": message}
+        message_dicts = self._message_as_message_dict(message, role)
         if self.type == "tuples":
             history = self._tuples_to_messages(history)  # type: ignore
         else:
             history = copy.deepcopy(history)
-
-        if "content" in message:  # in MessageDict format already
-            history.append(message)  # type: ignore
-        else:  # in MultimodalPostprocess format
-            for x in message.get("files", []):
-                if isinstance(x, dict):
-                    x = x.get("path")
-                history.append({"role": role, "content": (x,)})  # type: ignore
-            if message["text"] is None or not isinstance(message["text"], str):
-                pass
-            else:
-                history.append({"role": role, "content": message["text"]})  # type: ignore
-
+        history.extend(message_dicts)  # type: ignore
         if self.type == "tuples":
             history = self._messages_to_tuples(history)  # type: ignore
         return history
 
-    def response_as_dict(
-        self, response: MessageDict | Message | str | Component
-    ) -> MessageDict:
-        if isinstance(response, Message):
-            new_response = response.model_dump()
-        elif isinstance(response, (str, Component)):
-            return {"role": "assistant", "content": response}
-        else:
-            new_response = response
-        return cast(MessageDict, new_response)
+    def _message_as_message_dict(
+        self,
+        message: MessageDict | Message | str | Component | MultimodalPostprocess | list,
+        role: Literal["user", "assistant"],
+    ) -> list[MessageDict]:
+        """
+        Converts a user message, example message, or response from the chat function to a
+        list of MessageDict objects that can be appended to the chat history.
+        """
+        message_dicts = []
+        if not isinstance(message, list):
+            message = [message]
+        for msg in message:
+            if isinstance(msg, Message):
+                message_dicts.append(msg.model_dump())
+            elif isinstance(msg, (str, Component)):
+                message_dicts.append({"role": role, "content": msg})
+            elif (
+                isinstance(msg, dict) and "content" in msg
+            ):  # in MessageDict format already
+                msg["role"] = role
+                message_dicts.append(msg)
+            else:  # in MultimodalPostprocess format
+                for x in msg.get("files", []):
+                    if isinstance(x, dict):
+                        x = x.get("path")
+                    message_dicts.append({"role": role, "content": (x,)})
+                if msg["text"] is None or not isinstance(msg["text"], str):
+                    pass
+                else:
+                    message_dicts.append({"role": role, "content": msg["text"]})
+        return message_dicts
 
     async def _submit_fn(
         self,
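
To make the new normalization concrete, here is a standalone sketch (not the library's internal API) of how a mixed response could be flattened into message dicts; it mirrors only the string and dict branches of `_message_as_message_dict`:

```py
def as_message_dicts(response, role="assistant"):
    # Wrap a single message in a list, then normalize each entry.
    messages = response if isinstance(response, list) else [response]
    out = []
    for msg in messages:
        if isinstance(msg, str):
            out.append({"role": role, "content": msg})
        elif isinstance(msg, dict) and "content" in msg:
            out.append({**msg, "role": role})
    return out

print(as_message_dicts(["Here you go:", {"content": {"path": "a.wav"}}]))
# -> [{'role': 'assistant', 'content': 'Here you go:'},
#     {'content': {'path': 'a.wav'}, 'role': 'assistant'}]
```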
@@ -618,13 +627,12 @@ async def _submit_fn(
             response = await anyio.to_thread.run_sync(
                 self.fn, *inputs, limiter=self.limiter
             )
-        if isinstance(response, tuple):
+        if self.additional_outputs:
             response, *additional_outputs = response
         else:
             additional_outputs = None
         history = self._append_message_to_history(message, history, "user")
-        response_ = self.response_as_dict(response)
-        history = self._append_message_to_history(response_, history, "assistant")  # type: ignore
+        history = self._append_message_to_history(response, history, "assistant")
         if additional_outputs:
             return response, history, *additional_outputs
         return response, history
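
For context on the `self.additional_outputs` check above: when `additional_outputs` is set on `gr.ChatInterface`, the chat function returns a tuple and only its first element is treated as the chat message. A hedged sketch, assuming the `additional_inputs`/`additional_outputs` parameters behave as documented:

```py
import gradio as gr

def chat(message, history, count):
    # First element is the reply; the remainder feeds additional_outputs.
    return f"Echo: {message}", count + 1

counter = gr.Number(value=0, label="Turns")
demo = gr.ChatInterface(
    chat,
    type="messages",
    additional_inputs=[counter],
    additional_outputs=[counter],
)

if __name__ == "__main__":
    demo.launch()
```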
@@ -654,7 +662,7 @@ async def _stream_fn(
         additional_outputs = None
         try:
             first_response = await utils.async_iteration(generator)
-            if isinstance(first_response, tuple):
+            if self.additional_outputs:
                 first_response, *additional_outputs = first_response
             history_ = self._append_message_to_history(
                 first_response, history, "assistant"
@@ -666,7 +674,7 @@
         except StopIteration:
             yield None, history
         async for response in generator:
-            if isinstance(response, tuple):
+            if self.additional_outputs:
                 response, *additional_outputs = response
             history_ = self._append_message_to_history(response, history, "assistant")
             if not additional_outputs:
@@ -784,7 +792,7 @@ def _pop_last_user_message(
         history: list[MessageDict] | TupleFormat,
     ) -> tuple[list[MessageDict] | TupleFormat, str | MultimodalPostprocess]:
         """
-        Removes the last user message from the chat history and returns it.
+        Removes the message (or set of messages) that the user last sent from the chat history and returns them.
         If self.multimodal is True, returns a MultimodalPostprocess (dict) object with text and files.
         If self.multimodal is False, returns just the message text as a string.
         """
@@ -793,8 +801,9 @@
 
         if self.type == "tuples":
             history = self._tuples_to_messages(history)  # type: ignore
-        # Skip the last message as it's always an assistant message
-        i = len(history) - 2
+        i = len(history) - 1
+        while i >= 0 and history[i]["role"] == "assistant":  # type: ignore
+            i -= 1
         while i >= 0 and history[i]["role"] == "user":  # type: ignore
             i -= 1
         last_messages = history[i + 1 :]
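
A small plain-Python illustration of the revised scan (history values hypothetical): it now walks back over any number of trailing assistant messages, which is necessary now that one user turn can yield several, before collecting the user messages that precede them:

```py
history = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "Here:"},
    {"role": "assistant", "content": {"path": "a.wav"}},  # several replies per turn
]
i = len(history) - 1
while i >= 0 and history[i]["role"] == "assistant":
    i -= 1
while i >= 0 and history[i]["role"] == "user":
    i -= 1
print(history[i + 1:])  # the popped user message plus the assistant replies after it
```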
77 changes: 11 additions & 66 deletions guides/05_chatbots/01_creating-a-chatbot-fast.md
@@ -272,7 +272,7 @@ $code_chatinterface_artifacts
 
 We mentioned earlier that in the simplest case, your chat function should return a `str` response, which will be rendered as text in the chatbot. However, you can also return more complex responses as we discuss below:
 
-**Returning Gradio components**
+**Returning files or Gradio components**
 
 Currently, the following Gradio components can be displayed inside the chat interface:
 * `gr.Image`
@@ -281,8 +281,9 @@ Currently, the following Gradio components can be displayed inside the chat inte
 * `gr.HTML`
 * `gr.Video`
 * `gr.Gallery`
+* `gr.File`
 
-Simply return one of these components from your function to use it with `gr.ChatInterface`. Here's an example:
+Simply return one of these components from your function to use it with `gr.ChatInterface`. Here's an example that returns an audio file:
 
 ```py
 import gradio as gr
@@ -300,80 +301,24 @@ gr.ChatInterface(
 ).launch()
 ```
 
-
-**Returning image, audio, video, or other files**:
-
-Sometimes, you don't want to return a complete Gradio component, but rather simply an image/audio/video/other file to be displayed inside the chatbot. You can do this by returning a complete openai-style dictionary from your chat function. The dictionary should consist of the following keys:
-
-* `role`: set to `"assistant"`
-* `content`: set to a dictionary with key `path` and value the filepath or URL you'd like to return
-
-Here is an example:
-
-```py
-import gradio as gr
-
-def fake(message, history):
-    if message.strip():
-        return {
-            "role": "assistant",
-            "content": {
-                "path": "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav"
-            }
-        }
-    else:
-        return "Please provide the name of an artist"
-
-gr.ChatInterface(
-    fake,
-    type="messages",
-    textbox=gr.Textbox(placeholder="Which artist's music do you want to listen to?", scale=7),
-    chatbot=gr.Chatbot(placeholder="Play music by any artist!"),
-).launch()
-```
-
-Similarly, you could return image files with `gr.Image`, video files with `gr.Video`, or arbitrary files with the `gr.File` component.
-
 **Providing preset responses**
 
-You may want to provide preset responses that a user can choose between when conversing with your chatbot. You can add the `options` key to the dictionary returned from your chat function to set these responses. The value corresponding to the `options` key should be a list of dictionaries, each with a `value` (a string that is the value that should be sent to the chat function when this response is clicked) and an optional `label` (if provided, is the text displayed as the preset response instead of the `value`).
-
-This example illustrates how to use preset responses:
-
-```python
-import gradio as gr
-
-example_code = '''
-Here's the code I generated:
-
-def greet(x):
-    return f"Hello, {x}!"
-
-Is this correct?
-'''
-
-def chat(message, history):
-    if message == "Yes, that's correct.":
-        return "Great!"
-    else:
-        return {
-            "role": "assistant",
-            "content": example_code,
-            "options": [
-                {"value": "Yes, that's correct.", "label": "Yes"},
-                {"value": "No"}
-            ]
-        }
-
-demo = gr.ChatInterface(
-    chat,
-    type="messages",
-    examples=["Write a Python function that takes a string and returns a greeting."]
-)
-
-if __name__ == "__main__":
-    demo.launch()
-
-```
+You may want to provide preset responses that a user can choose between when conversing with your chatbot. To do this, return a complete openai-style message dictionary from your chat function, and add the `options` key to the dictionary returned from your chat function to set these responses.
+
+The value corresponding to the `options` key should be a list of dictionaries, each with a `value` (a string that is the value that should be sent to the chat function when this response is clicked) and an optional `label` (if provided, is the text displayed as the preset response instead of the `value`).
+
+This example illustrates how to use preset responses:
+
+$code_chatinterface_options
+
+**Returning Multiple Messages**
+
+You can return multiple assistant messages from your chat function simply by returning a `list` of messages of any of the above types (you can even mix-and-match). This lets you, for example, send a message along with files, as in the following example:
+
+$code_chatinterface_echo_multimodal
+
 ## Using Your Chatbot via API
 
 Once you've built your Gradio chat interface and are hosting it on [Hugging Face Spaces](https://hf.space) or somewhere else, then you can query it with a simple API at the `/chat` endpoint. The endpoint just expects the user's message (and potentially additional inputs if you have set any using the `additional_inputs` parameter), and will return the response, internally keeping track of the messages sent so far.
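
A minimal sketch of querying such an app from Python with the `gradio_client` package (the Space name here is hypothetical):

```py
from gradio_client import Client

client = Client("abidlabs/my-chatbot")  # hypothetical Space
result = client.predict("What is the capital of France?", api_name="/chat")
print(result)
```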