diff --git a/berkeley-function-call-leaderboard/CHANGELOG.md b/berkeley-function-call-leaderboard/CHANGELOG.md index b31e6a011..122b2be33 100644 --- a/berkeley-function-call-leaderboard/CHANGELOG.md +++ b/berkeley-function-call-leaderboard/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file. +- [Dec 13, 2024] [#832](https://github.com/ShishirPatil/gorilla/pull/832): Add the following new models to the leaderboard: + - `MadeAgents/Hammer2.1-7b` + - `MadeAgents/Hammer2.1-3b` + - `MadeAgents/Hammer2.1-1.5b` + - `MadeAgents/Hammer2.1-0.5b` - [Dec 9, 2024] [#822](https://github.com/ShishirPatil/gorilla/pull/822): Add the following new models to the leaderboard: - `gpt-4o-2024-11-20` - `gpt-4o-2024-11-20-FC` diff --git a/berkeley-function-call-leaderboard/SUPPORTED_MODELS.md b/berkeley-function-call-leaderboard/SUPPORTED_MODELS.md index 6e62d1a76..3b5f350eb 100644 --- a/berkeley-function-call-leaderboard/SUPPORTED_MODELS.md +++ b/berkeley-function-call-leaderboard/SUPPORTED_MODELS.md @@ -79,7 +79,7 @@ Below is a comprehensive table of models supported for running leaderboard evalu |NousResearch/Hermes-2-Pro-Llama-3-{8B,70B} 💻| Function Calling| |NousResearch/Hermes-2-Pro-Mistral-7B 💻| Function Calling| |NousResearch/Hermes-2-Theta-Llama-3-{8B,70B} 💻| Function Calling| -|MadeAgents/Hammer2.0-{7b,3b,1.5b,0.5b} 💻| Function Calling| +|MadeAgents/Hammer2.1-{7b,3b,1.5b,0.5b} 💻| Function Calling| |openbmb/MiniCPM3-4B-FC 💻| Function Calling| |openbmb/MiniCPM3-4B 💻| Prompt| |THUDM/glm-4-9b-chat 💻| Function Calling| diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py index 9763ad12a..5e59870f7 100644 --- a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py @@ -601,27 +601,27 @@ "Salesforce", "cc-by-nc-4.0", ], - "MadeAgents/Hammer2.0-7b": [ - "Hammer2.0-7b (FC)", - "https://huggingface.co/MadeAgents/Hammer2.0-7b", + "MadeAgents/Hammer2.1-7b": [ + "Hammer2.1-7b (FC)", + "https://huggingface.co/MadeAgents/Hammer2.1-7b", "MadeAgents", "cc-by-nc-4.0", ], - "MadeAgents/Hammer2.0-3b": [ - "Hammer2.0-3b (FC)", - "https://huggingface.co/MadeAgents/Hammer2.0-3b", + "MadeAgents/Hammer2.1-3b": [ + "Hammer2.1-3b (FC)", + "https://huggingface.co/MadeAgents/Hammer2.1-3b", "MadeAgents", - "cc-by-nc-4.0", + "qwen-research", ], - "MadeAgents/Hammer2.0-1.5b": [ - "Hammer2.0-1.5b (FC)", - "https://huggingface.co/MadeAgents/Hammer2.0-1.5b", + "MadeAgents/Hammer2.1-1.5b": [ + "Hammer2.1-1.5b (FC)", + "https://huggingface.co/MadeAgents/Hammer2.1-1.5b", "MadeAgents", "cc-by-nc-4.0", ], - "MadeAgents/Hammer2.0-0.5b": [ - "Hammer2.0-0.5b (FC)", - "https://huggingface.co/MadeAgents/Hammer2.0-0.5b", + "MadeAgents/Hammer2.1-0.5b": [ + "Hammer2.1-0.5b (FC)", + "https://huggingface.co/MadeAgents/Hammer2.1-0.5b", "MadeAgents", "cc-by-nc-4.0", ], diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py index 499a49e33..422323c0d 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py @@ -123,10 +123,10 @@ "NousResearch/Hermes-2-Pro-Llama-3-70B": HermesHandler, "NousResearch/Hermes-2-Theta-Llama-3-70B": HermesHandler, "ibm-granite/granite-20b-functioncalling": GraniteHandler, - "MadeAgents/Hammer2.0-7b": HammerHandler, - "MadeAgents/Hammer2.0-3b": HammerHandler, - "MadeAgents/Hammer2.0-1.5b": HammerHandler, - "MadeAgents/Hammer2.0-0.5b": HammerHandler, + "MadeAgents/Hammer2.1-7b": HammerHandler, + "MadeAgents/Hammer2.1-3b": HammerHandler, + "MadeAgents/Hammer2.1-1.5b": HammerHandler, + "MadeAgents/Hammer2.1-0.5b": HammerHandler, "THUDM/glm-4-9b-chat": GLMHandler, "Qwen/Qwen2-1.5B-Instruct": QwenHandler, "Qwen/Qwen2-7B-Instruct": QwenHandler, diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py index 01abca7e7..87a268239 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/hammer.py @@ -11,8 +11,11 @@ 1. Make one or more function/tool calls to meet the request based on the question. 2. If none of the function can be used, point it out and refuse to answer. 3. If the given question lacks the parameters required by the function, also point it out. -""" +The following are characters that may interact with you +1. user: Provides query or additional information. +2. tool: Returns the results of the tool calling. +""" FORMAT_INSTRUCTION = """ The output MUST strictly adhere to the following JSON format, and NO other text MUST be included. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please directly output an empty list '[]' @@ -32,7 +35,7 @@ def __init__(self, model_name, temperature) -> None: @override def _format_prompt(self, messages, function): """ - "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}", + "chat_template": "{%- set system_message = 'You are a helpful assistant.' %}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- if messages[1]['role'] == 'system' %}\n {%- set format_message = messages[1]['content'] %}\n {%- set loop_messages = messages[2:] %}\n {%- else %}\n {%- set loop_messages = messages[1:] %}\n {%- endif %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- if system_message is defined %}\n{{- '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}\n{%- endif %}\n\n\n{%- if tools is not none %}\n{% set task_instruction %}You are a tool calling assistant. In order to complete the user's request, you need to select one or more appropriate tools from the following tools and fill in the correct values for the tool parameters. Your specific tasks are:\n1. Make one or more function/tool calls to meet the request based on the question.\n2. If none of the function can be used, point it out and refuse to answer.\n3. If the given question lacks the parameters required by the function, also point it out.\n\nThe following are characters that may interact with you\n1. user: Provides query or additional information.\n2. tool: Returns the results of the tool calling.\n{% endset %}\n\n{% set format_instruction %}\nThe output MUST strictly adhere to the following JSON format, and NO other text MUST be included.\nThe example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please directly output an empty list '[]'\n```\n[\n {\"name\": \"func_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}},\n ... (more tool calls as required)\n]\n```\n{% endset %}\n{{- '<|im_start|>user\n[BEGIN OF TASK INSTRUCTION]\n' + task_instruction + '\n[END OF TASK INSTRUCTION]\n\n'}}\n {{- '[BEGIN OF AVAILABLE_TOOLS]\n' }}\n {{- tools|string }}\n {{- '\n[END OF AVAILABLE_TOOLS]\n\n' }}\n {{- '\n[BEGIN OF TASK INSTRUCTION]\n' + format_instruction + '\n[END OF TASK INSTRUCTION]\n\n<|im_end|>\n' }}\n{%- endif %}\n\n{%- for message in loop_messages %}\n {%- set role = message['role'] %}\n {%- set content = message['content'] %}\n {{- '<|im_start|>'+ role +'\n' + content + '<|im_end|>\n'}}\n{%- endfor %}\n{{- '<|im_start|>assistant\n' }}", """ def convert_to_format_tool(tools): @@ -73,9 +76,7 @@ def convert_to_format_tool(tools): user_query = "" for message in messages: - user_query += f"{message['role']}: {message['content']}\n" - if messages[-1]["role"] != "user": - user_query += "user: \n" + user_query += f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n" content = f"[BEGIN OF TASK INSTRUCTION]\n{TASK_INSTRUCTION}\n[END OF TASK INSTRUCTION]\n\n" content += ( @@ -84,9 +85,7 @@ def convert_to_format_tool(tools): + "\n[END OF AVAILABLE TOOLS]\n\n" ) content += f"[BEGIN OF FORMAT INSTRUCTION]\n{FORMAT_INSTRUCTION}\n[END OF FORMAT INSTRUCTION]\n\n" - content += f"[BEGIN OF QUERY]\n{user_query}\n[END OF QUERY]\n\n" - - return f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n" + return f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{content}<|im_end|>\n{user_query}<|im_start|>assistant\n" @override def decode_ast(self, result, language="Python"):