From 2e2e0e5d61921e7772a793aadf93996bd0658f0b Mon Sep 17 00:00:00 2001 From: Alonso Silva Allende Date: Mon, 22 Jul 2024 21:09:51 +0000 Subject: [PATCH 1/3] Add NousResearch/Hermes-2-Pro-Llama-3-8B model --- berkeley-function-call-leaderboard/README.md | 2 +- .../eval_checker/eval_runner_helper.py | 6 ++++++ .../model_handler/constant.py | 1 + .../model_handler/handler_map.py | 1 + .../model_handler/hermes_handler.py | 6 +++--- 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/berkeley-function-call-leaderboard/README.md b/berkeley-function-call-leaderboard/README.md index 47b02710a..341dd7ea4 100644 --- a/berkeley-function-call-leaderboard/README.md +++ b/berkeley-function-call-leaderboard/README.md @@ -111,7 +111,7 @@ Below is *a table of models we support* to run our leaderboard evaluation agains |mistral-small-2402 | Prompt| |mistral-tiny-2312 | Prompt| |Nexusflow-Raven-v2 | Function Calling| -|NousResearch/Hermes-2-Pro-Mistral-7B 💻| Function Calling| +|NousResearch/Hermes-2-Pro-{Llama-3-8B,Mistral-7B} 💻| Function Calling| |snowflake/arctic | Prompt| |nvidia/nemotron-4-340b-instruct| Prompt| |THUDM/glm-4-9b-chat 💻| Function Calling| diff --git a/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py index 283755a4f..a620cf000 100644 --- a/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py +++ b/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py @@ -319,6 +319,12 @@ "Databricks", "Databricks Open Model", ], + "NousResearch/Hermes-2-Pro-Llama-3-8B": [ + "Hermes-2-Pro-Llama-3-8B (FC)", + "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B", + "NousResearch", + "apache-2.0", + ], "NousResearch/Hermes-2-Pro-Mistral-7B": [ "Hermes-2-Pro-Mistral-7B (FC)", "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B", diff --git a/berkeley-function-call-leaderboard/model_handler/constant.py b/berkeley-function-call-leaderboard/model_handler/constant.py index 5ef7571b9..b91a95694 100644 --- a/berkeley-function-call-leaderboard/model_handler/constant.py +++ b/berkeley-function-call-leaderboard/model_handler/constant.py @@ -140,6 +140,7 @@ "meetkai/functionary-medium-v2.2-FC", "meetkai/functionary-small-v2.4-FC", "meetkai/functionary-medium-v2.4-FC", + "NousResearch/Hermes-2-Pro-Llama-3-8B", "NousResearch/Hermes-2-Pro-Mistral-7B", "command-r-plus-FC", "command-r-plus-FC-optimized", diff --git a/berkeley-function-call-leaderboard/model_handler/handler_map.py b/berkeley-function-call-leaderboard/model_handler/handler_map.py index 1ec2eae36..a6b10141a 100644 --- a/berkeley-function-call-leaderboard/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/model_handler/handler_map.py @@ -69,6 +69,7 @@ "meetkai/functionary-small-v2.4-FC": FunctionaryHandler, "meetkai/functionary-medium-v2.4-FC": FunctionaryHandler, "databricks-dbrx-instruct": DatabricksHandler, + "NousResearch/Hermes-2-Pro-Llama-3-8B": HermesHandler, "NousResearch/Hermes-2-Pro-Mistral-7B": HermesHandler, "meta-llama/Meta-Llama-3-8B-Instruct": LlamaHandler, "meta-llama/Meta-Llama-3-70B-Instruct": LlamaHandler, diff --git a/berkeley-function-call-leaderboard/model_handler/hermes_handler.py b/berkeley-function-call-leaderboard/model_handler/hermes_handler.py index 312f2864e..65d812ce5 100644 --- a/berkeley-function-call-leaderboard/model_handler/hermes_handler.py +++ b/berkeley-function-call-leaderboard/model_handler/hermes_handler.py @@ -16,8 +16,7 @@ def _format_prompt(prompt, function, test_category): ) pydantic_format = """{"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}""" tool_call_format = """{"arguments": , "name": }""" - formatted_prompt = """ -<|im_start|>system + formatted_prompt = """<|im_start|>system You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {function} Use the following pydantic model json schema for each tool call you will make: {pydantic_format} For each function call return a json object with function name and arguments within XML tags as follows: {tool_call_format} @@ -26,7 +25,8 @@ def _format_prompt(prompt, function, test_category): <|im_start|>user {prompt} <|im_end|> - """ +<|im_start|>assistant +""" return formatted_prompt.format( function=function, pydantic_format=pydantic_format, From 82a36a77f28850b9cf1cc927560ab5b958cc285f Mon Sep 17 00:00:00 2001 From: Alonso Silva Allende Date: Tue, 23 Jul 2024 08:40:39 +0000 Subject: [PATCH 2/3] Add NousResearch/Hermes-2-Theta-Llama-3-8B model --- berkeley-function-call-leaderboard/README.md | 2 +- .../eval_checker/eval_runner_helper.py | 6 ++++++ .../model_handler/constant.py | 1 + .../model_handler/handler_map.py | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/berkeley-function-call-leaderboard/README.md b/berkeley-function-call-leaderboard/README.md index 341dd7ea4..dffd7d357 100644 --- a/berkeley-function-call-leaderboard/README.md +++ b/berkeley-function-call-leaderboard/README.md @@ -111,7 +111,7 @@ Below is *a table of models we support* to run our leaderboard evaluation agains |mistral-small-2402 | Prompt| |mistral-tiny-2312 | Prompt| |Nexusflow-Raven-v2 | Function Calling| -|NousResearch/Hermes-2-Pro-{Llama-3-8B,Mistral-7B} 💻| Function Calling| +|NousResearch/Hermes-2-{Pro-Llama-3-8B,Pro-Mistral-7B,Theta-Llama-3-8B} 💻| Function Calling| |snowflake/arctic | Prompt| |nvidia/nemotron-4-340b-instruct| Prompt| |THUDM/glm-4-9b-chat 💻| Function Calling| diff --git a/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py index a620cf000..fb7c80ea5 100644 --- a/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py +++ b/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py @@ -331,6 +331,12 @@ "NousResearch", "apache-2.0", ], + "NousResearch/Hermes-2-Theta-Llama-3-8B": [ + "Hermes-2-Theta-Llama-3-8B (FC)", + "https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B", + "NousResearch", + "apache-2.0", + ], "meta-llama/Meta-Llama-3-8B-Instruct": [ "Meta-Llama-3-8B-Instruct (Prompt)", "https://llama.meta.com/llama3", diff --git a/berkeley-function-call-leaderboard/model_handler/constant.py b/berkeley-function-call-leaderboard/model_handler/constant.py index b91a95694..6c7349533 100644 --- a/berkeley-function-call-leaderboard/model_handler/constant.py +++ b/berkeley-function-call-leaderboard/model_handler/constant.py @@ -142,6 +142,7 @@ "meetkai/functionary-medium-v2.4-FC", "NousResearch/Hermes-2-Pro-Llama-3-8B", "NousResearch/Hermes-2-Pro-Mistral-7B", + "NousResearch/Hermes-2-Theta-Llama-3-8B", "command-r-plus-FC", "command-r-plus-FC-optimized", "THUDM/glm-4-9b-chat", diff --git a/berkeley-function-call-leaderboard/model_handler/handler_map.py b/berkeley-function-call-leaderboard/model_handler/handler_map.py index a6b10141a..127191258 100644 --- a/berkeley-function-call-leaderboard/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/model_handler/handler_map.py @@ -71,6 +71,7 @@ "databricks-dbrx-instruct": DatabricksHandler, "NousResearch/Hermes-2-Pro-Llama-3-8B": HermesHandler, "NousResearch/Hermes-2-Pro-Mistral-7B": HermesHandler, + "NousResearch/Hermes-2-Theta-Llama-3-8B": HermesHandler, "meta-llama/Meta-Llama-3-8B-Instruct": LlamaHandler, "meta-llama/Meta-Llama-3-70B-Instruct": LlamaHandler, "command-r-plus-FC": CohereHandler, From e3ba7345a41c47e7cc6c297ac203bcfff952b2c0 Mon Sep 17 00:00:00 2001 From: Huanzhi Mao Date: Wed, 24 Jul 2024 14:41:34 -0700 Subject: [PATCH 3/3] Add latency info --- .../eval_checker/eval_runner_helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py index 68e7b836c..c8e6863ec 100644 --- a/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py +++ b/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py @@ -487,6 +487,8 @@ "deepseek-ai/deepseek-coder-6.7b-instruct": 909, "google/gemma-7b-it": 95, "NousResearch/Hermes-2-Pro-Mistral-7B": 135, + "NousResearch/Hermes-2-Pro-Llama-3-8B": 77, + "NousResearch/Hermes-2-Theta-Llama-3-8B": 73, "meta-llama/Meta-Llama-3-8B-Instruct": 73, "meta-llama/Meta-Llama-3-70B-Instruct": 307, "gorilla-openfunctions-v2": 83,