[BFCL] Set Model Temperature to 0.001 for All Models (ShishirPatil#574)
The model response generation script currently uses a default temperature of 0.7 for inference. This introduces randomness into the model outputs, which can cause evaluation scores to vary from run to run.
For benchmarking purposes, we set the temperature to 0.001 for all models so that evaluation results are consistent and reproducible.

Resolves ShishirPatil#500, resolves ShishirPatil#562

This will affect the leaderboard scores. We will update the leaderboard shortly.

---------

Co-authored-by: Shishir Patil <[email protected]>
HuanzhiMao and ShishirPatil authored Aug 10, 2024
1 parent 46dcf95 commit db53655
Showing 26 changed files with 29 additions and 51 deletions.
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/README.md
```diff
@@ -220,6 +220,7 @@ Some companies have proposed some optimization strategies in their models' handl
 
 ## Changelog
 
+* [August 8, 2024] [#574](https://github.com/ShishirPatil/gorilla/pull/574): Set temperature to 0.001 for all models for consistency and reproducibility.
 * [August 5, 2024] [#568](https://github.com/ShishirPatil/gorilla/pull/568): Rephrase the question prompt for the `executable_parallel_function` category to remove potentially misleading information implying multi-turn function calls.
 * [August 4, 2024] [#557](https://github.com/ShishirPatil/gorilla/pull/557): Bug fix in the possible answers.
 * simple: 7 affected
```
```diff
@@ -2,8 +2,9 @@
 from model_handler.utils import ast_parse
 
 class ArcticHandler(NvidiaHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def decode_ast(self, result, language="Python"):
         result = result.replace("\n", "")
         if not result.startswith("["):
```
```diff
@@ -18,7 +18,7 @@
 
 
 class ClaudeFCHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.Anthropic_Prompt
 
```
```diff
@@ -19,7 +19,7 @@
 
 
 class ClaudePromptingHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.Anthropic_Prompt
 
```
```diff
@@ -23,7 +23,7 @@
 class CohereHandler(BaseHandler):
     client: cohere.Client
 
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.COHERE
 
```
```diff
@@ -12,12 +12,9 @@
 
 
 class DatabricksHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
-        self.model_name = model_name
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
+        super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.OpenAI
-        self.temperature = temperature
-        self.top_p = top_p
-        self.max_tokens = max_tokens
 
         # NOTE: To run the Databricks model, you need to provide your own Databricks API key and your own Azure endpoint URL.
         self.client = OpenAI(
```
```diff
@@ -4,7 +4,7 @@
 
 
 class DeepseekHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def _format_prompt(prompt, function, test_category):
```
```diff
@@ -10,28 +10,15 @@
 
 
 class FireworkAIHandler(OpenAIHandler):
-    def __init__(self, model_name, temperature=0.0, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.FIREWORK_AI
-        self.temperature = 0.0
 
         self.client = OpenAI(
             base_url="https://api.fireworks.ai/inference/v1",
             api_key=os.getenv("FIRE_WORKS_API_KEY"),
         )
 
-    def write(self, result, file_to_open):
-        # This method is used to write the result to the file.
-        if not os.path.exists("./result"):
-            os.mkdir("./result")
-        if not os.path.exists(f"./result/{self.model_name}"):
-            os.mkdir(f"./result/{self.model_name}")
-        with open(
-            f"./result/{self.model_name}/"
-            + file_to_open.replace(".json", "_result.json"),
-            "a+",
-        ) as f:
-            f.write(json.dumps(result) + "\n")
-
     def inference(self, prompt, functions, test_category):
         functions = language_specific_pre_processing(functions, test_category)
```
```diff
@@ -5,11 +5,8 @@
 
 # For setup instructions, please refer to https://github.com/MeetKai/functionary for setup details.
 class FunctionaryHandler(OpenAIHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
-        self.temperature = temperature
-        self.top_p = top_p
-        self.max_tokens = max_tokens
-        self.model_name = model_name
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
+        super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.OpenAI
 
         self.client = OpenAI(base_url="http://localhost:8000/v1", api_key="functionary")
```
```diff
@@ -11,7 +11,7 @@
 
 
 class GeminiHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.Google
 
```
```diff
@@ -4,7 +4,7 @@
 
 
 class GemmaHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def _format_prompt(prompt, function, test_category):
```
```diff
@@ -4,7 +4,7 @@
 
 
 class GlaiveHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def _format_prompt(prompt, function, test_category):
```
```diff
@@ -16,7 +16,7 @@
 
 
 class GLMHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.max_model_len = 4096
         self.stop_token_ids = [151329, 151336, 151338]
```
```diff
@@ -9,7 +9,7 @@
 
 
 class GorillaHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.Gorilla
 
```
```diff
@@ -18,7 +18,7 @@
 
 
 class OpenAIHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.OpenAI
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
```
```diff
@@ -11,8 +11,7 @@
 
 
 class GraniteHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
-        temperature = 0.001
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def _format_prompt(prompt, function, test_category):
```
```diff
@@ -6,7 +6,7 @@ class BaseHandler:
     model_name: str
     model_style: ModelStyle
 
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         self.model_name = model_name
         self.temperature = temperature
         self.top_p = top_p
```
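
Note that updating `BaseHandler` alone would not have been enough: each handler re-declares the default in its own `__init__` signature, and Python binds a keyword default to the signature that is actually invoked, which is why every subclass needed the same edit. A small self-contained illustration (the class names here are made up, not from the repo):

```python
# Illustration with hypothetical classes: a keyword default is bound to
# the signature that is invoked, so a stale subclass default would win
# over the base class default.
class Base:
    def __init__(self, temperature=0.001):
        self.temperature = temperature

class Derived(Base):
    def __init__(self, temperature=0.7):  # stale default shadows Base's
        super().__init__(temperature)

print(Base().temperature)     # 0.001
print(Derived().temperature)  # 0.7
```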
```diff
@@ -6,7 +6,7 @@
 
 
 class HermesHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def _format_prompt(prompt, function, test_category):
```
```diff
@@ -7,7 +7,7 @@
 
 
 class LlamaHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def _format_prompt(prompt, function, test_category):
```
```diff
@@ -18,7 +18,7 @@
 
 
 class MistralHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.Mistral
 
```
```diff
@@ -10,7 +10,6 @@
 
 class NexusHandler(BaseHandler):
     def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
-        temperature = 0.001
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.NEXUS
 
```
```diff
@@ -13,16 +13,14 @@
 )
 
 class NvidiaHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
-        self.model_name = model_name
-        self.temperature = temperature
-        self.top_p = top_p
-        self.max_tokens = max_tokens
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
+        super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.OpenAI
         self.client = OpenAI(
             base_url = "https://integrate.api.nvidia.com/v1",
             api_key = os.getenv("NVIDIA_API_KEY")
         )
 
     def inference(self, prompt, functions, test_category):
         prompt = augment_prompt_by_languge(prompt,test_category)
         functions = language_specific_pre_processing(functions,test_category)
```
```diff
@@ -11,7 +11,7 @@
 
 
 class OSSHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000, dtype="float16") -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000, dtype="float16") -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.OSSMODEL
         self.dtype = dtype
```
```diff
@@ -31,7 +31,6 @@
 
 class xLAMHandler(OSSHandler):
     def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=512, dtype="bfloat16") -> None:
-        temperature = 0.001
         super().__init__(model_name, temperature, top_p, max_tokens, dtype)
         self.model_style = ModelStyle.OSSMODEL
 
```
```diff
@@ -12,7 +12,7 @@
 
 
 class YiHandler(BaseHandler):
-    def __init__(self, model_name, temperature=0.0, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
         self.model_style = ModelStyle.OpenAI
         self.base_url = "https://api.01.ai/v1"
```
```diff
@@ -14,7 +14,7 @@ def get_args():
     parser.add_argument("--test-category", type=str, default="all", nargs="+")
 
     # Parameters for the model that you want to test.
-    parser.add_argument("--temperature", type=float, default=0.7)
+    parser.add_argument("--temperature", type=float, default=0.001)
     parser.add_argument("--top-p", type=float, default=1)
     parser.add_argument("--max-tokens", type=int, default=1200)
     parser.add_argument("--num-gpus", default=1, type=int)
```
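
With this change, runs that do not pass `--temperature` pick up 0.001 from argparse, while an explicit flag still overrides it. A quick sanity check of the new default, with the parser construction abbreviated from the hunk above:

```python
# Sketch: parser setup abbreviated from get_args() above; only the
# arguments relevant to the default change are reproduced.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--temperature", type=float, default=0.001)
parser.add_argument("--top-p", type=float, default=1)
parser.add_argument("--max-tokens", type=int, default=1200)

args = parser.parse_args([])  # no flags passed: defaults apply
assert args.temperature == 0.001

args = parser.parse_args(["--temperature", "0.7"])  # explicit flag wins
assert args.temperature == 0.7
```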
