-
Notifications
You must be signed in to change notification settings - Fork 100
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Radek Ježek <[email protected]>
- Loading branch information
Showing
13 changed files
with
1,977 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
""" | ||
lm_eval | ||
.. admonition:: Before you start | ||
:class: important | ||
To use the following extension, first install it by running | ||
:bash:`pip install 'ibm-generative-ai[lm_eval]'`. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
""" | ||
lm-evaluation-harness CLI usage | ||
The recommended way to run benchmarks is through CLI. | ||
In your python environment with 'ibm-generative-ai[lm-eval]' installed: | ||
Example:: | ||
python -m genai.extensions.lm_eval \\ | ||
--model="ibm_genai" \\ | ||
--model_args="model_id=tiiuae/falcon-40b,temperature=0" \\ | ||
--task="hellaswag" \\ | ||
--num_fewshot=10 \\ | ||
--output_path="falcon-40b_hellaswag.json" | ||
""" | ||
|
||
import subprocess | ||
|
||
subprocess.run( | ||
[ | ||
"python", | ||
"-m", | ||
"genai.extensions.lm_eval", | ||
"--model=ibm_genai", | ||
"--model_args=model_id=tiiuae/falcon-40b,temperature=0", | ||
"--task=hellaswag", | ||
"--num_fewshot=10", | ||
"--limit=10", # WARNING: only for debug purposes, remove for full testing dataset | ||
], | ||
check=True, | ||
text=True, | ||
capture_output=False, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
""" | ||
lm-evaluation-harness advanced usage | ||
Use lm-evaluation extension from code to have additional control over concurrency or execution options | ||
Note: | ||
This is for advanced usage only, use CLI in most cases (lm_eval_cli example) | ||
""" | ||
|
||
import logging | ||
from pprint import pprint | ||
|
||
from dotenv import load_dotenv | ||
from lm_eval import simple_evaluate | ||
|
||
from genai import Client, Credentials | ||
from genai.extensions.lm_eval.model import IBMGenAILMEval | ||
from genai.schema import TextGenerationParameters | ||
|
||
load_dotenv() | ||
|
||
logging.getLogger("httpx").setLevel(logging.WARN) | ||
logging.getLogger("genai").setLevel(logging.WARN) | ||
|
||
task_name = "arc_challenge" | ||
model_id = "tiiuae/falcon-40b" | ||
num_fewshot = 25 | ||
limit = 10 # WARNING: only for debug purposes, set None for full testing dataset | ||
|
||
client = Client( | ||
credentials=Credentials.from_env(), | ||
config={"api_client_config": {"transport_options": {"retries": 999}}}, | ||
) | ||
model = IBMGenAILMEval( | ||
client=client, | ||
model_id=model_id, | ||
show_progressbar=True, | ||
parameters=TextGenerationParameters(temperature=0), | ||
) | ||
results = simple_evaluate(model, tasks=[task_name], num_fewshot=num_fewshot, log_samples=False, limit=limit) | ||
|
||
# add info about the model and few shot config | ||
# "model_kwargs": model_kwargs, | ||
results["config"] = {"model": model_id, "use_cache": False, "limit": limit, "model_kwargs": model.dump_parameters()} | ||
|
||
pprint(results) |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
"""Entrypoint that registers the IBM GenAI model with lm-evaluation-harness and delegates to its CLI."""
import logging
import signal

from genai import handle_shutdown_event
from genai.extensions.lm_eval.model import initialize_model

try:
    # load dotenv if installed
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    ...


try:
    from lm_eval.__main__ import cli_evaluate
except ImportError as err:
    # Chain the original exception so the real import failure stays visible,
    # instead of suppressing the warning with a noqa comment.
    raise ImportError("Could not import lm_eval: Please install ibm-generative-ai[lm-eval] extension.") from err


# Register the "ibm_genai" model type with the harness before it parses CLI args.
initialize_model()

# Shut down in-flight requests gracefully on Ctrl+C or SIGTERM.
signal.signal(signal.SIGINT, handle_shutdown_event)
signal.signal(signal.SIGTERM, handle_shutdown_event)

# Silence per-request chatter; logging.WARNING is the canonical name (WARN is a deprecated alias).
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("genai").setLevel(logging.WARNING)

cli_evaluate()
Oops, something went wrong.