From a13c2685c89bdf34da1f6aa01d437b08d9017f17 Mon Sep 17 00:00:00 2001 From: zhangsongyang Date: Mon, 6 Jan 2025 12:38:09 +0000 Subject: [PATCH 1/3] Update LiveMathBench --- opencompass/datasets/livemathbench/livemathbench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencompass/datasets/livemathbench/livemathbench.py b/opencompass/datasets/livemathbench/livemathbench.py index d2b4b93b5..08b3ce515 100644 --- a/opencompass/datasets/livemathbench/livemathbench.py +++ b/opencompass/datasets/livemathbench/livemathbench.py @@ -10,8 +10,8 @@ import jsonlines import mmengine import numpy as np -from datasets import Dataset, load_dataset +from datasets import Dataset, load_dataset from opencompass.datasets.math import MATHAgentEvaluator, math_postprocess_v2 from opencompass.models import OpenAISDK from opencompass.openicl.icl_evaluator import GPassKEvaluator From 73cfeb80b3c8b4313f298cc934705f1adf54e0b9 Mon Sep 17 00:00:00 2001 From: zhangsongyang Date: Mon, 6 Jan 2025 13:29:07 +0000 Subject: [PATCH 2/3] Update New O1 Evaluation --- opencompass/datasets/livemathbench/livemathbench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencompass/datasets/livemathbench/livemathbench.py b/opencompass/datasets/livemathbench/livemathbench.py index 08b3ce515..d2b4b93b5 100644 --- a/opencompass/datasets/livemathbench/livemathbench.py +++ b/opencompass/datasets/livemathbench/livemathbench.py @@ -10,8 +10,8 @@ import jsonlines import mmengine import numpy as np - from datasets import Dataset, load_dataset + from opencompass.datasets.math import MATHAgentEvaluator, math_postprocess_v2 from opencompass.models import OpenAISDK from opencompass.openicl.icl_evaluator import GPassKEvaluator From 876bea723002ba8e5692894fec04473434d7ffc5 Mon Sep 17 00:00:00 2001 From: zhangsongyang Date: Tue, 7 Jan 2025 11:14:40 +0000 Subject: [PATCH 3/3] Update O1 evaluation --- .../livemathbench_greedy_gen_efb20d.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen_efb20d.py diff --git a/opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen_efb20d.py b/opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen_efb20d.py new file mode 100644 index 000000000..d6acd7c0d --- /dev/null +++ b/opencompass/configs/datasets/livemathbench/livemathbench_greedy_gen_efb20d.py @@ -0,0 +1,51 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer + +from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator + + +livemathbench_dataset = dict( + abbr='LiveMathBench-v202412-greedy', # If you change the K and replication, you need to change the dataset name. + type=LiveMathBenchDataset, + path='opencompass/LiveMathBench', + k=1, + replication=1, + dataset_splits=['CNMO', 'CCEE', 'AMC', 'WLPMC'], + dataset_languages=['cn', 'en'], + cot=False, + version='202412', + reader_cfg=dict( + input_columns=['prompt'], + output_column='answer' + ), + infer_cfg=dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt='{prompt}'), + ] + ) + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict( + type=GenInferencer, + max_out_len=16384, + ), + ), + eval_cfg=dict( + evaluator=dict( + type=LiveMathBenchEvaluator, + model_name='', + url=[], + use_extract_model=False, + extract_url=[], + extract_model_name='', + k=[1], + replication=1, + thresholds=[0.0, 0.25, 0.5, 0.75, 1.0] + ) + ) +) +livemathbench_datasets = [livemathbench_dataset] \ No newline at end of file