# Copyright (C) 2024 AIDC-AI
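# configs.py: shared configuration for the evaluation pipeline, covering
# local paths, the supported dataset roster, model/dataset module registries,
# few-shot settings, generation defaults and prompt templates.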
import os
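# Sequence-length and generation-length limits, plus default local paths
# (the 'your_*_path' entries are placeholders to be replaced per deployment).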
_DEFAULT_MAX_LENGTH = 2048
MAX_GEN_TOKS = 1024
MODEL_PATH = 'your_models_path/models'
DATA_PATH = './data'
OUTPUT_PATH = 'your_outputs_path/outputs'
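# Every dataset name known to the pipeline, concatenated from several groups:
# standard text benchmarks, generative tasks, multimodal benchmarks,
# cs_mmlu variants, multilingual suites, their sampled subsets, and the
# multimodal-complexity sets.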
datasets = \
['hellaswag', 'mmlu', 'arc_challenge', 'arc_easy', 'winogrande', 'truthfulqa_mc1', 'truthfulqa_mc2', 'aclue', 'anli', 'boolq', 'cb', 'cmmlu', 'cola', 'crows_pairs', 'copa', 'glue', 'lambada', 'mathqa', 'mnli', 'mrpc', 'openbookqa', 'piqa'] + \
['bbh', 'gsm8k', 'drop', 'logieval', 'eq_bench', 'nq_open', 'realworldqa'] + \
['mmmu_val', 'ccbench', 'mme', 'ai2d_test', 'coco_val', 'hallusionbench', 'mathvista_mini', 'mmstar', 'mmvet', 'ocrbench', 'realworldqa', 'scienceqa_test', 'scienceqa_val', 'seedbench_img', 'mmbench_dev_cn', 'mmbench_dev_en', 'mmbench_test_cn', 'mmbench_test_en'] + \
['cs_mmlu_kw_3', 'cs_mmlu_kw_5', 'cs_mmlu_kw_10', 'cs_mmlu_kw_20', 'cs_mmlu_st_3', 'cs_mmlu_st_5', 'cs_mmlu_st_10', 'cs_mmlu_st_20'] + \
['agieval', 'arc_multilingual', 'hellaswag_multilingual', 'truthfulqa_multilingual_mc2', 'belebele', 'xcopa', 'translation', 'xstorycloze', 'truthfulqa_multilingual_mc1'] + \
['hellaswag_multilingual_sampled', 'truthfulqa_multilingual_mc2_sampled', 'arc_multilingual_sampled', 'xstorycloze_sampled'] + \
['multimodal_complexity', 'mc']
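# Maps each model wrapper module (dotted import path) to the checkpoint names
# it serves; inverted into MODEL2MODULE below.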
_MODULE2MODEL = {
'multimodal_llm.deepseek_ai.deepseek_vl': ['deepseek-vl-1.3b-chat', 'deepseek-vl-7b-chat'],
'multimodal_llm.echo840.monkey': ['Monkey', 'Monkey-Chat'],
'multimodal_llm.thudm.cogvlm': ['cogvlm2-llama3-chat-19B', 'cogvlm-chat-hf'],
'multimodal_llm.thudm.glm_4v': ['glm-4v-9b'],
'multimodal_llm.qwen.qwen_vl': ['Qwen-VL'],
'multimodal_llm.qwen.qwen_vl_chat': ['Qwen-VL-Chat'],
'multimodal_llm.01_ai.yi_vl': ['Yi-VL-6B'],
'multimodal_llm.microsoft.phi3_vision': ['Phi-3-vision-128k-instruct', 'Phi-3.5-vision-instruct'],
'multimodal_llm.microsoft.e5_v': ['e5-v'],
'multimodal_llm.openbmb.vision_cair': ['MiniGPT-4'],
'multimodal_llm.openbmb.minicpm_v': ['MiniCPM-V', 'MiniCPM-V-2'],
'multimodal_llm.openbmb.minicpm_llama3_v': ['MiniCPM-Llama3-V-2_5'],
'multimodal_llm.openbmb.minicpm_v_2_6': ['MiniCPM-V-2_6'],
'multimodal_llm.liuhaotian.llava_v1_5': ['llava-v1.5-7b'],
'multimodal_llm.baai.bunny_llama3_v': ['Bunny-Llama-3-8B-V'],
'multimodal_llm.huggingfacem4.idefics2': ['idefics2-8b'],
'multimodal_llm.aidc_ai.ovis': ['Ovis-Clip-Llama3-8B', 'Ovis-Clip-Qwen1_5-7B'],
'multimodal_llm.aidc_ai.ovis1_6': ['Ovis1.6-Gemma2-9B'],
'multimodal_llm.internlm.internlm_xcomposer2': ['internlm-xcomposer2-vl-1_8b', 'internlm-xcomposer2-vl-7b', 'internlm-xcomposer2-7b'],
'multimodal_llm.lamda_llm.wings': ['Wings'],
'llm.qwen.qwen1_5_chat': ['Qwen1.5-0.5B-Chat', 'Qwen1.5-1.8B-Chat', 'Qwen1.5-4B-Chat', 'Qwen1.5-MoE-A2.7B-Chat', 'Qwen1.5-7B-Chat', 'Qwen2-0.5B-Instruct', 'Qwen2-1.5B-Instruct', 'Qwen2-7B-Instruct', 'Qwen2-72B-Instruct'],
'llm.qwen.qwen_base': ['Qwen-1_8B', 'Qwen-7B'],
'llm.qwen.qwen_chat': ['Qwen-1_8B-Chat', 'Qwen-7B-Chat'],
'llm.01_ai.yi_chat': ['Yi-1.5-6B-Chat', 'Yi-1.5-9B-Chat', 'Yi-6B-Chat'],
'llm.deepseek_ai.deepseek_llm': ['deepseek-llm-7b-chat'],
'llm.internlm.internlm_chat': ['internlm2-chat-7b', 'internlm-chat-7b'],
'llm.thudm.glm': ['glm-10b'],
'llm.thudm.glm_4_chat': ['glm-4-9b-chat'],
'llm.thudm.chatglm': ['chatglm-6b', 'chatglm2-6b', 'chatglm3-6b'],
'llm.microsoft.phi_instruct': ['Phi-3-mini-4k-instruct', 'Phi-3-mini-128k-instruct', 'Phi-3-small-8k-instruct', 'Phi-3-small-128k-instruct'],
'llm.microsoft.ocra2': ['Orca-2-7b'],
'llm.microsoft.phi2': ['phi-2'],
'llm.microsoft.phi': ['phi-1', 'phi-1_5'],
'llm.huggingfaceh4.zephyr': ['zephyr-7b-alpha', 'zephyr-7b-beta', 'mistral-7b-sft-beta'],
'llm.huggingfaceh4.zephyr_gemma': ['zephyr-7b-gemma-v0.1', 'zephyr-7b-gemma-sft-v0.1'],
'llm.tiiuae.falcon': ['falcon-7b-instruct'],
'llm.tinyllama.tinyllama': ['TinyLlama-1.1B-Chat-v1.0'],
'llm.open_ocra.open_ocra': ['Mistral-7B-OpenOrca'],
'llm.stabilityai.stablebeluga2': ['StableBeluga2'],
'llm.meta_llama.opt': ['opt-125m', 'opt-1.3b'],
'llm.meta_llama.llama3': ['Meta-Llama-3-8B'],
'llm.meta_llama.llama3_it': ['Meta-Llama-3-8B-Instruct'],
'llm.meta_llama.llama3_1_it': ['Meta-Llama-3.1-8B-Instruct'],
'llm.mlabonne.neuraldaredevil': ['NeuralDaredevil-7B'],
'llm.xenon1.metamodel_moex8': ['MetaModel_moex8'],
'llm.togethercomputer.redpajama': ['RedPajama-INCITE-Chat-3B-v1'],
'llm.openai.gpt': ['gpt-4-turbo-128k', 'gpt-4o-0513'],
'llm.openai.gpt2': ['gpt2-large', 'gpt2-medium'],
'llm.google.gemma': ['gemma-2b', 'gemma-2b-it', 'gemma-7b-it', 'gemma-2-9b-it'],
'llm.bigscience.bloomz': ['bloomz-560m', 'bloom-1b7'],
'llm.baichuan_inc.baichuan2_chat': ['Baichuan2-7B-Chat'],
'llm.openxlab.claude': ['claude-3-opus-20240229'],
'llm.baichuan_inc.baichuan': ['Baichuan-7B'],
'llm.ensemble.packllm': ['PackLLM'],
'llm.learnware.xranker': ['xranker'],
'llm.alibaba_nlp.gte_qwen2_it': ['gte-Qwen2-7B-instruct', 'gte-Qwen2-1.5B-instruct']
}
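# Maps dataset-handling modules to the datasets they implement; inverted into
# DATASET2MODULE below.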
_MODULE2DATASET = {
'coco': ['coco_val'],
'ocrbench': ['ocrbench'],
'eq_bench': ['eq_bench'],
'mme': ['mme'],
'hallusionbench': ['hallusionbench'],
'mm_cc_bench': ['mmbench', 'ccbench'],
'truthfulqa_mc2': ['truthfulqa_mc2', 'truthfulqa_multilingual_mc2'],
'bbh': ['bbh'],
'drop': ['drop']
}
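# Standard few-shot counts; the loops below also register shot-suffixed
# variants such as 'mmlu_5'.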
STANDARD_DATASET2SHOTS = {
'arc_challenge': 25,
'arc_easy': 25,
'hellaswag': 10,
'mmlu': 5,
'winogrande': 5,
'gsm8k': 5,
'bbh': 3
}
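# Build the reverse lookups: model name -> wrapper module, dataset name -> handler module.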
MODEL2MODULE = {}
for module_type, models in _MODULE2MODEL.items():
    for model in models:
        MODEL2MODULE[model] = module_type
DATASET2MODULE = {}
for dataset_module, dataset_names in _MODULE2DATASET.items():
    for dataset in dataset_names:
        DATASET2MODULE[dataset] = dataset_module
        if dataset in STANDARD_DATASET2SHOTS:
            DATASET2MODULE[f'{dataset}_{STANDARD_DATASET2SHOTS[dataset]}'] = dataset_module
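# Stop strings that terminate generation for each generative dataset.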
GEN_DATASET2UNTIL = {
'bbh': ["</s>", "Q", "\n\n"],
'gsm8k': ["</s>", "Question", "<|im_end|>", "<|endoftext|>"],
'drop': ['.'],
'eq_bench': ['\n\n'],
'logieval': ['\n\n'],
'nq_open': ['\n', '.', ','],
}
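# Deterministic decoding by default (no sampling, temperature 0).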
GEN_DO_SAMPLE = False
GEN_TEMPERATURE = 0.0
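# Dataset name -> JSON data file; the loop below also registers the
# shot-suffixed variants (e.g. 'gsm8k_5').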
DATASET2FILE = {
i: f'{i}.json' for i in datasets
}
for d, d_shot in STANDARD_DATASET2SHOTS.items():
    d_new = f'{d}_{d_shot}'
    if d in GEN_DATASET2UNTIL:
        GEN_DATASET2UNTIL[d_new] = GEN_DATASET2UNTIL[d]
    if d in DATASET2FILE:
        DATASET2FILE[d_new] = DATASET2FILE[d].replace(d, d_new)
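# Per-language instruction prompts for each answer type
# (multiple choice / open-ended / yes-no).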
TYPE2LANGUAGE2PROMPT = {
'multiple_choice': {
'EN': 'Please select the correct answer from the options above.\n',
'ZH': '请直接选择正确选项的字母。\n',
'AR': 'الرجاء اختيار الإجابة الصحيحة من الخيارات أعلاه.\n',
'RU': 'Пожалуйста, выберите букву правильного варианта напрямую.\n'
},
'open': {
'EN': 'Please answer the question directly.\n',
'ZH': '请直接回答问题。\n',
'AR': 'يرجى الإجابة على السؤال مباشرة.\n',
'RU': 'Пожалуйста, ответьте на вопрос прямо.\n'
},
'yes_or_no': {
'EN': 'Please answer Yes or No.\n',
'ZH': '请回答是或否。\n',
'AR': 'من فضلك أجب بنعم أو لا.\n',
'RU': 'Пожалуйста, ответьте Да или Нет.\n',
},
}
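# Prompts for the answer-matching filter (an LLM judge that maps a free-form
# response onto the expected options); '<possibilites>' is kept verbatim as a
# placeholder, presumably substituted with the actual option letters at runtime.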
FILTER_TYPE2LANGUAGE2PROMPT = {
'multiple_choice': {
'EN': '''
Please help me match an answer to the multiple choices of the question.
The options are ABCDEF.
You get a question and an answer,
You need to figure out which options from ABCDEF are most similar to the answer.
If the meanings of all options are significantly different from the answer, output Unknown.
You should output one or more options from the following: ABCDEF.
Example 1:
Question: Which of the following numbers are positive? \nA.0\nB.-1\nC.5\nD.102\nE.-56\nF.33
Answer: The answers are 5, 102 and 33. \nYour output:CDF
Example 2:
Question: Which of these countries is landlocked? \nA.Mongolia \nB.United States \nC.China \nD.Japan
Answer: A.Mongolia is a landlocked country. \nYour output:A
Here are the target questions and answers.\n
''',
'ZH': f'''
请帮我把一个答案与问题的多个选项相匹配。
选项有<possibilites>。
你会得到一个问题和一个答案,
你需要找出哪几个选项<possibilites>与答案最相似。
如果所有选项的含义都与答案显著不同,则输出Unknown。
你应该从以下几个选项中输出一个或多个选项:<possibilites>。
示例1:
问题:下面哪几个数字是正数?\nA.0\nB.-1\nC.5\nD.102\nE.-56\nF.33
答案:答案是5,102和33。\n你的输出:CDF
示例2:
问题:下面哪个国家是内陆国家?\nA.蒙古\nB.美国\nC.中国\nD.日本
答案:A.蒙古国是内陆国家。\n你的输出:A
下面是目标的问题和答案。\n
''',
},
'open': {
'EN': '''
Please help me extract the answer to the given question from the answer provided.
You get a question and an answer,
If the answer is not relevant to the question, output Unknown.
Example 1:
Question: What color is the sky?
Answer: The sky is blue most of the time.\nYour output: The sky is blue
Example 2:
Question: Who invented the electric light?
Answer: Edison is often credited with inventing the light bulb. In fact, he only improved it. The actual inventor of the light bulb was Henry Goebbels.\nYour output: Henry Goebbels
Here are the target questions and answers. \n
''',
'ZH': '''
请帮我从给出的答案中提取出给出问题的回答。
你会得到一个问题和一个答案,
如果答案和问题不相关,则输出Unknown。
示例1:
问题:天空是什么颜色的?
答案:天空是在大多数时候是蓝色的。\n你的输出:天空是蓝色的
示例2:
问题:是谁发明了电灯?
答案:人们通常认为是爱迪生发明了电灯泡,实际上不然,他只是改进了电灯泡。电灯泡的实际发明人是亨利·戈培尔。\n你的输出:亨利·戈培尔
下面是目标的问题和答案。\n
'''
},
'yes_or_no': {
'ZH': '''
请帮我把一个答案与问题的两个选项相匹配。
选项只有“是/否”。
你会得到一个问题和一个答案,
你需要找出哪个选项(是/否)与答案最相似。
如果所有选项的含义都与答案显著不同,则输出Unknown。
你应该从以下3个选项中输出一个单词:Yes, No, Unknown。
示例1:
问题:图像中的单词是“Hello”吗?
答案:这个图像中的单词是“Hello”。\n你的输出:Yes
示例2:
问题:图像中的单词是“Hello”吗?
答案:这个图像中的单词不是“Hello”。\n你的输出:No
下面是目标的问题和答案。\n
''',
'EN': '''
Please help me to match an answer with two options of a question.
The options are only Yes / No.
You are provided with a question and an answer,
and you need to find which option (Yes / No) is most similar to the answer.
If the meanings of all options are significantly different from the answer, output Unknown.
You should output a single word from the following 3 choices: Yes, No, Unknown.
Example 1:
Question: Is the word in this image 'Hello'?
Answer: The word in this image is 'Hello'.\nYour output: Yes
Example 2:
Question: Is the word in this image 'Hello'?
Answer: The word in this image is not 'Hello'.\nYour output: No\n
Now here are the target's Question and Answer:\n
'''
},
}
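# Image placeholder tokens expected in each dataset's prompts; any dataset not
# listed here falls back to a single '<image>' token via the update below.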
DATASET2DEFAULT_IMAGE_TOKEN = {
'mmmu_val': ['<image 1>', '<image 2>', '<image 3>', '<image 4>', '<image 5>', '<image 6>', '<image 7>', '<image 8>'],
}
DATASET2DEFAULT_IMAGE_TOKEN.update(
{d: ['<image>'] for d in datasets if d not in DATASET2DEFAULT_IMAGE_TOKEN}
)
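# Minimal usage sketch (illustrative only, not part of the pipeline): how a
# runner could resolve the wrapper module, few-shot variant, data file and
# stop strings for one model/dataset pair from the mappings above. The names
# model_name / dataset_name below are hypothetical example inputs.
if __name__ == '__main__':
    model_name = 'Qwen2-7B-Instruct'
    dataset_name = 'gsm8k'

    module_path = MODEL2MODULE.get(model_name)                # dotted import path of the wrapper
    num_shots = STANDARD_DATASET2SHOTS.get(dataset_name, 0)   # 5 for gsm8k
    shot_variant = f'{dataset_name}_{num_shots}' if num_shots else dataset_name
    data_file = DATASET2FILE.get(shot_variant) or DATASET2FILE.get(dataset_name)
    stop_strings = GEN_DATASET2UNTIL.get(dataset_name, [])

    print(model_name, module_path, shot_variant, data_file, stop_strings)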