test_llm.py
# Load model directly
import torch  # needed by the commented 4-bit config below (bnb_4bit_compute_dtype)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
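# Alternative: 4-bit NF4 quantization (requires the bitsandbytes package)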
# quant_config = BitsAndBytesConfig(
# load_in_4bit=True,
# bnb_4bit_quant_type="nf4",
# bnb_4bit_compute_dtype=torch.float16,
# bnb_4bit_use_double_quant=False
# )
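# Alternative: 8-bit quantization (requires the bitsandbytes package)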
# quant_config = BitsAndBytesConfig(
# load_in_8bit=True,
# llm_int8_threshold=6.0
# )
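# Earlier model candidates, kept for reference: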
# tokenizer = AutoTokenizer.from_pretrained("EleutherAI/llemma_7b")
# model = AutoModelForCausalLM.from_pretrained("EleutherAI/llemma_7b", quantization_config=quant_config, device_map={"": 0})
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", quantization_config=quant_config,
# device_map={"": 0})
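# Current model: DeepSeek-Math 7B base; device_map='auto' shards layers across available devices.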
tokenizer = AutoTokenizer.from_pretrained('deepseek-ai/deepseek-math-7b-base')
model = AutoModelForCausalLM.from_pretrained('deepseek-ai/deepseek-math-7b-base', device_map='auto')
# encode() takes a string, not a list; return_tensors='pt' yields the
# batched tensor that generate() expects.
input_ids = tokenizer.encode('Hello', return_tensors='pt').to(model.device)
output = model.generate(input_ids)
# generate() returns a batch, so decode the first sequence and drop special tokens.
print(tokenizer.decode(output[0], skip_special_tokens=True))
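
# --- Optional: quantized loading (a minimal, untested sketch) ---
# The block below wires the 4-bit config from the top of the file to the
# DeepSeek model actually used here; the original never combines the two,
# so this pairing is an assumption. It is kept commented out, matching the
# file's convention for alternatives, and requires the bitsandbytes package
# plus a CUDA GPU. max_new_tokens=64 is illustrative, not from the original.
# quant_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     'deepseek-ai/deepseek-math-7b-base',
#     quantization_config=quant_config,
#     device_map='auto',
# )
# output = model.generate(input_ids, max_new_tokens=64)
# print(tokenizer.decode(output[0], skip_special_tokens=True))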