From c5726edf4582d2ed732d1c4a9bba598d8cb37792 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Wed, 8 Jan 2025 13:08:48 +0800
Subject: [PATCH] [Bugfix] Add checks for LoRA and CPU offload (#11810)

Signed-off-by: Jee Jee Li
---
 vllm/config.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/config.py b/vllm/config.py
index 8b824a1fca511..a9b6d6b19127f 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2051,6 +2051,11 @@ def __post_init__(self):
                     f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
                     f"max_loras ({self.max_loras})")
 
+    def verify_with_cache_config(self, cache_config: CacheConfig):
+        # TODO LoRA supports CPU offload.
+        if cache_config.cpu_offload_gb > 0:
+            raise ValueError("CPU offload is not supported with LoRA yet.")
+
     def verify_with_model_config(self, model_config: ModelConfig):
         if self.lora_dtype in (None, "auto"):
             self.lora_dtype = model_config.dtype
@@ -3138,6 +3143,7 @@ def __post_init__(self):
         self.cache_config.verify_with_parallel_config(self.parallel_config)
 
         if self.lora_config:
+            self.lora_config.verify_with_cache_config(self.cache_config)
             self.lora_config.verify_with_model_config(self.model_config)
             self.lora_config.verify_with_scheduler_config(
                 self.scheduler_config)