From fb756c8be49695f000896756dddef97e52973a33 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sat, 31 Aug 2024 19:05:58 +0800 Subject: [PATCH 1/5] accept gguf model without .gguf extension --- vllm/transformers_utils/config.py | 5 +++-- vllm/transformers_utils/tokenizer.py | 4 ++-- vllm/transformers_utils/utils.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 vllm/transformers_utils/utils.py diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index f3ac8d3178d4e..83586a05b1232 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -16,6 +16,7 @@ MedusaConfig, MLPSpeculatorConfig, MPTConfig, NemotronConfig, RWConfig, UltravoxConfig) +from vllm.transformers_utils.utils import check_gguf_file if VLLM_USE_MODELSCOPE: from modelscope import AutoConfig @@ -56,7 +57,7 @@ def get_config( ) -> PretrainedConfig: # Separate model folder from file path for GGUF models - is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf" + is_gguf = check_gguf_file(Path(model)) if is_gguf: kwargs["gguf_file"] = Path(model).name model = Path(model).parent @@ -112,7 +113,7 @@ def get_hf_image_processor_config( if VLLM_USE_MODELSCOPE: return dict() # Separate model folder from file path for GGUF models - if Path(model).is_file() and Path(model).suffix == ".gguf": + if check_gguf_file(Path(model)): model = Path(model).parent return get_image_processor_config(model, revision=revision, **kwargs) diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index 2866975850db3..5b7b3638ddece 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -12,6 +12,7 @@ from vllm.lora.request import LoRARequest from vllm.transformers_utils.tokenizers import (BaichuanTokenizer, MistralTokenizer) +from vllm.transformers_utils.utils import check_gguf_file from vllm.utils import make_async logger = init_logger(__name__) @@ -96,8 +97,7 @@ def get_tokenizer( kwargs["truncation_side"] = "left" # Separate model folder from file path for GGUF models - is_gguf = Path(tokenizer_name).is_file() and Path( - tokenizer_name).suffix == ".gguf" + is_gguf = check_gguf_file(Path(tokenizer_name)) if is_gguf: kwargs["gguf_file"] = Path(tokenizer_name).name tokenizer_name = Path(tokenizer_name).parent diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py new file mode 100644 index 0000000000000..729a5f449ed5a --- /dev/null +++ b/vllm/transformers_utils/utils.py @@ -0,0 +1,11 @@ +from pathlib import Path + +def check_gguf_file(model: Path) -> bool: + """ + Check if the file is a GGUF model and extract the file name + """ + if model.is_file(): + with open(model, "rb") as f: + header = f.read(4) + return header == b"GGUF" + return False From 8d73c77b10573abb663205e836d955bccdd68fbf Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sat, 31 Aug 2024 19:09:24 +0800 Subject: [PATCH 2/5] code format --- vllm/transformers_utils/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index 729a5f449ed5a..ba943ac48b14e 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -1,9 +1,8 @@ from pathlib import Path + def check_gguf_file(model: Path) -> bool: - """ - Check if the file is a GGUF model and extract the file name - """ + """Check if the file is a GGUF model.""" if model.is_file(): with open(model, "rb") as f: header = f.read(4) From 95314a45367d7d304ad9d37189da867697dd8edb Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sat, 31 Aug 2024 20:17:02 +0800 Subject: [PATCH 3/5] fix unmatch load format and quantization --- vllm/engine/arg_utils.py | 3 ++- vllm/transformers_utils/config.py | 4 ++-- vllm/transformers_utils/tokenizer.py | 2 +- vllm/transformers_utils/utils.py | 5 ++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index d98f57bc2d353..8dbe6504d21bd 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -16,6 +16,7 @@ from vllm.executor.executor_base import ExecutorBase from vllm.logger import init_logger from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS +from vllm.transformers_utils.utils import check_gguf_file from vllm.utils import FlexibleArgumentParser if TYPE_CHECKING: @@ -753,7 +754,7 @@ def from_cli_args(cls, args: argparse.Namespace): def create_engine_config(self) -> EngineConfig: # gguf file needs a specific model loader and doesn't use hf_repo - if self.model.endswith(".gguf"): + if check_gguf_file(self.model): self.quantization = self.load_format = "gguf" # bitsandbytes quantization needs a specific model loader diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 83586a05b1232..dfe83ddb731d4 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -57,7 +57,7 @@ def get_config( ) -> PretrainedConfig: # Separate model folder from file path for GGUF models - is_gguf = check_gguf_file(Path(model)) + is_gguf = check_gguf_file(model) if is_gguf: kwargs["gguf_file"] = Path(model).name model = Path(model).parent @@ -113,7 +113,7 @@ def get_hf_image_processor_config( if VLLM_USE_MODELSCOPE: return dict() # Separate model folder from file path for GGUF models - if check_gguf_file(Path(model)): + if check_gguf_file(model): model = Path(model).parent return get_image_processor_config(model, revision=revision, **kwargs) diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index 5b7b3638ddece..f9fb8d1e103b7 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -97,7 +97,7 @@ def get_tokenizer( kwargs["truncation_side"] = "left" # Separate model folder from file path for GGUF models - is_gguf = check_gguf_file(Path(tokenizer_name)) + is_gguf = check_gguf_file(tokenizer_name) if is_gguf: kwargs["gguf_file"] = Path(tokenizer_name).name tokenizer_name = Path(tokenizer_name).parent diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index ba943ac48b14e..dc8d21161babb 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -1,8 +1,11 @@ +from os import PathLike from pathlib import Path +from typing import Union -def check_gguf_file(model: Path) -> bool: +def check_gguf_file(model: Union[str, PathLike]) -> bool: """Check if the file is a GGUF model.""" + model = Path(model) if model.is_file(): with open(model, "rb") as f: header = f.read(4) From a17a28bbf6c36af341b32465146b0d7a57b386c6 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sat, 31 Aug 2024 21:57:35 +0800 Subject: [PATCH 4/5] add optimisation as suggest --- vllm/transformers_utils/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index dc8d21161babb..1102bf4e22a64 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -6,7 +6,9 @@ def check_gguf_file(model: Union[str, PathLike]) -> bool: """Check if the file is a GGUF model.""" model = Path(model) - if model.is_file(): + if model.is_file() and model.suffix == ".gguf": + return True + elif model.is_file(): with open(model, "rb") as f: header = f.read(4) return header == b"GGUF" From cdb6f717398a44dfa7039233326a4fa17d1c8e6b Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sat, 31 Aug 2024 22:03:17 +0800 Subject: [PATCH 5/5] add optimisation as suggest --- vllm/transformers_utils/utils.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index 1102bf4e22a64..7a9041b04fbb9 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -6,10 +6,11 @@ def check_gguf_file(model: Union[str, PathLike]) -> bool: """Check if the file is a GGUF model.""" model = Path(model) - if model.is_file() and model.suffix == ".gguf": + if not model.is_file(): + return False + elif model.suffix == ".gguf": return True - elif model.is_file(): - with open(model, "rb") as f: - header = f.read(4) - return header == b"GGUF" - return False + + with open(model, "rb") as f: + header = f.read(4) + return header == b"GGUF"