From fb756c8be49695f000896756dddef97e52973a33 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Sat, 31 Aug 2024 19:05:58 +0800
Subject: [PATCH 1/5] accept gguf model without .gguf extension

---
 vllm/transformers_utils/config.py    |  5 +++--
 vllm/transformers_utils/tokenizer.py |  4 ++--
 vllm/transformers_utils/utils.py     | 11 +++++++++++
 3 files changed, 16 insertions(+), 4 deletions(-)
 create mode 100644 vllm/transformers_utils/utils.py

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f3ac8d3178d4e..83586a05b1232 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -16,6 +16,7 @@
                                              MedusaConfig, MLPSpeculatorConfig,
                                              MPTConfig, NemotronConfig,
                                              RWConfig, UltravoxConfig)
+from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
 ) -> PretrainedConfig:
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
+    is_gguf = check_gguf_file(Path(model))
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
     if VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
-    if Path(model).is_file() and Path(model).suffix == ".gguf":
+    if check_gguf_file(Path(model)):
         model = Path(model).parent
     return get_image_processor_config(model, revision=revision, **kwargs)
 
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 2866975850db3..5b7b3638ddece 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -12,6 +12,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
                                                 MistralTokenizer)
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import make_async
 
 logger = init_logger(__name__)
@@ -96,8 +97,7 @@ def get_tokenizer(
         kwargs["truncation_side"] = "left"
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(tokenizer_name).is_file() and Path(
-        tokenizer_name).suffix == ".gguf"
+    is_gguf = check_gguf_file(Path(tokenizer_name))
     if is_gguf:
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
new file mode 100644
index 0000000000000..729a5f449ed5a
--- /dev/null
+++ b/vllm/transformers_utils/utils.py
@@ -0,0 +1,11 @@
+from pathlib import Path
+
+def check_gguf_file(model: Path) -> bool:
+    """
+    Check if the file is a GGUF model and extract the file name
+    """
+    if model.is_file():
+        with open(model, "rb") as f:
+            header = f.read(4)
+        return header == b"GGUF"
+    return False

From 8d73c77b10573abb663205e836d955bccdd68fbf Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Sat, 31 Aug 2024 19:09:24 +0800
Subject: [PATCH 2/5] code format

---
 vllm/transformers_utils/utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
index 729a5f449ed5a..ba943ac48b14e 100644
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -1,9 +1,8 @@
 from pathlib import Path
 
+
 def check_gguf_file(model: Path) -> bool:
-    """
-    Check if the file is a GGUF model and extract the file name
-    """
+    """Check if the file is a GGUF model."""
     if model.is_file():
         with open(model, "rb") as f:
             header = f.read(4)

From 95314a45367d7d304ad9d37189da867697dd8edb Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Sat, 31 Aug 2024 20:17:02 +0800
Subject: [PATCH 3/5] fix mismatched load format and quantization

---
 vllm/engine/arg_utils.py             | 3 ++-
 vllm/transformers_utils/config.py    | 4 ++--
 vllm/transformers_utils/tokenizer.py | 2 +-
 vllm/transformers_utils/utils.py     | 5 ++++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d98f57bc2d353..8dbe6504d21bd 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -16,6 +16,7 @@
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import FlexibleArgumentParser
 
 if TYPE_CHECKING:
@@ -753,7 +754,7 @@ def from_cli_args(cls, args: argparse.Namespace):
 
     def create_engine_config(self) -> EngineConfig:
         # gguf file needs a specific model loader and doesn't use hf_repo
-        if self.model.endswith(".gguf"):
+        if check_gguf_file(self.model):
             self.quantization = self.load_format = "gguf"
 
         # bitsandbytes quantization needs a specific model loader
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 83586a05b1232..dfe83ddb731d4 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -57,7 +57,7 @@ def get_config(
 ) -> PretrainedConfig:
 
     # Separate model folder from file path for GGUF models
-    is_gguf = check_gguf_file(Path(model))
+    is_gguf = check_gguf_file(model)
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
@@ -113,7 +113,7 @@ def get_hf_image_processor_config(
     if VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
-    if check_gguf_file(Path(model)):
+    if check_gguf_file(model):
         model = Path(model).parent
     return get_image_processor_config(model, revision=revision, **kwargs)
 
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 5b7b3638ddece..f9fb8d1e103b7 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -97,7 +97,7 @@ def get_tokenizer(
         kwargs["truncation_side"] = "left"
 
     # Separate model folder from file path for GGUF models
-    is_gguf = check_gguf_file(Path(tokenizer_name))
+    is_gguf = check_gguf_file(tokenizer_name)
     if is_gguf:
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
index ba943ac48b14e..dc8d21161babb 100644
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -1,8 +1,11 @@
+from os import PathLike
 from pathlib import Path
+from typing import Union
 
 
-def check_gguf_file(model: Path) -> bool:
+def check_gguf_file(model: Union[str, PathLike]) -> bool:
     """Check if the file is a GGUF model."""
+    model = Path(model)
     if model.is_file():
         with open(model, "rb") as f:
             header = f.read(4)

From a17a28bbf6c36af341b32465146b0d7a57b386c6 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Sat, 31 Aug 2024 21:57:35 +0800
Subject: [PATCH 4/5] add optimisation as suggested

---
 vllm/transformers_utils/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
index dc8d21161babb..1102bf4e22a64 100644
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -6,7 +6,9 @@
 def check_gguf_file(model: Union[str, PathLike]) -> bool:
     """Check if the file is a GGUF model."""
     model = Path(model)
-    if model.is_file():
+    if model.is_file() and model.suffix == ".gguf":
+        return True
+    elif model.is_file():
         with open(model, "rb") as f:
             header = f.read(4)
         return header == b"GGUF"

From cdb6f717398a44dfa7039233326a4fa17d1c8e6b Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Sat, 31 Aug 2024 22:03:17 +0800
Subject: [PATCH 5/5] add optimisation as suggested

---
 vllm/transformers_utils/utils.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
index 1102bf4e22a64..7a9041b04fbb9 100644
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -6,10 +6,11 @@
 def check_gguf_file(model: Union[str, PathLike]) -> bool:
     """Check if the file is a GGUF model."""
     model = Path(model)
-    if model.is_file() and model.suffix == ".gguf":
+    if not model.is_file():
+        return False
+    elif model.suffix == ".gguf":
         return True
-    elif model.is_file():
-        with open(model, "rb") as f:
-            header = f.read(4)
-        return header == b"GGUF"
-    return False
+
+    with open(model, "rb") as f:
+        header = f.read(4)
+    return header == b"GGUF"