set torch_dtype to float16 for XPU #875

Merged 1 commit on Dec 16, 2024
gptqmodel/models/loader.py (2 changes: 1 addition & 1 deletion)

@@ -227,7 +227,7 @@ def from_quantized(
             **cached_file_kwargs,
         )

-        if torch_dtype is None or torch_dtype == "auto":
+        if torch_dtype is None or torch_dtype == "auto" or device == DEVICE.XPU:
             # TODO FIX ME for `dynamic`, non-quantized modules should be in native type
             torch_dtype = torch.float16
             # auto_dtype_from_config(config=config, device=device, device_map=device_map)
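For context, here is a minimal standalone sketch of the dtype-selection behavior after this patch: when the target device is XPU, or when no dtype is given (None or "auto"), the loader falls back to torch.float16. The `resolve_torch_dtype` helper and the simplified `DEVICE` enum below are illustrative stand-ins, not the library's actual API.

```python
from enum import Enum

import torch


class DEVICE(str, Enum):
    # Simplified stand-in for gptqmodel's device enum; illustrative only.
    CPU = "cpu"
    CUDA = "cuda"
    XPU = "xpu"


def resolve_torch_dtype(torch_dtype, device: DEVICE):
    """Mirror the post-patch condition: XPU always resolves to float16,
    as do callers that pass None or "auto" for torch_dtype."""
    if torch_dtype is None or torch_dtype == "auto" or device == DEVICE.XPU:
        return torch.float16
    return torch_dtype


# Usage examples of the sketch above.
assert resolve_torch_dtype("auto", DEVICE.CUDA) == torch.float16
assert resolve_torch_dtype(torch.bfloat16, DEVICE.XPU) == torch.float16   # XPU forced to fp16
assert resolve_torch_dtype(torch.bfloat16, DEVICE.CUDA) == torch.bfloat16  # explicit dtype kept elsewhere
```

Note that, as written, an explicitly requested dtype (e.g. bfloat16) is overridden on XPU; the in-code TODO about `dynamic` and non-quantized modules suggests this default may be refined later.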