From 84bee4bd5c41896d626186c9265f30824b928f7a Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Thu, 23 Jan 2025 00:56:54 +0800
Subject: [PATCH] [Misc] Improve the readability of BNB error messages (#12320)

Signed-off-by: Jee Jee Li
---
 vllm/model_executor/model_loader/loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index f697c3245f098..e9779878710ee 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -1076,8 +1076,8 @@ def _load_weights(self, model_config: ModelConfig,
         # weight tensor. So TP does not work with pre_quantized bnb models.
         if pre_quant and get_tensor_model_parallel_world_size() > 1:
             raise ValueError(
-                "Prequant BitsAndBytes models with TP is not supported."
-                "Please try with PP.")
+                "Prequant BitsAndBytes models with tensor parallelism is not "
+                "supported. Please try with pipeline parallelism.")
 
         load_8bit = False
         if pre_quant: