From 61fed92c7e646d6f2ec5d9de54568a860870e6a4 Mon Sep 17 00:00:00 2001
From: ZincCat <52513999+zinccat@users.noreply.github.com>
Date: Fri, 3 Jan 2025 13:02:34 -0800
Subject: [PATCH] [Bugfix] Fix ColumnParallelLinearWithLoRA slice (#11708)

Signed-off-by: ZincCat
---
 vllm/lora/layers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index 85164c2165a3c..102e40d3f448d 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -479,7 +479,7 @@ def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor:
         # ColumnParallelLinear.
         else:
             tensor_model_parallel_rank = get_tensor_model_parallel_rank()
-            shard_size = self.output_dim
+            shard_size = self.output_size
             start_idx = tensor_model_parallel_rank * shard_size
             end_idx = (tensor_model_parallel_rank + 1) * shard_size
             lora_b = lora_b[:, start_idx:end_idx]
@@ -490,7 +490,7 @@ def slice_bias(self, bias: torch.Tensor) -> torch.Tensor:
         if bias is None:
             return bias
         tensor_model_parallel_rank = get_tensor_model_parallel_rank()
-        shard_size = self.output_dim
+        shard_size = self.output_size
         start_idx = tensor_model_parallel_rank * shard_size
         end_idx = (tensor_model_parallel_rank + 1) * shard_size
         bias = bias[start_idx:end_idx]
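
The patch changes which attribute is used as the shard size when a plain ColumnParallelLinear base layer's lora_b and bias are sliced for the local tensor-parallel rank. Below is a minimal standalone sketch (not vLLM code) of that per-rank slicing arithmetic; the names slice_lora_b_for_rank, slice_bias_for_rank, output_size_per_partition, and tp_rank are illustrative assumptions, not identifiers from vllm/lora/layers.py.

# Minimal sketch of per-tensor-parallel-rank slicing of a LoRA B matrix
# and bias. Assumes the shard size equals the base layer's per-partition
# output size, as in the patched methods above.
import torch


def slice_lora_b_for_rank(full_lora_b: torch.Tensor,
                          output_size_per_partition: int,
                          tp_rank: int) -> torch.Tensor:
    # Each rank keeps a contiguous slice of the output dimension of lora_b,
    # so the locally applied LoRA update matches the local weight shard.
    shard_size = output_size_per_partition
    start_idx = tp_rank * shard_size
    end_idx = (tp_rank + 1) * shard_size
    return full_lora_b[:, start_idx:end_idx]


def slice_bias_for_rank(bias: torch.Tensor,
                        output_size_per_partition: int,
                        tp_rank: int) -> torch.Tensor:
    # The bias is 1-D over the output dimension and is sliced the same way.
    if bias is None:
        return bias
    shard_size = output_size_per_partition
    return bias[tp_rank * shard_size:(tp_rank + 1) * shard_size]


if __name__ == "__main__":
    # Example: LoRA rank 16, full output size 1024, 4-way tensor parallelism,
    # looking at rank 2.
    full_b = torch.randn(16, 1024)
    full_bias = torch.randn(1024)
    shard_b = slice_lora_b_for_rank(full_b, 1024 // 4, tp_rank=2)
    shard_bias = slice_bias_for_rank(full_bias, 1024 // 4, tp_rank=2)
    print(shard_b.shape, shard_bias.shape)  # torch.Size([16, 256]) torch.Size([256])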