diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index 85164c2165a3..102e40d3f448 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -479,7 +479,7 @@ def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor:
         # ColumnParallelLinear.
         else:
             tensor_model_parallel_rank = get_tensor_model_parallel_rank()
-            shard_size = self.output_dim
+            shard_size = self.output_size
             start_idx = tensor_model_parallel_rank * shard_size
             end_idx = (tensor_model_parallel_rank + 1) * shard_size
             lora_b = lora_b[:, start_idx:end_idx]
@@ -490,7 +490,7 @@ def slice_bias(self, bias: torch.Tensor) -> torch.Tensor:
         if bias is None:
             return bias
         tensor_model_parallel_rank = get_tensor_model_parallel_rank()
-        shard_size = self.output_dim
+        shard_size = self.output_size
         start_idx = tensor_model_parallel_rank * shard_size
         end_idx = (tensor_model_parallel_rank + 1) * shard_size
         bias = bias[start_idx:end_idx]
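
For context, the arithmetic these hunks touch has each tensor-parallel rank take a contiguous slice of width shard_size from the output dimension of lora_b (and of the bias), so shard_size has to be the per-rank output width rather than a dimension index. Below is a minimal standalone sketch of that slicing under those assumptions; slice_lora_b_shard, rank, num_ranks, and shard_size are illustrative names, not vLLM's API.

import torch

# Minimal sketch of the column-shard slicing shown in the diff above.
# Names (rank, num_ranks, shard_size) are illustrative, not vLLM's API.
def slice_lora_b_shard(lora_b: torch.Tensor, rank: int,
                       shard_size: int) -> torch.Tensor:
    # lora_b: (lora_rank, output_size); shard_size is the per-rank output
    # width (output_size // num_ranks), i.e. the role self.output_size
    # plays in the fixed code.
    start_idx = rank * shard_size
    end_idx = (rank + 1) * shard_size
    return lora_b[:, start_idx:end_idx]

full = torch.arange(2 * 8, dtype=torch.float32).reshape(2, 8)
num_ranks = 4
shard_size = full.shape[1] // num_ranks  # 2 output columns per rank
shards = [slice_lora_b_shard(full, r, shard_size) for r in range(num_ranks)]
assert torch.equal(torch.cat(shards, dim=1), full)  # shards tile the full matrix

With a per-rank width of shard_size, concatenating the per-rank slices reconstructs the full LoRA B matrix, which is the invariant the corrected shard_size preserves.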