2 changes: 0 additions & 2 deletions tensorrt_llm/_torch/peft/lora/layer.py
@@ -107,8 +107,6 @@ def forward(
module_idx = int(module_idx)
if module_idx in lora_params[layer_idx]:
active_lora_module_ids.append(module_idx)
# TODO (dafrimi): needs to pass this is_dora arg
lora_params[layer_idx][module_idx]['is_dora']
lora_ranks.append(
lora_params[layer_idx][module_idx]['adapter_size'])
lora_weight_pointers.append(
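Note: for context, here is a minimal, hypothetical sketch of the lookup pattern this hunk touches in `LoraLayer.forward` after the dead `is_dora` access is dropped — only `adapter_size` and `weight_pointers` are read per `(layer_idx, module_idx)`. The helper name and the flat-dict inputs are illustrative, not the actual layer code; the nested shape of `lora_params` follows the docstring in `model_engine.py` below.

```python
# Hypothetical, simplified sketch of the per-layer lookup after this change.
def collect_active_modules(lora_params: dict, layer_idx: int,
                           module_ids: list[int]) -> tuple[list, list, list]:
    """Gather adapter sizes and weight pointers for one layer's LoRA modules."""
    active_lora_module_ids = []
    lora_ranks = []
    lora_weight_pointers = []
    layer_params = lora_params.get(layer_idx, {})
    for module_idx in module_ids:
        if module_idx in layer_params:
            active_lora_module_ids.append(module_idx)
            lora_ranks.append(layer_params[module_idx]['adapter_size'])
            lora_weight_pointers.append(
                layer_params[module_idx]['weight_pointers'])
    return active_lora_module_ids, lora_ranks, lora_weight_pointers
```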
4 changes: 3 additions & 1 deletion tensorrt_llm/_torch/pyexecutor/llm_request.py
@@ -317,7 +317,9 @@ def __init__(
self.py_decoding_iter = 0
self.is_attention_dp_dummy = False
self.is_cuda_graph_dummy = False
self.py_lora_task_layer_module_configs = None
self.py_lora_task_layer_module_configs: list[
tensorrt_llm.bindings.internal.runtime.
TaskLayerModuleConfig] | None = None

self.py_return_log_probs = return_log_probs
self.py_return_context_logits = return_context_logits
102 changes: 38 additions & 64 deletions tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -1971,7 +1971,6 @@ def _get_lora_params_from_requests(self,
module_id: dict
{
adapter_size: torch tensor: int
is_dora: torch tensor: bool
weight_pointers: torch tensor: int64
}
}
@@ -1990,88 +1989,63 @@
for module in request.py_lora_task_layer_module_configs:
module_id = module.module_id
layer_id = module.layer_id
adapter_size = module.adapter_size
is_dora = module.scaling_vec_pointer == 0
weights_in_pointer = module.weights_in_pointer
weights_out_pointer = module.weights_out_pointer
scaling_vec_pointer = module.scaling_vec_pointer
if weights_in_pointer is None:
weights_in_pointer = 0
if weights_out_pointer is None:
weights_out_pointer = 0
if scaling_vec_pointer is None:
scaling_vec_pointer = 0

if layer_id not in lora_params:
lora_params[layer_id] = {}
if module_id not in lora_params[layer_id]:
lora_params[layer_id][module_id] = {}

if 'adapter_size' not in lora_params[layer_id][module_id]:
lora_params[layer_id][module_id]['adapter_size'] = []
if 'is_dora' not in lora_params[layer_id][module_id]:
lora_params[layer_id][module_id]['is_dora'] = []
if 'weight_pointers' not in lora_params[layer_id][module_id]:
lora_params[layer_id][module_id]['weight_pointers'] = []

tmp_lora_params[
f'{request.py_request_id}_{layer_id}_{module_id}_adapter_size'] = [
adapter_size
]
tmp_lora_params[
f'{request.py_request_id}_{layer_id}_{module_id}_is_dora'] = [
is_dora
]
tmp_lora_params[
f'{request.py_request_id}_{layer_id}_{module_id}_weights_pointer'] = [
weights_in_pointer, weights_out_pointer,
scaling_vec_pointer
]
lora_params[layer_id][module_id] = {
'adapter_size': [],
'weight_pointers': [],
}

scaling_vec_pointer = module.scaling_vec_pointer
if scaling_vec_pointer is None:
scaling_vec_pointer = 0
tmp_lora_params[(request.py_request_id, layer_id,
module_id)] = {
'adapter_size': [module.adapter_size],
'weight_pointers': [
module.weights_in_pointer,
module.weights_out_pointer,
scaling_vec_pointer
],
}

for request in request_list:
# Need to set default values for this case
if request.py_lora_task_layer_module_configs is None:
for layer_id in lora_params:
for module_id in lora_params[layer_id]:
lora_params[layer_id][module_id]['adapter_size'].append(
0)
lora_params[layer_id][module_id]['is_dora'].append(
False)
lora_params[layer_id][module_id]['weight_pointers'] += [
0, 0, 0
]
current_lora_params = lora_params[layer_id][module_id]
current_lora_params['adapter_size'].append(0)
current_lora_params['weight_pointers'] += [0, 0, 0]

else:
for layer_id in lora_params:
for module_id in lora_params[layer_id]:
if f'{request.py_request_id}_{layer_id}_{module_id}_adapter_size' not in tmp_lora_params:
lora_params[layer_id][module_id][
'adapter_size'].append(0)
lora_params[layer_id][module_id]['is_dora'].append(
False)
lora_params[layer_id][module_id][
'weight_pointers'] += [0, 0, 0]
current_tmp_lora_params = tmp_lora_params.get(
(request.py_request_id, layer_id, module_id), None)
current_lora_params = lora_params[layer_id][module_id]
if current_tmp_lora_params is None:
current_lora_params['adapter_size'].append(0)
current_lora_params['weight_pointers'] += [0, 0, 0]
else:
lora_params[layer_id][module_id][
'adapter_size'] += tmp_lora_params[
f'{request.py_request_id}_{layer_id}_{module_id}_adapter_size']
lora_params[layer_id][module_id][
'is_dora'] += tmp_lora_params[
f'{request.py_request_id}_{layer_id}_{module_id}_is_dora']
lora_params[layer_id][module_id][
'weight_pointers'] += tmp_lora_params[
f'{request.py_request_id}_{layer_id}_{module_id}_weights_pointer']
current_lora_params[
'adapter_size'] += current_tmp_lora_params[
'adapter_size']
current_lora_params[
'weight_pointers'] += current_tmp_lora_params[
'weight_pointers']

for layer_id in lora_params:
for module_id in lora_params[layer_id]:
lora_params[layer_id][module_id][
'adapter_size'] = torch.IntTensor(
lora_params[layer_id][module_id]['adapter_size'])
lora_params[layer_id][module_id][
'weight_pointers'] = torch.LongTensor(
lora_params[layer_id][module_id]['weight_pointers'])
current_lora_params = lora_params[layer_id][module_id]
current_lora_params['adapter_size'] = torch.IntTensor(
current_lora_params['adapter_size'])
current_lora_params['weight_pointers'] = torch.LongTensor(
current_lora_params['weight_pointers'])

if bool(lora_params):
if lora_params:
lora_params['host_request_types'] = attn_metadata.host_request_types
lora_params['prompt_lens_cpu'] = attn_metadata.prompt_lens_cpu
lora_params['num_seqs'] = attn_metadata.num_seqs
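Note: to make the refactor concrete, a hedged sketch of the data `_get_lora_params_from_requests` now builds for a toy two-request batch, where request 0 has a LoRA config and request 1 does not. `tmp_lora_params` is keyed by a `(request_id, layer_id, module_id)` tuple instead of per-field formatted strings, and the final `lora_params` holds an `IntTensor` of adapter sizes plus a `LongTensor` of weight-pointer triples per `(layer, module)`, zero-padded for requests without a config. All ids, sizes, and pointer values below are made up for illustration.

```python
import torch

# Intermediate map, now keyed by (request_id, layer_id, module_id) tuples
# rather than f'{request_id}_{layer_id}_{module_id}_...' strings per field.
tmp_lora_params = {
    (0, 3, 7): {
        'adapter_size': [16],
        # [weights_in_pointer, weights_out_pointer, scaling_vec_pointer];
        # scaling_vec_pointer falls back to 0 when it is None.
        'weight_pointers': [0x7f00_0000, 0x7f00_1000, 0],
    },
}

# Final per-layer/per-module structure: request 1 has no
# py_lora_task_layer_module_configs, so it contributes a 0 adapter size and a
# [0, 0, 0] pointer triple before the lists are tensorized. When lora_params
# is non-empty, host_request_types, prompt_lens_cpu and num_seqs from
# attn_metadata are attached alongside these entries.
lora_params = {
    3: {
        7: {
            'adapter_size': torch.IntTensor([16, 0]),
            'weight_pointers': torch.LongTensor(
                [0x7f00_0000, 0x7f00_1000, 0, 0, 0, 0]),
        },
    },
}
```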
@@ -419,31 +419,23 @@ def test_lora_attention(self):
lora_params['lora_ranks'],
'weight_pointers':
lora_params['lora_weights_pointers'],
'is_dora':
False,
},
LoraModuleType.ATTENTION_K: {
'adapter_size':
lora_params['lora_ranks'],
'weight_pointers': lora_params['lora_weights_pointers'],
'is_dora':
False,
},
LoraModuleType.ATTENTION_V: {
'adapter_size':
lora_params['lora_ranks'],
'weight_pointers':
lora_params['lora_weights_pointers'],
'is_dora':
False,
},
LoraModuleType.ATTENTION_DENSE: {
'adapter_size':
lora_params['lora_ranks'],
'weight_pointers':
lora_params['lora_weights_pointers'],
'is_dora':
False,
}
}
}
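Note: after this cleanup, each attention-module entry in the test's per-layer dict carries only `adapter_size` and `weight_pointers`. A short illustrative sketch of the trimmed shape — the module names come from the diff above, but the string stand-ins for `LoraModuleType` members and the tensor values are placeholders, not the real test fixtures.

```python
import torch

lora_ranks = torch.IntTensor([8])                     # example rank
lora_weights_pointers = torch.LongTensor([0, 0, 0])   # example pointers

# Stand-in keys for LoraModuleType.ATTENTION_Q/K/V/DENSE; no 'is_dora' entry.
attention_lora_params = {
    module_type: {
        'adapter_size': lora_ranks,
        'weight_pointers': lora_weights_pointers,
    }
    for module_type in (
        'ATTENTION_Q', 'ATTENTION_K', 'ATTENTION_V', 'ATTENTION_DENSE')
}
```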