Skip to content

Commit d8803d1

Browse files
nataxcantdoublep
authored and committed
bug fix (repositioning function now correct)
Signed-off-by: Nathan Ordonez <[email protected]>
1 parent dac47a6 commit d8803d1

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1610,16 +1610,15 @@ def _perform_repositioning(self,
16101610
self._repositionings_handler(blocks_to_reposition)
16111611
else:
16121612
bs = 400
1613-
for i in range(len(blocks_to_reposition) // bs):
1614-
j = bs if i + bs * 2 < len(
1615-
blocks_to_reposition) else i + bs * 2
1616-
repo_batch = blocks_to_reposition[i:j]
1613+
for i in range(0, len(blocks_to_reposition), bs):
1614+
repo_batch = blocks_to_reposition[i:i+bs]
16171615
self._repositionings_handler(repo_batch)
16181616
if envs.VLLM_V1_SPANS_DEBUG and repo_count > 0:
16191617
torch.cuda.synchronize()
16201618
t_repo = time.time() - ts_repo
16211619
print(f'[SPANS -> gpu_model_runner] repositioning' \
1622-
f' speed: {repo_count/t_repo:.2f} (blocks/s)')
1620+
f' speed: {repo_count/t_repo:.2f} (blocks/s)'\
1621+
f' (total {repo_count})')
16231622

16241623
@torch.inference_mode()
16251624
def _repositionings_handler(self, blocks_to_reposition):

0 commit comments

Comments
 (0)