Skip to content

Commit 1bb5fb8

Browse files
committed
Fix nsys capturing
1 parent c878b84 commit 1bb5fb8

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

examples/disaggregated/slurm/disagg_profiler/job_manager.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -672,7 +672,7 @@ def _build_server_launch_command(self, config, hostnames, gpu_indices,
672 672   envs += " TLLM_NVTX_DEBUG=1"
673 673   if 'TLLM_PROFILE_START_STOP' in config['nsys']:
674 674   envs += f" TLLM_PROFILE_START_STOP={config['nsys']['TLLM_PROFILE_START_STOP']}"
675     - nsys_file = os.path.join(self.output_folder, f"{log_file}.nsys-rep")
    675 + nsys_file = os.path.join(self.output_folder, f"{log_file}.$SLURM_PROCID.nsys-rep")
676 676   nsys_prefix = f"nsys profile -e \"NSYS_MPI_STORE_TEAMS_PER_RANK=1\" -o {nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi --cuda-graph-trace node --capture-range-end=stop --gpu-metrics-devices=none"
677 677   log_file = os.path.join(self.output_folder, log_file)
678 678

0 commit comments

Comments
 (0)