6 changes: 4 additions & 2 deletions vllm/model_executor/layers/fused_moe/fused_moe.py

@@ -872,8 +872,10 @@ def get_moe_configs(
     for config_file_path in config_file_paths:
         if os.path.exists(config_file_path):
             with open(config_file_path) as f:
-                logger.info(
-                    "Using configuration from %s for MoE layer.", config_file_path
+                logger.info_once(
+                    "Using configuration from %s for MoE layer.",
+                    config_file_path,
+                    scope="global",
                 )
                 # If a configuration has been found, return it
                 tuned_config = json.load(f)
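Note: `logger.info_once` deduplicates a record that plain `logger.info` would emit once per MoE layer. Below is a minimal sketch of the assumed usage; the `scope` argument is taken from this diff, but treating "global" as process-wide deduplication and "local" as per-process deduplication is an assumption, and the config path is hypothetical.

    from vllm.logger import init_logger

    logger = init_logger(__name__)

    for layer_idx in range(3):
        # Emitted a single time, even though the call site runs once per layer.
        logger.info_once(
            "Using configuration from %s for MoE layer.",
            "/path/to/E=8,N=14336.json",  # hypothetical config path
            scope="global",
        )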
10 changes: 7 additions & 3 deletions vllm/model_executor/layers/quantization/fp8.py

@@ -162,9 +162,11 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
     # deepGEMM on supported platforms with block-quantized weights
     if envs.VLLM_USE_DEEP_GEMM and envs.VLLM_MOE_USE_DEEP_GEMM and block_quant:
         if not has_deep_gemm():
-            logger.warning_once("DeepGEMM backend requested but not available.")
+            logger.warning_once(
+                "DeepGEMM backend requested but not available.", scope="local"
+            )
         elif is_deep_gemm_supported():
-            logger.info_once("Using DeepGEMM backend for FP8 MoE")
+            logger.info_once("Using DeepGEMM backend for FP8 MoE", scope="local")
             return Fp8MoeBackend.DEEPGEMM
 
     # CUTLASS BlockScaled GroupedGemm on SM100 with block-quantized weights
@@ -173,7 +175,9 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
         and current_platform.is_device_capability(100)
         and block_quant
     ):
-        logger.info_once("Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE")
+        logger.info_once(
+            "Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE", scope="local"
+        )
         return Fp8MoeBackend.CUTLASS_BLOCK_SCALED_GROUPED_GEMM
 
     # default to Triton
3 changes: 2 additions & 1 deletion vllm/v1/core/kv_cache_utils.py

@@ -1231,10 +1231,11 @@ def _report_kv_cache_config(
     max_concurrency = get_max_concurrency_for_kv_cache_config(
         vllm_config, kv_cache_config
    )
-    logger.info(
+    logger.info_once(
         "Maximum concurrency for %s tokens per request: %.2fx",
         max_model_len_str,
         max_concurrency,
+        scope="local",
     )
25 changes: 13 additions & 12 deletions vllm/v1/worker/gpu_worker.py

@@ -91,18 +91,19 @@ def __init__(
         if envs.VLLM_TORCH_PROFILER_DIR:
             torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
             worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
-            logger.info(
-                "Profiling enabled. Traces will be saved to: %s",
-                torch_profiler_trace_dir,
-            )
-            logger.debug(
-                "Profiler config: record_shapes=%s,"
-                "profile_memory=%s,with_stack=%s,with_flops=%s",
-                envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
-                envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
-                envs.VLLM_TORCH_PROFILER_WITH_STACK,
-                envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
-            )
+            if getattr(self.parallel_config, "data_parallel_rank", 0) == 0:
Review comment (Member), on the `if getattr(...)` line above:

    I don't think we need to use getattr here, since it should always be an attribute.
    Also, we should maybe use data_parallel_rank_local instead, so that this is logged on each node, e.g.:

    Suggested change:
    -            if getattr(self.parallel_config, "data_parallel_rank", 0) == 0:
    +            if self.parallel_config.data_parallel_rank_local in (None, 0):
+                logger.info(
+                    "Profiling enabled. Traces will be saved to: %s",
+                    torch_profiler_trace_dir,
+                )
+                logger.debug(
+                    "Profiler config: record_shapes=%s,"
+                    "profile_memory=%s,with_stack=%s,with_flops=%s",
+                    envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
+                    envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
+                    envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                    envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
+                )
             self.profiler = torch.profiler.profile(
                 activities=[
                     torch.profiler.ProfilerActivity.CPU,
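To make the review suggestion above concrete, here is a hedged sketch of the two gating conditions. It assumes that `data_parallel_rank` is the global data-parallel rank while `data_parallel_rank_local` is the rank within one node and may be `None` when unset; those semantics are inferred, not taken from this PR.

    # Sketch only; the attribute semantics are assumptions.

    def log_on_first_global_rank(parallel_config) -> bool:
        # As committed in this diff: only one process in the whole
        # deployment emits the profiler log lines.
        return getattr(parallel_config, "data_parallel_rank", 0) == 0

    def log_on_first_local_rank(parallel_config) -> bool:
        # Reviewer's variant: the first DP rank on each node logs,
        # treating an unset (None) local rank like rank 0.
        return parallel_config.data_parallel_rank_local in (None, 0)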