6 changes: 4 additions & 2 deletions vllm/model_executor/layers/fused_moe/fused_moe.py

@@ -872,8 +872,10 @@ def get_moe_configs(
     for config_file_path in config_file_paths:
         if os.path.exists(config_file_path):
             with open(config_file_path) as f:
-                logger.info(
-                    "Using configuration from %s for MoE layer.", config_file_path
+                logger.info_once(
+                    "Using configuration from %s for MoE layer.",
+                    config_file_path,
+                    scope="global",
                 )
                 # If a configuration has been found, return it
                 tuned_config = json.load(f)
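Note: `logger.info_once` deduplicates a record that plain `logger.info` would emit once per MoE layer. Below is a minimal sketch of the assumed usage; the `scope` argument is taken from this diff, but treating "global" as process-wide deduplication and "local" as per-process deduplication is an assumption, and the config path is hypothetical.

    from vllm.logger import init_logger

    logger = init_logger(__name__)

    for layer_idx in range(3):
        # Emitted a single time, even though the call site runs once per layer.
        logger.info_once(
            "Using configuration from %s for MoE layer.",
            "/path/to/E=8,N=14336.json",  # hypothetical config path
            scope="global",
        )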
10 changes: 7 additions & 3 deletions vllm/model_executor/layers/quantization/fp8.py

@@ -162,9 +162,11 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
     # deepGEMM on supported platforms with block-quantized weights
     if envs.VLLM_USE_DEEP_GEMM and envs.VLLM_MOE_USE_DEEP_GEMM and block_quant:
         if not has_deep_gemm():
-            logger.warning_once("DeepGEMM backend requested but not available.")
+            logger.warning_once(
+                "DeepGEMM backend requested but not available.", scope="local"
+            )
         elif is_deep_gemm_supported():
-            logger.info_once("Using DeepGEMM backend for FP8 MoE")
+            logger.info_once("Using DeepGEMM backend for FP8 MoE", scope="local")
             return Fp8MoeBackend.DEEPGEMM
 
     # CUTLASS BlockScaled GroupedGemm on SM100 with block-quantized weights
@@ -173,7 +175,9 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
         and current_platform.is_device_capability(100)
         and block_quant
     ):
-        logger.info_once("Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE")
+        logger.info_once(
+            "Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE", scope="local"
+        )
         return Fp8MoeBackend.CUTLASS_BLOCK_SCALED_GROUPED_GEMM
 
     # default to Triton
3 changes: 2 additions & 1 deletion vllm/v1/core/kv_cache_utils.py

@@ -1231,10 +1231,11 @@ def _report_kv_cache_config(
     max_concurrency = get_max_concurrency_for_kv_cache_config(
         vllm_config, kv_cache_config
    )
-    logger.info(
+    logger.info_once(
         "Maximum concurrency for %s tokens per request: %.2fx",
         max_model_len_str,
         max_concurrency,
+        scope="local",
     )
25 changes: 13 additions & 12 deletions vllm/v1/worker/gpu_worker.py

@@ -91,18 +91,19 @@ def __init__(
         if envs.VLLM_TORCH_PROFILER_DIR:
             torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
             worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
-            logger.info(
-                "Profiling enabled. Traces will be saved to: %s",
-                torch_profiler_trace_dir,
-            )
-            logger.debug(
-                "Profiler config: record_shapes=%s,"
-                "profile_memory=%s,with_stack=%s,with_flops=%s",
-                envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
-                envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
-                envs.VLLM_TORCH_PROFILER_WITH_STACK,
-                envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
-            )
+            if getattr(self.parallel_config, "data_parallel_rank", 0) == 0:
Review comment (Member), on the `if getattr(...)` line above:

    I don't think we need to use getattr here, since it should always be an attribute.
    Also, we should maybe use data_parallel_rank_local instead, so that this is logged on each node, e.g.:

    Suggested change:
    -            if getattr(self.parallel_config, "data_parallel_rank", 0) == 0:
    +            if self.parallel_config.data_parallel_rank_local in (None, 0):
+                logger.info(
+                    "Profiling enabled. Traces will be saved to: %s",
+                    torch_profiler_trace_dir,
+                )
+                logger.debug(
+                    "Profiler config: record_shapes=%s,"
+                    "profile_memory=%s,with_stack=%s,with_flops=%s",
+                    envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
+                    envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
+                    envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                    envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
+                )
             self.profiler = torch.profiler.profile(
                 activities=[
                     torch.profiler.ProfilerActivity.CPU,
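To make the review suggestion above concrete, here is a hedged sketch of the two gating conditions. It assumes that `data_parallel_rank` is the global data-parallel rank while `data_parallel_rank_local` is the rank within one node and may be `None` when unset; those semantics are inferred, not taken from this PR.

    # Sketch only; the attribute semantics are assumptions.

    def log_on_first_global_rank(parallel_config) -> bool:
        # As committed in this diff: only one process in the whole
        # deployment emits the profiler log lines.
        return getattr(parallel_config, "data_parallel_rank", 0) == 0

    def log_on_first_local_rank(parallel_config) -> bool:
        # Reviewer's variant: the first DP rank on each node logs,
        # treating an unset (None) local rank like rank 0.
        return parallel_config.data_parallel_rank_local in (None, 0)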