From fc6c520cb1fc19bd33342d5d0b713d365a2d513e Mon Sep 17 00:00:00 2001
From: Didier Durand
Date: Tue, 2 Sep 2025 18:32:01 +0200
Subject: [PATCH] [Doc]: fix typos in Python comments

Signed-off-by: Didier Durand
---
 .buildkite/nightly-benchmarks/scripts/compare-json-results.py | 2 +-
 benchmarks/benchmark_serving.py                               | 2 +-
 benchmarks/benchmark_serving_structured_output.py             | 2 +-
 benchmarks/benchmark_throughput.py                            | 2 +-
 tools/profiler/visualize_layerwise_profile.py                 | 2 +-
 vllm/compilation/collective_fusion.py                         | 2 +-
 vllm/engine/multiprocessing/engine.py                         | 2 +-
 vllm/model_executor/layers/quantization/utils/w8a8_utils.py   | 2 +-
 vllm/model_executor/model_loader/default_loader.py            | 2 +-
 vllm/v1/worker/xpu_worker.py                                  | 2 +-
 vllm/worker/worker.py                                         | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.buildkite/nightly-benchmarks/scripts/compare-json-results.py b/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
index 50431d0cd4c5..5ea5a50a258a 100644
--- a/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
+++ b/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
@@ -218,7 +218,7 @@ def split_json_by_tp_pp(
         "--xaxis",
         type=str,
         default="# of max concurrency.",
-        help="column name to use as X Axis in comparision graph",
+        help="column name to use as X Axis in comparison graph",
     )
 
     args = parser.parse_args()
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 02f5f585c0c1..934df05efac1 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1104,7 +1104,7 @@ def create_argument_parser():
         "--percentile-metrics",
         type=str,
         default="ttft,tpot,itl",
-        help="Comma-separated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentiles. "
         "This argument specifies the metrics to report percentiles. "
         'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
         'Default value is "ttft,tpot,itl".',
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index ca6843a72aa3..4aae755eb4e4 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -998,7 +998,7 @@ def create_argument_parser():
         "--percentile-metrics",
         type=str,
         default="ttft,tpot,itl",
-        help="Comma-separated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentiles. "
         "This argument specifies the metrics to report percentiles. "
         'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
         'Default value is "ttft,tpot,itl".',
diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index 6b24b8c8f3c6..34a525f00d91 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -719,7 +719,7 @@ def create_argument_parser():
         "[length * (1 - range_ratio), length * (1 + range_ratio)].",
     )
 
-    # hf dtaset
+    # hf dataset
     parser.add_argument(
         "--hf-subset", type=str, default=None, help="Subset of the HF dataset."
     )
diff --git a/tools/profiler/visualize_layerwise_profile.py b/tools/profiler/visualize_layerwise_profile.py
index 038d3c44f043..30d6547073d3 100644
--- a/tools/profiler/visualize_layerwise_profile.py
+++ b/tools/profiler/visualize_layerwise_profile.py
@@ -119,7 +119,7 @@ def all_the_same(items) -> bool:
                 if not all_the_same(trace_eles)), None)
 
         if first_trace_difference is None:
-            # can't create a unique name, leave them names as the
+            # can't create a unique name, leave the names as they
             # are they will get aggregated by the pivot_table call
             continue
 
diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py
index 7a99aaff707d..71274420c342 100644
--- a/vllm/compilation/collective_fusion.py
+++ b/vllm/compilation/collective_fusion.py
@@ -513,7 +513,7 @@ def call_trtllm_fused_allreduce_norm(
                 torch.ops._C.static_scaled_fp8_quant(
                     quant_out, norm_out, scale_factor)
         if scale_factor is None or norm_out is not None:
-            # we need to return allreduce outpput
+            # we need to return allreduce output
             # in cases of non quant fused AR + RMS norm
             # and fused AR + RMS norm + quant without fused add
             allreduce_in.copy_(allreduce_out)
diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py
index 343b8df7e87b..138283d4c8a7 100644
--- a/vllm/engine/multiprocessing/engine.py
+++ b/vllm/engine/multiprocessing/engine.py
@@ -49,7 +49,7 @@ class MQLLMEngine:
 
     This class is used to wrap the
     [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
-    in concurrnet manner. It runs a background loop and uses zeromq to
+    in concurrent manner. It runs a background loop and uses zeromq to
     receive new requests and stream outputs incrementally via ipc.
 
     The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
diff --git a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
index 5333bbd310ff..ecdcc573935c 100644
--- a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -23,7 +23,7 @@
 # The condition to determine if it is on a platform that supports
 # torch._scaled_mm rowwise feature.
 # The condition is determined once as the operations
-# are time consuming.
+# are time-consuming.
 USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() and version.parse(
     torch.__version__) >= version.parse("2.7")
                                and current_platform.has_device_capability(94))
diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py
index 34b8d8e4ed62..1e5aa9e571ed 100644
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -211,7 +211,7 @@ def _get_weights_iterator(
 
         if not USE_TPU_COMMONS:
             # In PyTorch XLA, we should call `xm.mark_step`
-            # requently so that not too many ops are accumulated
+            # frequently so that not too many ops are accumulated
             # in the XLA program. import torch_xla.core.xla_model
             # as xm
             import torch_xla.core.xla_model as xm
diff --git a/vllm/v1/worker/xpu_worker.py b/vllm/v1/worker/xpu_worker.py
index 17288cda8ecc..7355206f30f5 100644
--- a/vllm/v1/worker/xpu_worker.py
+++ b/vllm/v1/worker/xpu_worker.py
@@ -84,7 +84,7 @@ def determine_available_memory(self) -> int:
         """Profiles the peak memory usage of the model to determine how many
         KV blocks may be allocated without OOMs. The engine will first conduct
         a profiling of the existing memory usage.
-        Then, it calculate the maximum possible number of GPU and CPU blocks
+        Then, it calculates the maximum possible number of GPU and CPU blocks
         that can be allocated with the remaining free memory.
 
         .. tip:: You may limit the usage of GPU memory
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index 2e20c89c632c..e2cfeb1b7ba2 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -232,7 +232,7 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
         KV blocks may be allocated without OOMs.
 
         The engine will first conduct a profiling of the existing memory usage.
-        Then, it calculate the maximum possible number of GPU and CPU blocks
+        Then, it calculates the maximum possible number of GPU and CPU blocks
         that can be allocated with the remaining free memory.
 
         Tip: