From fc6c520cb1fc19bd33342d5d0b713d365a2d513e Mon Sep 17 00:00:00 2001
From: Didier Durand
Date: Tue, 2 Sep 2025 18:32:01 +0200
Subject: [PATCH] [Doc]: fix typos in Python comments

Signed-off-by: Didier Durand
---
 .buildkite/nightly-benchmarks/scripts/compare-json-results.py | 2 +-
 benchmarks/benchmark_serving.py                               | 2 +-
 benchmarks/benchmark_serving_structured_output.py             | 2 +-
 benchmarks/benchmark_throughput.py                            | 2 +-
 tools/profiler/visualize_layerwise_profile.py                 | 2 +-
 vllm/compilation/collective_fusion.py                         | 2 +-
 vllm/engine/multiprocessing/engine.py                         | 2 +-
 vllm/model_executor/layers/quantization/utils/w8a8_utils.py   | 2 +-
 vllm/model_executor/model_loader/default_loader.py            | 2 +-
 vllm/v1/worker/xpu_worker.py                                  | 2 +-
 vllm/worker/worker.py                                         | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.buildkite/nightly-benchmarks/scripts/compare-json-results.py b/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
index 50431d0cd4c5..5ea5a50a258a 100644
--- a/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
+++ b/.buildkite/nightly-benchmarks/scripts/compare-json-results.py
@@ -218,7 +218,7 @@ def split_json_by_tp_pp(
         "--xaxis",
         type=str,
         default="# of max concurrency.",
-        help="column name to use as X Axis in comparision graph",
+        help="column name to use as X Axis in comparison graph",
     )
 
     args = parser.parse_args()
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 02f5f585c0c1..934df05efac1 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1104,7 +1104,7 @@ def create_argument_parser():
         "--percentile-metrics",
         type=str,
         default="ttft,tpot,itl",
-        help="Comma-separated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentiles. "
         "This argument specifies the metrics to report percentiles. "
         'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
         'Default value is "ttft,tpot,itl".',
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index ca6843a72aa3..4aae755eb4e4 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -998,7 +998,7 @@ def create_argument_parser():
         "--percentile-metrics",
         type=str,
         default="ttft,tpot,itl",
-        help="Comma-separated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentiles. "
         "This argument specifies the metrics to report percentiles. "
         'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
         'Default value is "ttft,tpot,itl".',
diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index 6b24b8c8f3c6..34a525f00d91 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -719,7 +719,7 @@ def create_argument_parser():
         "[length * (1 - range_ratio), length * (1 + range_ratio)].",
     )
 
-    # hf dtaset
+    # hf dataset
     parser.add_argument(
         "--hf-subset", type=str, default=None, help="Subset of the HF dataset."
     )
diff --git a/tools/profiler/visualize_layerwise_profile.py b/tools/profiler/visualize_layerwise_profile.py
index 038d3c44f043..30d6547073d3 100644
--- a/tools/profiler/visualize_layerwise_profile.py
+++ b/tools/profiler/visualize_layerwise_profile.py
@@ -119,7 +119,7 @@ def all_the_same(items) -> bool:
                 if not all_the_same(trace_eles)), None)
 
         if first_trace_difference is None:
-            # can't create a unique name, leave them names as the
+            # can't create a unique name, leave the names as they
             # are they will get aggregated by the pivot_table call
             continue
 
diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py
index 7a99aaff707d..71274420c342 100644
--- a/vllm/compilation/collective_fusion.py
+++ b/vllm/compilation/collective_fusion.py
@@ -513,7 +513,7 @@ def call_trtllm_fused_allreduce_norm(
                 torch.ops._C.static_scaled_fp8_quant(
                     quant_out, norm_out, scale_factor)
         if scale_factor is None or norm_out is not None:
-            # we need to return allreduce outpput
+            # we need to return allreduce output
             # in cases of non quant fused AR + RMS norm
             # and fused AR + RMS norm + quant without fused add
             allreduce_in.copy_(allreduce_out)
diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py
index 343b8df7e87b..138283d4c8a7 100644
--- a/vllm/engine/multiprocessing/engine.py
+++ b/vllm/engine/multiprocessing/engine.py
@@ -49,7 +49,7 @@ class MQLLMEngine:
 
     This class is used to wrap the
     [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
-    in concurrnet manner. It runs a background loop and uses zeromq to
+    in concurrent manner. It runs a background loop and uses zeromq to
     receive new requests and stream outputs incrementally via ipc.
 
     The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
diff --git a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
index 5333bbd310ff..ecdcc573935c 100644
--- a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -23,7 +23,7 @@
 # The condition to determine if it is on a platform that supports
 # torch._scaled_mm rowwise feature.
 # The condition is determined once as the operations
-# are time consuming.
+# are time-consuming.
 USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() and version.parse(
     torch.__version__) >= version.parse("2.7")
                                and current_platform.has_device_capability(94))
diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py
index 34b8d8e4ed62..1e5aa9e571ed 100644
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -211,7 +211,7 @@ def _get_weights_iterator(
 
         if not USE_TPU_COMMONS:
             # In PyTorch XLA, we should call `xm.mark_step`
-            # requently so that not too many ops are accumulated
+            # frequently so that not too many ops are accumulated
             # in the XLA program. import torch_xla.core.xla_model
             # as xm
             import torch_xla.core.xla_model as xm
diff --git a/vllm/v1/worker/xpu_worker.py b/vllm/v1/worker/xpu_worker.py
index 17288cda8ecc..7355206f30f5 100644
--- a/vllm/v1/worker/xpu_worker.py
+++ b/vllm/v1/worker/xpu_worker.py
@@ -84,7 +84,7 @@ def determine_available_memory(self) -> int:
         """Profiles the peak memory usage of the model to determine how many
         KV blocks may be allocated without OOMs. The engine will first conduct
         a profiling of the existing memory usage.
-        Then, it calculate the maximum possible number of GPU and CPU blocks
+        Then, it calculates the maximum possible number of GPU and CPU blocks
         that can be allocated with the remaining free memory.
 
         .. tip:: You may limit the usage of GPU memory
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index 2e20c89c632c..e2cfeb1b7ba2 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -232,7 +232,7 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
         KV blocks may be allocated without OOMs.
 
         The engine will first conduct a profiling of the existing memory usage.
-        Then, it calculate the maximum possible number of GPU and CPU blocks
+        Then, it calculates the maximum possible number of GPU and CPU blocks
         that can be allocated with the remaining free memory.
 
         Tip: