Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def split_json_by_tp_pp(
"--xaxis",
type=str,
default="# of max concurrency.",
help="column name to use as X Axis in comparision graph",
help="column name to use as X Axis in comparison graph",
)
args = parser.parse_args()

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ def create_argument_parser():
"--percentile-metrics",
type=str,
default="ttft,tpot,itl",
help="Comma-separated list of selected metrics to report percentils. "
help="Comma-separated list of selected metrics to report percentiles. "
"This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Default value is "ttft,tpot,itl".',
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmark_serving_structured_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,7 @@ def create_argument_parser():
"--percentile-metrics",
type=str,
default="ttft,tpot,itl",
help="Comma-separated list of selected metrics to report percentils. "
help="Comma-separated list of selected metrics to report percentiles. "
"This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'Default value is "ttft,tpot,itl".',
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmark_throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ def create_argument_parser():
"[length * (1 - range_ratio), length * (1 + range_ratio)].",
)

# hf dtaset
# hf dataset
parser.add_argument(
"--hf-subset", type=str, default=None, help="Subset of the HF dataset."
)
Expand Down
2 changes: 1 addition & 1 deletion tools/profiler/visualize_layerwise_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def all_the_same(items) -> bool:
if not all_the_same(trace_eles)), None)

if first_trace_difference is None:
# can't create a unique name, leave them names as the
# can't create a unique name, leave the names as they
# are; they will get aggregated by the pivot_table call
continue

Expand Down
2 changes: 1 addition & 1 deletion vllm/compilation/collective_fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ def call_trtllm_fused_allreduce_norm(
torch.ops._C.static_scaled_fp8_quant(
quant_out, norm_out, scale_factor)
if scale_factor is None or norm_out is not None:
# we need to return allreduce outpput
# we need to return allreduce output
# in cases of non quant fused AR + RMS norm
# and fused AR + RMS norm + quant without fused add
allreduce_in.copy_(allreduce_out)
Expand Down
2 changes: 1 addition & 1 deletion vllm/engine/multiprocessing/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class MQLLMEngine:

This class is used to wrap the
[`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
in concurrnet manner. It runs a background loop and uses zeromq to
in a concurrent manner. It runs a background loop and uses zeromq to
receive new requests and stream outputs incrementally via ipc.

The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# The condition to determine if it is on a platform that supports
# torch._scaled_mm rowwise feature.
# The condition is determined once as the operations
# are time consuming.
# are time-consuming.
USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() and version.parse(
torch.__version__) >= version.parse("2.7")
and current_platform.has_device_capability(94))
Expand Down
2 changes: 1 addition & 1 deletion vllm/model_executor/model_loader/default_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def _get_weights_iterator(

if not USE_TPU_COMMONS:
# In PyTorch XLA, we should call `xm.mark_step`
# requently so that not too many ops are accumulated
# frequently so that not too many ops are accumulated
# in the XLA program. import torch_xla.core.xla_model
# as xm
import torch_xla.core.xla_model as xm
Expand Down
2 changes: 1 addition & 1 deletion vllm/v1/worker/xpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def determine_available_memory(self) -> int:
"""Profiles the peak memory usage of the model to determine how many
KV blocks may be allocated without OOMs.
The engine will first conduct a profiling of the existing memory usage.
Then, it calculate the maximum possible number of GPU and CPU blocks
Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
.. tip::
You may limit the usage of GPU memory
Expand Down
2 changes: 1 addition & 1 deletion vllm/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
KV blocks may be allocated without OOMs.

The engine will first conduct a profiling of the existing memory usage.
Then, it calculate the maximum possible number of GPU and CPU blocks
Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.

Tip:
Expand Down