-
Notifications
You must be signed in to change notification settings - Fork 375
Open
Labels
Description
I installed the following packages in the latest vLLM Docker image:
RUN uv pip install --system -U torchao==0.13.0 fbgemm-gpu-genai numpy==2.2 torch==2.8.0
I then tried to run vLLM with a w4afp8 model, and the engine core failed to start with the following traceback:
(EngineCore_DP0 pid=283)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] EngineCore failed to start.
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] Traceback (most recent call last):
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 705, in run_engine_core
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] engine_core = DPEngineCoreProc(*args, **kwargs)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 975, in __init__
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] super().__init__(vllm_config, local_client, handshake_address,
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 505, in __init__
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 82, in __init__
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] self.model_executor = executor_class(vllm_config)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/executor_base.py", line 54, in __init__
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] self._init_executor()
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 49, in _init_executor
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] self.collective_rpc("load_model")
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] answer = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/utils/__init__.py", line 3060, in run_method
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] return func(*args, **kwargs)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 213, in load_model
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] self.model_runner.load_model(eep_scale_up=eep_scale_up)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 2371, in load_model
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] self.model = model_loader.load_model(
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/base_loader.py", line 50, in load_model
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] self.load_weights(model, model_config)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 265, in load_weights
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] loaded_weights = model.load_weights(
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma3.py", line 542, in load_weights
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] return loader.load_weights(weights)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 291, in load_weights
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] autoloaded_weights = set(self._load_module("", self.module, weights))
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 249, in _load_module
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] yield from self._load_module(prefix,
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 222, in _load_module
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] loaded_params = module_load_weights(weights)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma3.py", line 460, in load_weights
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] weight_loader(param, loaded_weight, shard_id)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/linear.py", line 1132, in weight_loader
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] param_data = param_data.narrow(output_dim, shard_offset,
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/torchao/utils.py", line 638, in _dispatch__torch_function__
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] return func(*args, **kwargs)
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] File "/usr/local/lib/python3.12/dist-packages/torchao/utils.py", line 658, in _dispatch__torch_dispatch__
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] raise NotImplementedError(
(EngineCore_DP2 pid=285) ERROR 10-10 01:49:55 [core.py:718] NotImplementedError: Int4PreshuffledTensor dispatch: attempting to run unimplemented operator/function: func=<OpOverload(op='aten.slice', overload='Tensor')>, types=(<class 'torchao.quantization.Int4PreshuffledTensor'>,), arg_types=(<class 'torchao.quantization.Int4PreshuffledTensor'>, <class 'int'>, <class 'int'>, <class 'int'>), kwarg_types={}