We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a4d3b37 commit e2e75eb · Copy full SHA for e2e75eb
vllm/config.py
@@ -212,7 +212,7 @@ def _verify_quantization(self) -> None:
212
f"{self.quantization} quantization is currently not "
213
f"supported in ROCm.")
214
if (self.quantization
215
- not in ["marlin", "gptq_marlin_24", "gptq_marlin"]):
+ not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin")):
216
logger.warning(
217
"%s quantization is not fully "
218
"optimized yet. The speed can be slower than "
0 commit comments