We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
dynamic_scaled_fp8_quant
static_scaled_fp8_quant
1 parent b361f14 commit e0e58f9 — Copy full SHA for e0e58f9
vllm/_custom_ops.py
@@ -1282,10 +1282,11 @@ def scaled_fp8_quant(
1282
output, input.contiguous(), scale, scale_ub)
1283
else:
1284
scale = torch.zeros(1, device=input.device, dtype=torch.float32)
1285
- torch.ops._C.dynamic_scaled_fp8_quant(output, input, scale)
+ torch.ops._C.dynamic_scaled_fp8_quant(output, input.contiguous(),
1286
+ scale)
1287
1288
assert scale.numel() == 1, f"{scale.shape}"
- torch.ops._C.static_scaled_fp8_quant(output, input, scale)
1289
+ torch.ops._C.static_scaled_fp8_quant(output, input.contiguous(), scale)
1290
1291
return output, scale
1292
0 commit comments