We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6faab89 · commit 125ff36 — Copy full SHA for 125ff36
src/compressed_tensors/quantization/utils/helpers.py
@@ -84,9 +84,12 @@ def calculate_qparams(
84
scales = (max_vals - min_vals) / float(bit_range)
85
scales = torch.clamp(scales, min=torch.finfo(torch.float32).eps)
86
zero_points = bit_min - (min_vals / scales)
87
- zero_points = torch.clamp(torch.round(zero_points), bit_min, bit_max)
+ zero_points = torch.clamp(zero_points, bit_min, bit_max)
88
89
# match zero-points to quantized type
90
+ # if casting to int, use round instead of truncate
91
+ if quantization_args.type == QuantizationType.INT:
92
+ zero_points = torch.round(zero_points)
93
zero_points = zero_points.to(zp_dtype)
94
95
if scales.ndim == 0:
0 commit comments