We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6faab89 · commit 125ff36 — Copy full SHA for 125ff36
src/compressed_tensors/quantization/utils/helpers.py
@@ -84,9 +84,12 @@ def calculate_qparams(
84
scales = (max_vals - min_vals) / float(bit_range)
85
scales = torch.clamp(scales, min=torch.finfo(torch.float32).eps)
86
zero_points = bit_min - (min_vals / scales)
87
- zero_points = torch.clamp(torch.round(zero_points), bit_min, bit_max)
+ zero_points = torch.clamp(zero_points, bit_min, bit_max)
88
89
# match zero-points to quantized type
90
+ # if casting to int, use round instead of truncate
91
+ if quantization_args.type == QuantizationType.INT:
92
+ zero_points = torch.round(zero_points)
93
zero_points = zero_points.to(zp_dtype)
94
95
if scales.ndim == 0:
0 commit comments