Skip to content

Commit 2c489f8

Browse files
committed
reduce memory usage for nf4
1 parent 85a01b0 commit 2c489f8

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

bitsandbytes/backends/cpu_xpu_common.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -377,6 +377,7 @@ def quantize_4bit_impl(
377 377
int(lowp_mode),
378 378
-1, # act_quant_mode. -1 means don't quant activation
379 379
)
380 +
state.absmax = torch.Tensor()
380 381
return torch.Tensor(), state
381 382

382 383
return out, state
@@ -444,6 +445,7 @@ def dequantize_4bit_impl(
444 445
assert quant_state.op_context is not None
445 446
A = quant_state.op_context.to_public(quant_state.op_context.get_weight())
446 447
A = A.reshape(-1)
448 +
absmax = quant_state.op_context.get_scales().reshape(-1)
447 449

448 450
if out is None:
449 451
out = torch.empty(quant_state.shape, dtype=quant_state.dtype, device=A.device)

0 commit comments

Comments
 (0)