Skip to content

Commit fa6d9c3

Browse files
metascroy authored and jainapurva committed
Optimize 3-bit packing
Differential Revision: D64010666 Pull Request resolved: #1029
1 parent 101d731 commit fa6d9c3

File tree

4 files changed

+143
-211
lines changed

4 files changed

+143
-211
lines changed

torchao/experimental/kernels/cpu/aarch64/benchmarks/benchmark_bitpacking.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
#include <torchao/experimental/kernels/cpu/aarch64/bitpacking/uint2.h>
1515
#include <torchao/experimental/kernels/cpu/aarch64/bitpacking/uint3.h>
1616
#include <torchao/experimental/kernels/cpu/aarch64/bitpacking/uint4.h>
17+
#include <torchao/experimental/kernels/cpu/aarch64/bitpacking/uint5.h>
1718
#include <torchao/experimental/kernels/cpu/aarch64/bitpacking/uint6.h>
1819
#include <torchao/experimental/kernels/cpu/aarch64/tests/test_utils.h>
1920
#include <cassert>

torchao/experimental/kernels/cpu/aarch64/bitpacking/bitpack.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ TORCHAO_ALWAYS_INLINE inline void vec_pack_32_lowbit_values(
109109
vget_high_u8(shifted0),
110110
vget_low_u8(shifted1),
111111
vget_high_u8(shifted1));
112+
break;
112113
case 3:
113114
uint8_t buffer3[32];
114115
vst1q_u8(buffer3, shifted0);
@@ -185,6 +186,7 @@ TORCHAO_ALWAYS_INLINE inline void vec_unpack_32_lowbit_values(
185186
shifted0_low, shifted0_high, shifted1_low, shifted1_high, packed);
186187
shifted0 = vcombine_u8(shifted0_low, shifted0_high);
187188
shifted1 = vcombine_u8(shifted1_low, shifted1_high);
189+
break;
188190
case 3:
189191
uint8_t buffer3[32];
190192
torchao::bitpacking::internal::unpack_8_uint3_values(buffer3, packed);

0 commit comments

Comments
 (0)