Commit 2fec78b

Update on "[executorch] Add coreml quant recipes"
Fixing tests for stack that got reverted: #13265 Adds coreml quant recipes after FP32/16 recipes added in #13121 Recipes added: PT2E_INT8_STATIC PT2E_INT8_WEIGHT_ONLY INT4_WEIGHT_ONLY_PER_CHANNEL INT4_WEIGHT_ONLY_PER_GROUP INT8_WEIGHT_ONLY_PER_CHANNEL INT8_WEIGHT_ONLY_PER_GROUP CODEBOOK_WEIGHT_ONLY Differential Revision: [D80206542](https://our.internmc.facebook.com/intern/diff/D80206542/) [ghstack-poisoned]
2 parents 984e7e1 + 3151642 commit 2fec78b

File tree: 80 files changed (+2403, -1230 lines)


.ci/scripts/zephyr-utils.sh
Lines changed: 3 additions & 3 deletions

@@ -6,9 +6,9 @@
 # LICENSE file in the root directory of this source tree.

 download_arm_zephyr_sdk () {
-  wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.16.0/zephyr-sdk-0.16.0_linux-x86_64.tar.xz
-  tar -xf zephyr-sdk-0.16.0_linux-x86_64.tar.xz
-  rm -f zephyr-sdk-0.16.0_linux-x86_64.tar.xz
+  wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.2/zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+  tar -xf zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+  rm -f zephyr-sdk-0.17.2_linux-x86_64.tar.xz
 }

 setup_zephyr_et_module () {

.github/workflows/add-unanswered-to-project.yml
Lines changed: 3 additions & 3 deletions

@@ -1,8 +1,8 @@
 name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136

 on:
-  schedule:
-    - cron: '0 * * * *'
+  # schedule:
+  #   - cron: '0 * * * *'
   workflow_dispatch:

 jobs:
@@ -12,7 +12,7 @@ jobs:
       - name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
         uses: actions/github-script@v7
         with:
-          github-token: ${{ secrets.PYTORCH_PROJECT_PAT }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
             const projectId = "PVT_kwDOAUB9vs4A_PUL"; // PyTorch org project 136
             const owner = 'pytorch';

.github/workflows/trunk.yml
Lines changed: 1 addition & 1 deletion

@@ -92,7 +92,7 @@ jobs:

           # TODO @Bujji: Should see if this can be moved into the docker image itself
           download_arm_zephyr_sdk
-          ./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
+          ./zephyr-sdk-0.17.2/setup.sh -c -t arm-zephyr-eabi
           cd $ZEPHYR_PROJ_ROOT
           setup_zephyr_et_module

CMakeLists.txt
Lines changed: 4 additions & 2 deletions

@@ -50,7 +50,10 @@
 cmake_minimum_required(VERSION 3.29)
 project(executorch)

+set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
+
 include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
+include(${PROJECT_SOURCE_DIR}/tools/cmake/Codegen.cmake)
 include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
 include(CMakeDependentOption)
 include(ExternalProject)
@@ -123,8 +126,6 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
 # Instead please use `find_package(executorch REQUIRED)` in the example
 # directory and add a new executable in the example `CMakeLists.txt`.

-set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
-
 if(NOT EXECUTORCH_ENABLE_LOGGING)
   # Avoid pulling in the logging strings, which can be large. Note that this
   # will set the compiler flag for all targets in this directory, and for all
@@ -320,6 +321,7 @@ if(NOT EXECUTORCH_SRCS_FILE)
   message(STATUS "executorch: Generating source lists")
   set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/executorch_srcs.cmake")
   extract_sources(${EXECUTORCH_SRCS_FILE})
+  executorch_validate_build_variables()
 endif()

 # This file defines the `_<target>__srcs` variables used below.
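
Taken together, these hunks move the `EXECUTORCH_ROOT` definition up to just after `project(executorch)` (deleting the later duplicate), add an include of `tools/cmake/Codegen.cmake`, and call the new `executorch_validate_build_variables()` right after `extract_sources()`. Presumably the earlier `EXECUTORCH_ROOT` definition is needed by the newly included preset and codegen files.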

CMakePresets.json
Lines changed: 30 additions & 0 deletions

@@ -6,6 +6,36 @@
     "hidden": true,
     "binaryDir": "${sourceDir}/cmake-out"
   },
+  {
+    "name": "android-arm64-v8a",
+    "displayName": "Build executorch core and JNI bindings on android arm64-v8a",
+    "inherits": ["common"],
+    "binaryDir": "${sourceDir}/cmake-out-android-arm64-v8a",
+    "cacheVariables": {
+      "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+      "ANDROID_ABI": "arm64-v8a"
+    },
+    "condition": {
+      "type": "inList",
+      "string": "${hostSystemName}",
+      "list": ["Darwin", "Linux", "Windows"]
+    }
+  },
+  {
+    "name": "android-x86_64",
+    "displayName": "Build executorch core and JNI bindings on android x86_64",
+    "inherits": ["common"],
+    "binaryDir": "${sourceDir}/cmake-out-android-x86_64",
+    "cacheVariables": {
+      "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+      "ANDROID_ABI": "x86_64"
+    },
+    "condition": {
+      "type": "inList",
+      "string": "${hostSystemName}",
+      "list": ["Darwin", "Linux", "Windows"]
+    }
+  },
   {
     "name": "macos",
     "displayName": "Build ExecuTorch for macOS",

backends/cadence/aot/functions.yaml
Lines changed: 10 additions & 0 deletions

@@ -229,6 +229,16 @@
   - arg_meta: null
     kernel_name: impl::reference::quantized_linear_per_tensor_out

+- func: cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_linear_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_linear_asym8uxasym8u_asym8u_per_tensor_out
+
 - func: cadence::im2row.out(Tensor input, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, Tensor in_zero_point, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null

backends/cadence/aot/functions_hifi.yaml
Lines changed: 10 additions & 0 deletions

@@ -314,6 +314,16 @@
   - arg_meta: null
     kernel_name: cadence::impl::HiFi::quantized_linear_per_tensor_out

+- func: cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_linear_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_linear_asym8uxasym8u_asym8u_per_tensor_out
+
 - func: cadence::quantized_relu_per_tensor.out(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
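
A note on the kernel naming in these two YAML files: going by the dtype map added in backends/cadence/aot/type_dispatch.py below, the suffix encodes the input, weight, and output types. `asym8s` is asymmetric signed 8-bit (torch.int8) and `asym8u` is asymmetric unsigned 8-bit (torch.uint8), so `asym8sxasym8s_asym8s` reads as "int8 input x int8 weight, producing an int8 output".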

backends/cadence/aot/ops_registrations.py
Lines changed: 60 additions & 0 deletions

@@ -56,10 +56,26 @@
 lib.define(
     "quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
 )
+lib.define(
+    "quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+    "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+    "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
+)
 lib.define(
     "quantized_linear.per_tensor(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, "
     "SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset) -> Tensor"
 )
+lib.define(
+    "quantized_linear_asym8sxasym8s_asym8s.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+    "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
+)
+lib.define(
+    "quantized_linear_asym8uxasym8u_asym8u.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
+    "int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
+)

 lib.define(
     "quantized_relu(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Y)"
@@ -446,6 +462,50 @@ def quantized_linear_per_tensor_meta(
     return src.new_empty(out_size, dtype=src.dtype)


+@register_fake("cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor")
+def quantized_linear_asym8sxasym8s_asym8s_per_tensor_meta(
+    src: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    in_zero_point: int,
+    weight_zero_point: int,
+    out_multiplier: int,
+    out_shift: int,
+    out_zero_point: int,
+    offset: Optional[torch.Tensor],
+) -> torch.Tensor:
+    # src comes in shape [leading_dims, in_dim]
+    # weight comes in shape [out_dim, in_dim]
+    # output comes in empty with shape [leading_dims, out_dim]
+    out_size = list(src.size())
+    weight_size = list(weight.size())
+    assert len(weight_size) == 2
+    out_size[-1] = weight_size[0]
+    return src.new_empty(out_size, dtype=src.dtype)
+
+
+@register_fake("cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor")
+def quantized_linear_asym8uxasym8u_asym8u_per_tensor_meta(
+    src: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    in_zero_point: int,
+    weight_zero_point: int,
+    out_multiplier: int,
+    out_shift: int,
+    out_zero_point: int,
+    offset: Optional[torch.Tensor],
+) -> torch.Tensor:
+    # src comes in shape [leading_dims, in_dim]
+    # weight comes in shape [out_dim, in_dim]
+    # output comes in empty with shape [leading_dims, out_dim]
+    out_size = list(src.size())
+    weight_size = list(weight.size())
+    assert len(weight_size) == 2
+    out_size[-1] = weight_size[0]
+    return src.new_empty(out_size, dtype=src.dtype)
+
+
 @register_fake("cadence::quantized_conv")
 def quantized_conv_meta(
     input: torch.Tensor,
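
The two fake (meta) kernels registered above do no arithmetic; they only propagate shapes: for `src` of shape `[leading_dims, in_dim]` and `weight` of shape `[out_dim, in_dim]`, the output is `[leading_dims, out_dim]`. A minimal standalone sketch of that shape rule in plain PyTorch (no ExecuTorch imports; `linear_out_shape` is an illustrative helper, not part of the codebase):

```python
import torch

def linear_out_shape(src: torch.Tensor, weight: torch.Tensor) -> list[int]:
    # Same rule as the meta kernels above: keep src's leading dims and
    # replace the last dim with the weight's out_dim (weight is [out_dim, in_dim]).
    assert weight.dim() == 2
    out_size = list(src.size())
    out_size[-1] = weight.size(0)
    return out_size

src = torch.randint(-128, 127, (2, 3), dtype=torch.int8)    # [leading_dims=2, in_dim=3]
weight = torch.randint(-128, 127, (4, 3), dtype=torch.int8) # [out_dim=4, in_dim=3]
print(linear_out_shape(src, weight))  # -> [2, 4]
```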

backends/cadence/aot/tests/test_type_dispatch_passes.py
Lines changed: 54 additions & 2 deletions

@@ -18,7 +18,7 @@


 class TestTypeDispatchPasses(unittest.TestCase):
-    def test_int8_dispatch(self) -> None:
+    def test_int8_dispatch_quantized_fully_connected(self) -> None:
         """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant"""
         x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
         w = torch.randint(-128, 127, (4, 3), dtype=torch.int8)
@@ -44,7 +44,7 @@ def test_int8_dispatch(self) -> None:
             1,
         )

-    def test_uint8_dispatch(self) -> None:
+    def test_uint8_dispatch_quantized_fully_connected(self) -> None:
         """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant"""
         x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
         w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)
@@ -70,6 +70,58 @@ def test_uint8_dispatch(self) -> None:
             1,
         )

+    def test_int8_dispatch_quantized_linear(self) -> None:
+        """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant for quantized_linear"""
+        x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+        w = torch.randint(-128, 127, (4, 3), dtype=torch.int8)
+        b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+        gm = single_op_builder(
+            placeholders=(x, w, b),
+            op=exir_ops.edge.cadence.quantized_linear.per_tensor,
+            args=(x, w, b, 0, 0, 1, 0, 0, None),
+        )
+        p = CompileTimeTypeDispatchPass()
+        gm = cast(PassResult, p(gm)).graph_module
+        # Original op should be replaced
+        self.assertEqual(
+            count_node(gm, exir_ops.edge.cadence.quantized_linear.per_tensor),
+            0,
+        )
+        # Should be replaced with int8 specific variant
+        self.assertEqual(
+            count_node(
+                gm,
+                exir_ops.edge.cadence.quantized_linear_asym8sxasym8s_asym8s.per_tensor,
+            ),
+            1,
+        )
+
+    def test_uint8_quantized_linear_dispatch(self) -> None:
+        """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant for quantized_linear"""
+        x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+        w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)
+        b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
+        gm = single_op_builder(
+            placeholders=(x, w, b),
+            op=exir_ops.edge.cadence.quantized_linear.per_tensor,
+            args=(x, w, b, 0, 0, 1, 0, 0, None),
+        )
+        p = CompileTimeTypeDispatchPass()
+        gm = cast(PassResult, p(gm)).graph_module
+        # Original op should be replaced
+        self.assertEqual(
+            count_node(gm, exir_ops.edge.cadence.quantized_linear.per_tensor),
+            0,
+        )
+        # Should be replaced with uint8 specific variant
+        self.assertEqual(
+            count_node(
+                gm,
+                exir_ops.edge.cadence.quantized_linear_asym8uxasym8u_asym8u.per_tensor,
+            ),
+            1,
+        )
+
     def test_mixed_types_error(self) -> None:
         """Test mixed int8/uint8 inputs should raise RuntimeError"""
         x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)

backends/cadence/aot/type_dispatch.py
Lines changed: 26 additions & 26 deletions

@@ -23,40 +23,40 @@ class CompileTimeTypeDispatchPass(ExportPass):
     Replaces generic ops with ops that have explicit types.
     """

+    _TYPE_DISPATCH_MAP: dict[tuple[torch.dtype, torch.dtype], str] = {
+        (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
+        (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
+    }
+
+    _SUPPORTED_OPS: dict[OpOverload, str] = {
+        exir_ops.edge.cadence.quantized_fully_connected.per_tensor: "quantized_fully_connected",
+        exir_ops.edge.cadence.quantized_linear.per_tensor: "quantized_linear",
+    }
+
     def call_operator(
         self,
         op: OpOverload,
         args: tuple[Argument, ...],
         kwargs: dict[str, Argument],
         meta: NodeMetadata,
     ) -> ProxyValue:
-        if op not in {
-            exir_ops.edge.cadence.quantized_fully_connected.per_tensor,
-        }:
+        if op not in self._SUPPORTED_OPS:
             return super().call_operator(op, args, kwargs, meta)

-        if (
-            # pyre-ignore[16]: None has no attribute `to_tensor`.
-            args[0].to_tensor().dtype == torch.int8
-            and args[1].to_tensor().dtype == torch.int8
-        ):
-            return super().call_operator(
-                exir_ops.edge.cadence.quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor,
-                args,
-                kwargs,
-                meta,
-            )
-        elif (
-            args[0].to_tensor().dtype == torch.uint8
-            and args[1].to_tensor().dtype == torch.uint8
-        ):
-            return super().call_operator(
-                exir_ops.edge.cadence.quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor,
-                args,
-                kwargs,
-                meta,
-            )
-        else:
+        # pyre-ignore[16]: None has no attribute `to_tensor`.
+        input_dtype = args[0].to_tensor().dtype
+        weight_dtype = args[1].to_tensor().dtype
+        dtype_pair = (input_dtype, weight_dtype)
+
+        if dtype_pair not in self._TYPE_DISPATCH_MAP:
             raise RuntimeError(
-                f"Unsupported input types for {op}: {args[0].to_tensor().dtype} and {args[1].to_tensor().dtype}"
+                f"Unsupported input types for {op}: {input_dtype} and {weight_dtype}"
             )
+
+        base_op_name = self._SUPPORTED_OPS[op]
+        type_suffix = self._TYPE_DISPATCH_MAP[dtype_pair]
+
+        typed_op_name = f"{base_op_name}_{type_suffix}"
+        typed_op = getattr(exir_ops.edge.cadence, typed_op_name).per_tensor
+
+        return super().call_operator(typed_op, args, kwargs, meta)
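
The refactor above swaps a hard-coded if/elif chain for table-driven dispatch: look up the `(input dtype, weight dtype)` pair to get a type suffix, then build the typed op name from the base-name table. A minimal standalone sketch of the same pattern, with a plain string in place of the `exir_ops` handle (`typed_op_name` is an illustrative helper, not the pass itself):

```python
import torch

# (input dtype, weight dtype) -> type suffix, mirroring _TYPE_DISPATCH_MAP above.
TYPE_DISPATCH_MAP: dict[tuple[torch.dtype, torch.dtype], str] = {
    (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
    (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
}

def typed_op_name(base_op_name: str, input_dtype: torch.dtype, weight_dtype: torch.dtype) -> str:
    """Build the typed-variant name for a generic quantized op."""
    pair = (input_dtype, weight_dtype)
    if pair not in TYPE_DISPATCH_MAP:
        # Mixed or unsupported dtype pairs fail loudly, as in the pass.
        raise RuntimeError(f"Unsupported input types for {base_op_name}: {pair}")
    return f"{base_op_name}_{TYPE_DISPATCH_MAP[pair]}"

print(typed_op_name("quantized_linear", torch.int8, torch.int8))
# -> quantized_linear_asym8sxasym8s_asym8s
```

With the table-driven form, supporting a new generic op or dtype pair is a one-line table entry, while unsupported combinations still raise the same RuntimeError.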
