diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10e2eb437e3..f5091a2af2e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -605,15 +605,23 @@ if(EXECUTORCH_BUILD_CORTEX_M)
   list(APPEND _executorch_backends coretex_m_backend)
 endif()
 
-if(EXECUTORCH_BUILD_CUDA)
-  # Build common AOTI functionality (required for CUDA)
+# Build common AOTI functionality if needed by CUDA or Metal backends
+if(EXECUTORCH_BUILD_CUDA OR EXECUTORCH_BUILD_METAL)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/aoti)
+endif()
+
+if(EXECUTORCH_BUILD_CUDA)
   # Build CUDA-specific AOTI functionality
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cuda)
   # Add aoti_cuda to backends - it already depends on aoti_common
   list(APPEND _executorch_backends aoti_cuda)
 endif()
 
+if(EXECUTORCH_BUILD_METAL)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/metal)
+  list(APPEND _executorch_backends metal_backend)
+endif()
+
 if(EXECUTORCH_BUILD_EXTENSION_APPLE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
 endif()
diff --git a/backends/aoti/CMakeLists.txt b/backends/aoti/CMakeLists.txt
index fcabb0a3f2b..32dee1e3071 100644
--- a/backends/aoti/CMakeLists.txt
+++ b/backends/aoti/CMakeLists.txt
@@ -42,9 +42,13 @@ target_compile_options(
   $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fexceptions -frtti -fPIC>
 )
 # Ensure symbols are exported properly
-target_link_options(
-  aoti_common PUBLIC $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wl,--export-dynamic>
-)
+if(APPLE)
+  target_link_options(aoti_common PUBLIC -Wl,-export_dynamic)
+else()
+  target_link_options(
+    aoti_common PUBLIC $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wl,--export-dynamic>
+  )
+endif()
 
 # Link against ExecuTorch libraries and standard libraries
 target_link_libraries(aoti_common PUBLIC extension_tensor ${CMAKE_DL_LIBS})
diff --git a/backends/apple/metal/CMakeLists.txt b/backends/apple/metal/CMakeLists.txt
new file mode 100644
index 00000000000..7bdf142041d
--- /dev/null
+++ b/backends/apple/metal/CMakeLists.txt
@@ -0,0 +1,120 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+# Build AOTI Metal backend for runtime.
+#
+# ### Editing this file ###
+#
+# This file should be formatted with
+# ~~~
+# cmake-format -i CMakeLists.txt
+# ~~~
+# It should also be cmake-lint clean.
+#
+cmake_minimum_required(VERSION 3.29)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if(NOT APPLE)
+  message(FATAL_ERROR "Metal backend requires macOS")
+endif()
+
+# Source root directory for executorch. This directory is three levels below
+# the repo root, so walk up three levels.
+if(NOT EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
+endif()
+
+include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
+# Use full torch package to get library paths, but only link specific libraries
+find_package_torch()
+
+set(_aoti_metal_sources
+    runtime/metal_backend.cpp
+    runtime/shims/memory.cpp
+    runtime/shims/et_metal.mm
+    runtime/shims/et_metal_ops.mm
+    runtime/shims/shim_mps.mm
+    runtime/shims/tensor_attribute.cpp
+    runtime/shims/utils.cpp
+)
+
+add_library(metal_backend STATIC ${_aoti_metal_sources})
+target_include_directories(
+  metal_backend
+  PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..> $<INSTALL_INTERFACE:include>
+         # PyTorch AOTI headers from ExecuTorch's torch detection
+         ${TORCH_INCLUDE_DIRS}
+)
+
+# Link Metal framework
+find_library(METAL_LIBRARY Metal REQUIRED)
+find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+find_library(METALPERFORMANCESHADERS_LIBRARY MetalPerformanceShaders REQUIRED)
+find_library(
+  METALPERFORMANCESHADERSGRAPH_LIBRARY MetalPerformanceShadersGraph REQUIRED
+)
+target_link_libraries(
+  metal_backend
+  PUBLIC ${METAL_LIBRARY} ${FOUNDATION_LIBRARY}
+         ${METALPERFORMANCESHADERS_LIBRARY}
+         ${METALPERFORMANCESHADERSGRAPH_LIBRARY}
+)
+
+target_compile_options(metal_backend PUBLIC -fexceptions -frtti -fPIC)
+
+target_link_options(metal_backend PUBLIC -Wl,-export_dynamic)
+
+# Find PyTorch's OpenMP library specifically for libtorch-less AOTI
+get_torch_base_path(TORCH_BASE_PATH)
+find_library(
+  TORCH_OMP_LIBRARY
+  NAMES omp libomp
+  PATHS "${TORCH_BASE_PATH}/lib"
+  NO_DEFAULT_PATH
+)
+
+if(TORCH_OMP_LIBRARY)
+  message(STATUS "Found PyTorch OpenMP library: ${TORCH_OMP_LIBRARY}")
+  # Get the directory containing the OpenMP library for rpath
+  get_filename_component(TORCH_OMP_LIB_DIR ${TORCH_OMP_LIBRARY} DIRECTORY)
+  message(STATUS "OpenMP library directory: ${TORCH_OMP_LIB_DIR}")
+else()
+  message(
+    WARNING "PyTorch OpenMP library not found, may cause runtime linking issues"
+  )
+endif()
+
+# Link against appropriate backends and standard libraries
+target_link_libraries(
+  metal_backend PUBLIC aoti_common extension_tensor ${CMAKE_DL_LIBS}
+                       ${TORCH_OMP_LIBRARY}
+)
+
+# Set rpath for OpenMP library to avoid runtime linking issues
+if(TORCH_OMP_LIBRARY AND TORCH_OMP_LIB_DIR)
+  # Add the OpenMP library directory to the rpath
+  set_target_properties(
+    metal_backend PROPERTIES BUILD_RPATH "${TORCH_OMP_LIB_DIR}"
+                             INSTALL_RPATH "${TORCH_OMP_LIB_DIR}"
+  )
+  # Also try common OpenMP library locations
+  target_link_options(
+    metal_backend PUBLIC -Wl,-rpath,${TORCH_OMP_LIB_DIR}
+    -Wl,-rpath,/usr/local/opt/libomp/lib
+    -Wl,-rpath,/opt/homebrew/opt/libomp/lib
+  )
+  message(STATUS "Added rpath for OpenMP library: ${TORCH_OMP_LIB_DIR}")
+endif()
+
+executorch_target_link_options_shared_lib(metal_backend)
+install(
+  TARGETS metal_backend
+  EXPORT ExecuTorchTargets
+  DESTINATION lib
+)
diff --git a/backends/apple/metal/runtime/metal_backend.cpp b/backends/apple/metal/runtime/metal_backend.cpp
new file mode 100644
index 00000000000..1ef365a9332
--- /dev/null
+++ b/backends/apple/metal/runtime/metal_backend.cpp
@@ -0,0 +1,547 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <dlfcn.h>
+#include <unistd.h>
+#include <cstdint>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <executorch/runtime/backend/interface.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/evalue.h>
+#include <executorch/runtime/platform/log.h>
+
+// Include AOTI common headers (from aoti_common library)
+#include <executorch/backends/aoti/aoti_model_container.h>
+#include <executorch/backends/aoti/common_shims.h>
+
+// Include our Metal-specific shim layer headers
+#include <executorch/backends/apple/metal/runtime/shims/et_metal.h>
+#include <executorch/backends/apple/metal/runtime/shims/et_metal_ops.h>
+#include <executorch/backends/apple/metal/runtime/shims/memory.h>
+#include <executorch/backends/apple/metal/runtime/shims/tensor_attribute.h>
+#include <executorch/backends/apple/metal/runtime/shims/utils.h>
+
+namespace executorch::backends::metal {
+
+#define LOAD_SYMBOL(name, handle)                                  \
+  do {                                                             \
+    name = reinterpret_cast<decltype(name)>(dlsym(handle, #name)); \
+    ET_CHECK_OR_RETURN_ERROR(                                      \
+        name != nullptr, AccessFailed, "Failed to load " #name);   \
+  } while (0)
+
+using namespace std;
+using namespace aoti;
+
+using executorch::aten::ScalarType;
+using executorch::runtime::ArrayRef;
+using executorch::runtime::Backend;
+using executorch::runtime::BackendExecutionContext;
+using executorch::runtime::BackendInitContext;
+using executorch::runtime::CompileSpec;
+using executorch::runtime::DelegateHandle;
+using executorch::runtime::Error;
+using executorch::runtime::EValue;
+using executorch::runtime::FreeableBuffer;
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::NamedDataMap;
+using executorch::runtime::Result;
+using executorch::runtime::Span;
+using executorch::runtime::etensor::Tensor;
+
+class ET_EXPERIMENTAL MetalBackend final
+    : public ::executorch::runtime::BackendInterface {
+ private:
+  Error register_shared_library_functions(void* so_handle) const {
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loading symbols");
+
+    LOAD_SYMBOL(AOTInductorModelContainerCreateWithDevice, so_handle);
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loaded AOTInductorModelContainerCreateWithDevice");
+
+    LOAD_SYMBOL(AOTInductorModelContainerDelete, so_handle);
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loaded AOTInductorModelContainerDelete");
+
+    LOAD_SYMBOL(AOTInductorModelContainerGetNumInputs, so_handle);
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loaded AOTInductorModelContainerGetNumInputs");
+
+    LOAD_SYMBOL(AOTInductorModelContainerGetNumConstants, so_handle);
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loaded AOTInductorModelContainerGetNumConstants");
+
+    LOAD_SYMBOL(AOTInductorModelContainerGetInputName, so_handle);
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loaded AOTInductorModelContainerGetInputName");
+
+    LOAD_SYMBOL(AOTInductorModelContainerGetNumOutputs, so_handle);
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loaded AOTInductorModelContainerGetNumOutputs");
+
+    LOAD_SYMBOL(AOTInductorModelContainerRun, so_handle);
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - Loaded AOTInductorModelContainerRun");
+
+    ET_LOG(
+        Debug,
+        "MetalBackend::register_shared_library_functions - All symbols loaded successfully");
+    return Error::Ok;
+  }
+
+ public:
+  // Once in program
+  MetalBackend() {
+    ET_LOG(Debug, "MetalBackend ctor");
+  }
+
+  bool is_available() const override {
+    return true;
+  }
+
+  // Once per loaded binary blob
+  Result<DelegateHandle*> init(
+      BackendInitContext& context,
+      FreeableBuffer* processed, // This will be an empty buffer
+      ArrayRef<CompileSpec> compile_specs // This will be an empty list
+  ) const override {
+    ET_LOG(Info, "MetalBackend::init - Starting initialization");
+
+    std::string method_name;
+    for (const CompileSpec& spec : compile_specs) {
+      if (std::strcmp(spec.key, "method_name") == 0) {
+        method_name.assign(
+            static_cast<const char*>(spec.value.buffer),
+            spec.value.nbytes); // no nullptr guarantee, so pass size
+        break;
+      }
+    }
+
+    std::string so_blob_key =
+        method_name.empty() ? "so_blob" : method_name + "_so_blob";
+    ET_LOG(Info, "MetalBackend::init - so_blob_key: %s", so_blob_key.c_str());
+
+    const NamedDataMap* named_data_map = context.get_named_data_map();
+    ET_LOG(Info, "MetalBackend::init - Got named data map: %p", named_data_map);
+
+    ET_LOG(
+        Info,
+        "MetalBackend::init - Looking for blob key: %s",
+        so_blob_key.c_str());
+
+    auto aoti_metal_buffer = named_data_map->get_data(so_blob_key.c_str());
+    ET_CHECK_OR_RETURN_ERROR(
+        aoti_metal_buffer.ok(),
+        Internal,
+        "Failed to get data for key %s: 0x%x",
+        so_blob_key.c_str(),
+        static_cast<uint32_t>(aoti_metal_buffer.error()));
+
+    ET_LOG(
+        Info,
+        "MetalBackend::init - Buffer is OK, size: %zu",
+        aoti_metal_buffer->size());
+
+    if (aoti_metal_buffer->data() == nullptr) {
+      ET_LOG(Error, "MetalBackend::init - Buffer data is null");
+      return Error::InvalidArgument;
+    }
+
+    ET_LOG(
+        Info,
+        "MetalBackend::init - Buffer data pointer: %p",
+        aoti_metal_buffer->data());
+
+    // Generate dynamic temporary file path
+    filesystem::path temp_dir = filesystem::temp_directory_path();
+    filesystem::path so_path =
+        temp_dir / (so_blob_key + to_string(getpid()) + ".so");
+
+    // Create a temporary file
+    ET_LOG(
+        Info, "MetalBackend::init - Creating temp file: %s", so_path.c_str());
+    ofstream outfile(so_path.c_str(), ios::binary);
+
+    // Write the shared library buffer to the temporary file
+    ET_LOG(
+        Info,
+        "Writing %zu bytes to %s",
+        aoti_metal_buffer->size(),
+        so_path.c_str());
+
+    outfile.write(
+        static_cast<const char*>(aoti_metal_buffer->data()),
+        aoti_metal_buffer->size());
+
+    ET_CHECK_OR_RETURN_ERROR(
+        outfile, AccessFailed, "Failed to write to file %s", so_path.c_str());
+
+    // Finish writing the file to disk
+    outfile.close();
+    ET_LOG(Info, "MetalBackend::init - File closed successfully");
+
+    // Load the shared library with dlopen
+    void* so_handle = dlopen(so_path.c_str(), RTLD_LAZY | RTLD_LOCAL);
+    ET_CHECK_OR_RETURN_ERROR(
+        so_handle != nullptr,
+        AccessFailed,
+        "Failed to load shared library: %s",
+        dlerror());
+
+    processed->Free();
+
+    // Register all shared library functions
+    ET_CHECK_OK_OR_RETURN_ERROR(register_shared_library_functions(so_handle));
+
+    AOTInductorModelContainerHandle container_handle = nullptr;
+    ET_LOG(
+        Info,
+        "MetalBackend::init - About to create AOTI container with device='mps'");
+
+    ET_CHECK_OK_OR_RETURN_ERROR(AOTInductorModelContainerCreateWithDevice(
+        &container_handle, 1, "mps", nullptr));
+
+    ET_LOG(Info, "container_handle = %p", container_handle);
+
+    AOTIDelegateHandle* handle = new AOTIDelegateHandle();
+    handle->so_handle = so_handle;
+    handle->so_path = so_path.string();
+    handle->container_handle = container_handle;
+
+    ET_LOG(Info, "MetalBackend::init - Initialization completed successfully");
+    return (DelegateHandle*)handle; // Return the handle post-processing
+  }
+
+  // Once per execution
+  Error execute(
+      BackendExecutionContext& context,
+      DelegateHandle* handle_,
+      Span<EValue*> args) const override {
+    ET_LOG(Debug, "MetalBackend execute");
+
+    AOTIDelegateHandle* handle = (AOTIDelegateHandle*)handle_;
+
+    // Need to re-register all the symbols from the so_handle hosted by this
+    // MetalBackend instance. The reason is that these symbols are
+    // static/singleton across the whole process. When we share multiple
+    // methods (meaning multiple so_handle) in the same process, we need to
+    // re-register the symbols from the so_handle that is being used in this
+    // execution.
+    ET_CHECK_OK_OR_RETURN_ERROR(
+        register_shared_library_functions(handle->so_handle));
+
+    ET_LOG(Debug, "MetalBackend Handle generated");
+
+    size_t n_inputs;
+    AOTInductorModelContainerGetNumInputs(handle->container_handle, &n_inputs);
+
+    size_t n_outputs;
+    AOTInductorModelContainerGetNumOutputs(
+        handle->container_handle, &n_outputs);
+
+    ET_LOG(Debug, "MetalBackend n_outputs %zd generated", n_outputs);
+
+    ET_CHECK_OR_RETURN_ERROR(
+        n_inputs + n_outputs == args.size(),
+        InvalidArgument,
+        "number of user input %zd and output %zd generated from AOT Inductor does not match ET runner's %zd. Exit.",
+        n_inputs,
+        n_outputs,
+        args.size());
+
+    ET_LOG(
+        Debug,
+        "number of user input %zd and output %zd generated from AOT Inductor matches ET runner's %zd.",
+        n_inputs,
+        n_outputs,
+        args.size());
+
+    int32_t mps_device_type = aoti_torch_device_type_mps(); // Returns 13
+
+    // NOTE: ExecuTorch tensors are always on CPU/host memory
+    // We need to create GPU copies for Metal kernel execution
+    std::vector<AOTITensorHandle> gpu_inputs(
+        n_inputs); // GPU copies for kernel execution
+    std::vector<AOTITensorHandle> gpu_outputs(
+        n_outputs); // GPU tensors for kernel output
+
+    ET_LOG(Debug, "MetalBackend input/output vectors generated");
+
+    // Process input tensors: ExecuTorch provides CPU tensors, create GPU
+    // copies
+    for (int i = 0; i < n_inputs; i++) {
+      ET_LOG(Debug, "Processing input %d from args to inputs vector", i);
+      ET_LOG(
+          Debug, "is %d input a tensor input? %d", i, int(args[i]->isTensor()));
+
+      // Get tensor dimensions and properties from ExecuTorch CPU tensor
+      auto cpu_tensor = &(args[i]->toTensor());
+      auto sizes = cpu_tensor->sizes();
+      auto scalar_type = cpu_tensor->scalar_type();
+      ET_LOG(
+          Debug,
+          "MetalBackend input %d scalar_type=%d",
+          i,
+          static_cast<int>(scalar_type));
+
+      // Create GPU tensor with same shape
+      std::vector<int64_t> sizes_vec(sizes.begin(), sizes.end());
+
+      AOTITensorHandle gpu_input_handle;
+      Error create_err = aoti_torch_empty_strided(
+          sizes_vec.size(),
+          sizes_vec.data(),
+          nullptr, // use default strides
+          static_cast<int32_t>(scalar_type),
+          mps_device_type, // device_type = mps
+          0, // device_index = 0
+          &gpu_input_handle);
+
+      if (create_err != Error::Ok) {
+        ET_LOG(Error, "Failed to create GPU tensor for input %d", i);
+        return Error::Internal;
+      }
+
+      // Log the created GPU tensor scalar type
+      auto gpu_tensor =
+          reinterpret_cast<executorch::runtime::etensor::Tensor*>(
+              gpu_input_handle);
+      ET_LOG(
+          Debug,
+          "MetalBackend created GPU tensor %d scalar_type=%d",
+          i,
+          static_cast<int>(gpu_tensor->scalar_type()));
+
+      gpu_inputs[i] = gpu_input_handle;
+
+      // Log the CPU tensor data before copying to GPU
+      void* cpu_data = cpu_tensor->mutable_data_ptr();
+      if (cpu_data && cpu_tensor->numel() > 0) {
+        float* cpu_float_data = (float*)cpu_data;
+        ET_LOG(
+            Debug,
+            "CPU input %d data before copy: [%.3f, %.3f, %.3f, ...] (numel=%zd)",
(numel=%zd)", + i, + cpu_float_data[0], + cpu_float_data[1], + cpu_float_data[2], + cpu_tensor->numel()); + } + + // Copy data from CPU to GPU + Error copy_err = aoti_torch_copy_(gpu_inputs[i], cpu_tensor, 0); + if (copy_err != Error::Ok) { + ET_LOG(Error, "Failed to copy input %d from CPU to GPU", i); + return Error::Internal; + } + + // Log the GPU tensor scalar type after copy + auto gpu_tensor_after = + reinterpret_cast( + gpu_inputs[i]); + ET_LOG( + Debug, + "MetalBackend GPU tensor %d scalar_type after copy=%d", + i, + static_cast(gpu_tensor_after->scalar_type())); + + ET_LOG(Debug, "Successfully copied input %d from CPU to GPU", i); + } + + ET_LOG(Debug, "MetalBackend GPU inputs generated"); + + // Process output tensors: create GPU counterparts for ExecutorTorch CPU + // tensors + for (int i = 0; i < n_outputs; i++) { + // Get output tensor dimensions from ExecutorTorch CPU tensor + auto cpu_output_tensor = &(args[i + n_inputs]->toTensor()); + auto sizes = cpu_output_tensor->sizes(); + auto scalar_type = cpu_output_tensor->scalar_type(); + ET_LOG( + Debug, + "MetalBackend output %d scalar_type=%d", + i, + static_cast(scalar_type)); + + // Create GPU tensor with same shape for kernel output + std::vector sizes_vec(sizes.begin(), sizes.end()); + + AOTITensorHandle gpu_output_handle; + Error create_err = aoti_torch_empty_strided( + sizes_vec.size(), + sizes_vec.data(), + nullptr, // use default strides + static_cast(scalar_type), + mps_device_type, // device_type = mps + 0, // device_index = 0 + &gpu_output_handle); + + if (create_err != Error::Ok) { + ET_LOG(Error, "Failed to create GPU tensor for output %d", i); + return Error::Internal; + } + + gpu_outputs[i] = gpu_output_handle; + ET_LOG(Debug, "Created GPU output tensor %d", i); + } + + ET_LOG(Debug, "MetalBackend output generated"); + + // Log tensor handles before passing to AOTI container + ET_LOG(Debug, "Passing to AOTInductorModelContainerRun:"); + for (int i = 0; i < n_inputs; i++) { + void* gpu_input_data = gpu_inputs[i]->mutable_data_ptr(); + ET_LOG( + Debug, + " gpu_inputs[%d] = %p, data_ptr = %p", + i, + gpu_inputs[i], + gpu_input_data); + } + for (int i = 0; i < n_outputs; i++) { + void* gpu_output_data = gpu_outputs[i]->mutable_data_ptr(); + ET_LOG( + Debug, + " gpu_outputs[%d] = %p, data_ptr = %p", + i, + gpu_outputs[i], + gpu_output_data); + } + + // Run AOTI container with GPU tensors + AOTIRuntimeError error = AOTInductorModelContainerRun( + handle->container_handle, + gpu_inputs.data(), // Use GPU input tensors + n_inputs, + gpu_outputs.data(), // Use GPU output tensors + n_outputs, + nullptr, // Pass the actual Metal stream! + nullptr); // proxy_executor_handle can remain nullptr + + if (error != Error::Ok) { + ET_LOG( + Error, + "AOTInductorModelContainerRun failed with error code %d", + error); + return Error::Internal; + } + + // Ensure all GPU work is completed before reading results + try { + synchronize_metal_stream(); + } catch (const std::exception& e) { + ET_LOG( + Error, + "Failed to synchronize Metal stream after kernel execution: %s", + e.what()); + return Error::Internal; + } catch (...) 
+      ET_LOG(
+          Error,
+          "Failed to synchronize Metal stream after kernel execution: unknown exception");
+      return Error::Internal;
+    }
+
+    ET_LOG(Debug, "MetalBackend running done and synchronized");
+
+    // Copy GPU output results back to CPU output tensors
+    for (int i = 0; i < n_outputs; i++) {
+      auto cpu_output_tensor = &(args[i + n_inputs]->toTensor());
+      // For DYNAMIC_BOUND tensors we try to resize
+      ET_CHECK_OK_OR_RETURN_ERROR(
+          resize_tensor(*cpu_output_tensor, gpu_outputs[i]->sizes()),
+          "Error resizing tensor at output index %d",
+          i);
+      ET_CHECK_OK_OR_RETURN_ERROR(
+          aoti_torch_copy_(cpu_output_tensor, gpu_outputs[i], 0),
+          "Failed to copy GPU output %d back to CPU",
+          i);
+      ET_LOG(Debug, "Copied GPU output %d back to CPU", i);
+    }
+
+    // Clean up GPU tensors that we created (ExecuTorch tensors are always
+    // CPU, so all GPU tensors are our copies)
+    for (int i = 0; i < n_inputs; i++) {
+      // All GPU input tensors were created by us, delete them
+      aoti_torch_delete_tensor_object(gpu_inputs[i]);
+    }
+
+    for (int i = 0; i < n_outputs; i++) {
+      // All GPU output tensors were created by us, delete them
+      aoti_torch_delete_tensor_object(gpu_outputs[i]);
+    }
+
+    ET_LOG(Debug, "MetalBackend execution completed successfully");
+
+    return Error::Ok;
+  }
+
+  void destroy(DelegateHandle* handle_) const override {
+    if (handle_ == nullptr) {
+      return;
+    }
+    AOTIDelegateHandle* handle = (AOTIDelegateHandle*)handle_;
+
+    // NOTE: AOTInductorModelContainerDelete does not work correctly with
+    // multiple .so files. Deleting one container frees shared resources,
+    // which causes segmentation faults when attempting to delete other
+    // containers. As a workaround, we skip explicit container deletion
+    // and defer cleanup to the OS.
+    // TODO: Find a proper solution for safe container deletion.
+    // AOTInductorModelContainerDelete(handle->container_handle);
+
+    // Now close the shared library
+    if (handle->so_handle != nullptr) {
+      dlclose(handle->so_handle);
+    }
+
+    // Remove the temporary shared library file
+    if (!handle->so_path.empty()) {
+      std::error_code remove_error;
+      std::filesystem::remove(handle->so_path, remove_error);
+      ET_CHECK_OR_LOG_ERROR(
+          !remove_error,
+          "Failed to remove temporary shared library %s: %s",
+          handle->so_path.c_str(),
+          remove_error.message().c_str());
+      if (!remove_error) {
+        ET_LOG(
+            Info,
+            "Removed temporary shared library file: %s",
+            handle->so_path.c_str());
+      }
+    }
+
+    delete handle;
+    cleanup_memory();
+    executorch::backends::aoti::cleanup_tensor_metadata();
+    ET_LOG(Debug, "MetalBackend handle %p destroy", handle_);
+  }
+};
+
+} // namespace executorch::backends::metal
+
+namespace executorch::backends {
+namespace {
+auto cls = metal::MetalBackend();
+executorch::runtime::Backend backend{"MetalBackend", &cls};
+static executorch::runtime::Error success_with_compiler =
+    register_backend(backend);
+} // namespace
+} // namespace executorch::backends
diff --git a/examples/models/voxtral/CMakeLists.txt b/examples/models/voxtral/CMakeLists.txt
index 3995f5533e6..866d17160ba 100644
--- a/examples/models/voxtral/CMakeLists.txt
+++ b/examples/models/voxtral/CMakeLists.txt
@@ -93,6 +93,11 @@
 if(EXECUTORCH_BUILD_CUDA)
   executorch_target_link_options_shared_lib(aoti_cuda)
 endif()
 
+if(EXECUTORCH_BUILD_METAL)
+  list(APPEND link_libraries metal_backend)
+  executorch_target_link_options_shared_lib(metal_backend)
+endif()
+
 # Add tokenizers
 list(APPEND link_libraries tokenizers::tokenizers)
diff --git a/tools/cmake/executorch-config.cmake b/tools/cmake/executorch-config.cmake
index ba18aede63e..e5b4881fe3b 100644
--- a/tools/cmake/executorch-config.cmake
+++ b/tools/cmake/executorch-config.cmake
@@ -63,6 +63,7 @@ set(optional_lib_list
   coreml_inmemoryfs
   coremldelegate
   mpsdelegate
+  metal_backend
   neuron_backend
   qnn_executorch_backend
   portable_ops_lib
diff --git a/tools/cmake/preset/default.cmake b/tools/cmake/preset/default.cmake
index 04e84622589..861e41e4a63 100644
--- a/tools/cmake/preset/default.cmake
+++ b/tools/cmake/preset/default.cmake
@@ -152,6 +152,9 @@ define_overridable_option(
 define_overridable_option(
   EXECUTORCH_BUILD_CUDA "Build the CUDA backend" BOOL OFF
 )
+define_overridable_option(
+  EXECUTORCH_BUILD_METAL "Build the Metal backend" BOOL OFF
+)
 define_overridable_option(
   EXECUTORCH_BUILD_VGF "Build the Arm VGF backend" BOOL OFF
 )
@@ -389,6 +392,10 @@
 check_required_options_on(
   IF_ON EXECUTORCH_BUILD_CUDA REQUIRES EXECUTORCH_BUILD_EXTENSION_TENSOR
 )
+check_required_options_on(
+  IF_ON EXECUTORCH_BUILD_METAL REQUIRES EXECUTORCH_BUILD_EXTENSION_TENSOR
+)
+
 if(NOT EXISTS ${EXECUTORCH_PAL_DEFAULT_FILE_PATH})
   message(
     FATAL_ERROR
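
---

Note on downstream usage: the `metal_backend` entry added to `optional_lib_list` in `executorch-config.cmake` means an installed ExecuTorch tree built with `-DEXECUTORCH_BUILD_METAL=ON` (which, per the `check_required_options_on` rule above, also requires `EXECUTORCH_BUILD_EXTENSION_TENSOR=ON`) exports the target through `find_package(executorch)`. The following is a minimal consumer sketch, not part of this patch: the project and source file names are hypothetical, and it assumes the `executorch_target_link_options_shared_lib` helper is available to consumers, mirroring its use in the voxtral example above.

```cmake
cmake_minimum_required(VERSION 3.29)
project(metal_runner_example CXX) # hypothetical consumer project

# Locate an ExecuTorch install configured with:
#   -DEXECUTORCH_BUILD_METAL=ON -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON
find_package(executorch REQUIRED CONFIG)

add_executable(metal_runner main.cpp) # main.cpp is a placeholder

# metal_backend is an optional library, so it only exists when the install
# was built with the Metal backend enabled; guard on the target.
if(TARGET metal_backend)
  # Force whole-archive-style linking so the backend's static registration
  # (the register_backend(backend) call in metal_backend.cpp) is retained.
  executorch_target_link_options_shared_lib(metal_backend)
  target_link_libraries(metal_runner PRIVATE metal_backend)
endif()

target_link_libraries(metal_runner PRIVATE executorch extension_tensor)
```

The `if(TARGET metal_backend)` guard matches how `executorch-config.cmake` treats every entry in `optional_lib_list`: absent backends simply produce no imported target, so consumers can degrade gracefully instead of failing at configure time.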