diff --git a/CMakeLists.txt b/CMakeLists.txt index 119bf8325c8c..ab9cbb8e9c66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ endif() # Alernatively, use cmake -DOPTION=VALUE through command-line. tvm_option(USE_CUDA "Build with CUDA" OFF) tvm_option(USE_OPENCL "Build with OpenCL" OFF) +tvm_option(USE_OPENCL_ENABLE_HOST_PTR "Enable OpenCL memory object access to host" OFF) tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." /path/to/opencl/gtest) tvm_option(USE_VULKAN "Build with Vulkan" OFF) diff --git a/cmake/config.cmake b/cmake/config.cmake index 679f5c459e87..e26f909ea277 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -71,6 +71,11 @@ set(USE_AOCL OFF) # - /path/to/opencl-sdk: use specific path to opencl-sdk set(USE_OPENCL OFF) +# Wheather to allow OPENCL cl_mem access to host +# cl_mem will be allocated with CL_MEM_ALLOC_HOST_PTR +# OpenCLWorkspace->GetHostPtr API returns the host accessible pointer +set(USE_OPENCL_ENABLE_HOST_PTR OFF) + # Whether enable Metal runtime set(USE_METAL OFF) diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index 7c24088c0ad2..042fa3c6ddd7 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -89,6 +89,7 @@ function(add_lib_info src_file) TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}" TVM_INFO_USE_NNPACK="${USE_NNPACK}" TVM_INFO_USE_OPENCL="${USE_OPENCL}" + TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR="${USE_OPENCL_ENABLE_HOST_PTR}" TVM_INFO_USE_OPENCL_GTEST="${USE_OPENCL_GTEST}" TVM_INFO_USE_OPENMP="${USE_OPENMP}" TVM_INFO_USE_PAPI="${USE_PAPI}" diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake index 1e1041efe386..ced2da2d17e3 100644 --- a/cmake/modules/OpenCL.cmake +++ b/cmake/modules/OpenCL.cmake @@ -74,6 +74,9 @@ if(USE_OPENCL) target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime) endif() list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS}) + if(USE_OPENCL_ENABLE_HOST_PTR) + add_definitions(-DOPENCL_ENABLE_HOST_PTR) + endif(USE_OPENCL_ENABLE_HOST_PTR) else() list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc) endif(USE_OPENCL) diff --git a/cmake/modules/contrib/CLML.cmake b/cmake/modules/contrib/CLML.cmake index 30e60423b03b..e86a7e1ae032 100644 --- a/cmake/modules/contrib/CLML.cmake +++ b/cmake/modules/contrib/CLML.cmake @@ -54,5 +54,7 @@ if(USE_CLML_GRAPH_EXECUTOR) file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS}) set(USE_OPENCL ON) - + if(USE_OPENCL_ENABLE_HOST_PTR) + add_definitions(-DOPENCL_ENABLE_HOST_PTR) + endif(USE_OPENCL_ENABLE_HOST_PTR) endif() diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h index f0a68864d724..7bbb358f8f92 100644 --- a/src/runtime/opencl/opencl_common.h +++ b/src/runtime/opencl/opencl_common.h @@ -212,6 +212,7 @@ inline cl_channel_type DTypeToOpenCLChannelType(DLDataType data_type) { } class OpenCLThreadEntry; +struct BufferDescriptor; /*! * \brief Process global OpenCL workspace. @@ -290,6 +291,7 @@ class OpenCLWorkspace : public DeviceAPI { void* AllocDataSpace(Device dev, size_t size, size_t alignment, DLDataType type_hint) final; void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype, Optional mem_scope = NullOpt) final; + void* GetNativePtr(const tvm::runtime::NDArray& narr); void FreeDataSpace(Device dev, void* ptr) final; void StreamSync(Device dev, TVMStreamHandle stream) final; void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final; @@ -310,6 +312,8 @@ class OpenCLWorkspace : public DeviceAPI { void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) final; + void* CreateHostPtrIfEnabled(BufferDescriptor* desc, Device dev, size_t size); + private: std::string GetError() { if (this->devices.size() == 0) return noDevicesErrorMsg; @@ -377,6 +381,7 @@ struct BufferDescriptor { static String ScopeFromMemoryLayout(MemoryLayout mem_scope); cl_mem buffer{nullptr}; + cl_uchar* host_ptr{nullptr}; MemoryLayout layout{MemoryLayout::kBuffer1D}; }; } // namespace cl diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index 1244fddf0983..aa31d80d6e8b 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -29,6 +29,12 @@ #include "opencl_common.h" +#ifdef OPENCL_ENABLE_HOST_PTR +#define CL_MEM_CREATE_FLAGS CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR +#else +#define CL_MEM_CREATE_FLAGS CL_MEM_READ_WRITE +#endif + namespace tvm { namespace runtime { namespace cl { @@ -191,6 +197,17 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) } } +void* OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device dev, size_t size) { +#if defined(OPENCL_ENABLE_HOST_PTR) + cl_int err_code; + desc->host_ptr = reinterpret_cast( + clEnqueueMapBuffer(this->GetQueue(dev), desc->buffer, CL_TRUE, CL_MAP_WRITE, 0, + sizeof(cl_uchar) * size, 0, NULL, NULL, &err_code)); + OPENCL_CHECK_ERROR(err_code); +#endif // OPENCL_ENABLE_HOST_PTR + return desc; +} + void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment, DLDataType type_hint) { this->Init(); @@ -201,10 +218,10 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment, if (size == 0) { size = 1; } - desc->buffer = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size, nullptr, &err_code); + desc->buffer = clCreateBuffer(this->context, CL_MEM_CREATE_FLAGS, size, nullptr, &err_code); desc->layout = cl::BufferDescriptor::MemoryLayout::kBuffer1D; OPENCL_CHECK_ERROR(err_code); - return desc; + return CreateHostPtrIfEnabled(desc, dev, size); } void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype, @@ -226,12 +243,21 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape return desc; } +void* OpenCLWorkspace::GetNativePtr(const tvm::runtime::NDArray& narr) { + cl::BufferDescriptor* desc = static_cast(narr.operator->()->data); + return desc->host_ptr; +} + void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) { // We have to make sure that the memory object is not in the command queue // for some OpenCL platforms. OPENCL_CALL(clFinish(this->GetQueue(dev))); cl::BufferDescriptor* desc = static_cast(ptr); + if (desc->host_ptr) { + clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, + reinterpret_cast(desc->host_ptr), 0, NULL, NULL); + } OPENCL_CALL(clReleaseMemObject(desc->buffer)); delete desc; } @@ -245,7 +271,7 @@ cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height, cl_image_format format = {CL_RGBA, cl_type}; cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0, 0, 0, 0}; cl_mem mptr = - clCreateImage(this->context, CL_MEM_READ_WRITE, &format, &descriptor, nullptr, &err_code); + clCreateImage(this->context, CL_MEM_CREATE_FLAGS, &format, &descriptor, nullptr, &err_code); OPENCL_CHECK_ERROR(err_code); return mptr; } diff --git a/src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc b/src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc index c447ebcb5339..2c2768945424 100644 --- a/src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc +++ b/src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc @@ -173,6 +173,11 @@ using f_clEnqueueNDRangeKernel = cl_int (*)(cl_command_queue, cl_kernel, cl_uint cl_event*); using f_clCreateCommandQueue = cl_command_queue (*)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); +using f_clEnqueueUnmapMemObject = cl_int (*)(cl_command_queue, cl_mem, void*, cl_uint, + const cl_event*, cl_event*); +using f_clEnqueueMapBuffer = void* (*)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, + size_t, cl_uint, const cl_event*, cl_event*, cl_int*); + } // namespace cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms) { @@ -572,3 +577,29 @@ cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device, return nullptr; } } + +cl_int clEnqueueUnmapMemObject(cl_command_queue queue, cl_mem memobj, void* mapped_ptr, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event) { + auto& lib = LibOpenCLWrapper::getInstance(); + auto func = (f_clEnqueueUnmapMemObject)lib.getOpenCLFunction("clEnqueueUnmapMemObject"); + if (func) { + return func(queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event); + } else { + return CL_INVALID_PLATFORM; + } +} + +void* clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, + cl_map_flags map_flags, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event, cl_int* errcode_ret) { + auto& lib = LibOpenCLWrapper::getInstance(); + auto func = (f_clEnqueueMapBuffer)lib.getOpenCLFunction("clEnqueueMapBuffer"); + if (func) { + return func(command_queue, buffer, blocking_map, map_flags, offset, cb, num_events_in_wait_list, + event_wait_list, event, errcode_ret); + } else { + return nullptr; + } +} diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index c0fc9881b4f5..7bb1e04920fa 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -43,6 +43,10 @@ #define TVM_INFO_USE_OPENCL "NOT-FOUND" #endif +#ifndef TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR +#define TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR "NOT-FOUND" +#endif + #ifndef TVM_INFO_USE_OPENCL_GTEST #define TVM_INFO_USE_OPENCL_GTEST "NOT-FOUND" #endif @@ -294,6 +298,7 @@ TVM_DLL Map GetLibInfo() { {"USE_MSVC_MT", TVM_INFO_USE_MSVC_MT}, {"USE_NNPACK", TVM_INFO_USE_NNPACK}, {"USE_OPENCL", TVM_INFO_USE_OPENCL}, + {"USE_OPENCL_ENABLE_HOST_PTR", TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR}, {"USE_OPENCL_GTEST", TVM_INFO_USE_OPENCL_GTEST}, {"USE_OPENMP", TVM_INFO_USE_OPENMP}, {"USE_PAPI", TVM_INFO_USE_PAPI}, diff --git a/tests/cpp-runtime/opencl/opencl_nativeptr.cc b/tests/cpp-runtime/opencl/opencl_nativeptr.cc new file mode 100644 index 000000000000..ebfb62e92069 --- /dev/null +++ b/tests/cpp-runtime/opencl/opencl_nativeptr.cc @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include + +#include "../src/runtime/opencl/opencl_common.h" + +using namespace tvm::runtime; +using namespace tvm::runtime::cl; + +#if defined(OPENCL_ENABLE_HOST_PTR) +TEST(OpenCLNDArray, native_ptr) { + OpenCLWorkspace* workspace = OpenCLWorkspace::Global(); + + auto A = tvm::runtime::NDArray::Empty({128, 128}, {kDLFloat, 32, 1}, {kDLOpenCL, 0}); + void* nptr = workspace->GetNativePtr(A); + memset(nptr, 0x0, 128 * 128 * 4); +} +#endif