// Reader aid for this patch. These comment lines sit ahead of the first diff header and are
// outside every hunk; nothing below them has been altered.
//
// Purpose: introduce an opt-in USE_OPENCL_EXTN_QCOM build switch and the OpenCL runtime hooks
// that depend on it.
//
// Per-file summary:
//   cmake/config.cmake
//     - adds set(USE_OPENCL_EXTN_QCOM OFF), so the feature is off unless a build enables it.
//   cmake/modules/OpenCL.cmake
//     - links opencl-cpptest against ${OpenCL_LIBRARIES} in addition to gtest_main and tvm_runtime.
//     - when USE_OPENCL_EXTN_QCOM is set: adds -DUSE_OPENCL_EXTN_QCOM, locates cl.h via find_path,
//       reads it, and forwards the number captured after CL_TARGET_OPENCL_VERSION as a
//       -DCL_TARGET_OPENCL_VERSION=... compile definition.
//   cmake/modules/contrib/CLML.cmake
//     - the OpenCL fallback now sets USE_OPENCL to ${CLML_PATH} instead of ON.
//   cmake/utils/FindOpenCL.cmake
//     - OpenCL_INCLUDE_DIRS gains the SDK root itself next to its include subdirectory.
//   src/runtime/opencl/opencl_common.h
//     - CL_TARGET_OPENCL_VERSION 120 is only defined when the macro is not already set.
//     - one extra include is added under USE_OPENCL_EXTN_QCOM on the non-Apple branch.
//     - Init(type_key, device_type, platform_name) gains a trailing properties argument that
//       defaults to nullptr; a new bool Init(ctx_props) overload returns false when contexts is
//       already non-empty and otherwise forwards to Init with device type gpu and returns true.
//     - declares SetNativePtr and SetPerfHint.
//   src/runtime/opencl/opencl_device_api.cc
//     - SetNativePtr: for a kBuffer1D descriptor, and only when USE_OPENCL_EXTN_QCOM is defined,
//       finishes the queue, unmaps any existing host_ptr, releases the old cl_mem and creates a
//       new buffer over the supplied host pointer with CL_MEM_USE_HOST_PTR and
//       CL_MEM_EXT_HOST_PTR_QCOM. Any other layout hits LOG(FATAL).
//     - SetPerfHint: calls clSetPerfHintQCOM on the context of the device platform, compiled only
//       when CL_CONTEXT_PERF_HINT_QCOM is defined.
//     - FreeDataSpace: the unmap call is now wrapped in OPENCL_CALL.
//     - Init: clCreateContext receives ctx_props instead of a hard-coded nullptr.
//   src/support/libinfo.cc
//     - adds a NOT-FOUND fallback for TVM_INFO_USE_OPENCL_EXTN_QCOM and reports the key in GetLibInfo.
//   tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc (new)
//     - two gtests, compiled only under USE_OPENCL_EXTN_QCOM: one checks that Init(properties)
//       succeeds once and then returns false, the other calls SetPerfHint on OpenCL device 0.
//   tests/scripts/task_build_adreno_bins.sh
//     - appends set(USE_OPENCL_EXTN_QCOM ON) to the generated config.cmake.
//
// NOTE(review): the version pattern ends in ([0-9]*), which also matches zero digits. If the first
//   occurrence of CL_TARGET_OPENCL_VERSION followed by a space in cl.h is not followed by a number,
//   CMAKE_MATCH_1 is empty and the compile definition has no value. Using [0-9]+ and checking
//   that CMAKE_MATCH_1 is non-empty looks safer - confirm against the target SDK header.
// NOTE(review): no hunk in this patch defines TVM_INFO_USE_OPENCL_EXTN_QCOM; only the NOT-FOUND
//   fallback is visible here. Verify that the build passes the real value, otherwise GetLibInfo
//   always reports NOT-FOUND for this key.
// NOTE(review): with USE_OPENCL_EXTN_QCOM undefined, SetNativePtr leaves a 1D buffer untouched
//   and reports nothing to the caller - confirm the silent path is intended.
// NOTE(review): SetPerfHint keys on CL_CONTEXT_PERF_HINT_QCOM while the rest of the patch keys on
//   USE_OPENCL_EXTN_QCOM - confirm the two guards are meant to differ.
// NOTE(review): the bool Init overload reads contexts before the inner Init takes the mutex -
//   verify that first use cannot race.
// NOTE(review): this copy of the patch has lost its angle-bracket spans (include targets and
//   template arguments are missing) and its line breaks, so it will not apply as is.
diff --git a/cmake/config.cmake b/cmake/config.cmake index 26d50630f7d3..0d912c0c75de 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -483,3 +483,6 @@ SET(CMAKE_VS_PLATFORM_NAME_DEFAULT "x64") # Set Windows Visual Studio default host (equivalent to -Thost=x64) SET(CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE "x64") + +# Enable Qualcomm OpenCL extension support +set(USE_OPENCL_EXTN_QCOM OFF) diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake index ddcd1e4190d1..67d739bb63a0 100644 --- a/cmake/modules/OpenCL.cmake +++ b/cmake/modules/OpenCL.cmake @@ -84,7 +84,7 @@ if(USE_OPENCL) "tests/cpp-runtime/opencl/*.cc" ) add_executable(opencl-cpptest ${OPENCL_TEST_SRCS}) - target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime) + target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime ${OpenCL_LIBRARIES}) else() message(STATUS "Couldn't build OpenCL-Gtests") endif() @@ -93,6 +93,17 @@ if(USE_OPENCL) if(USE_OPENCL_ENABLE_HOST_PTR) add_definitions(-DOPENCL_ENABLE_HOST_PTR) endif(USE_OPENCL_ENABLE_HOST_PTR) + if(USE_OPENCL_EXTN_QCOM) + add_definitions(-DUSE_OPENCL_EXTN_QCOM) + find_path(ocl_header cl.h HINTS ${OpenCL_INCLUDE_DIRS} PATH_SUFFIXES CL) + set(OCL_VERSION_HEADER "${ocl_header}/cl.h") + if(EXISTS ${OCL_VERSION_HEADER}) + file(READ ${OCL_VERSION_HEADER} ver) + string(REGEX MATCH "CL_TARGET_OPENCL_VERSION ([0-9]*)" _ ${ver}) + add_definitions(-DCL_TARGET_OPENCL_VERSION=${CMAKE_MATCH_1}) + message(STATUS "Set OpenCL Target version to " ${CMAKE_MATCH_1}) + endif() + endif(USE_OPENCL_EXTN_QCOM) else() list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc) endif(USE_OPENCL) diff --git a/cmake/modules/contrib/CLML.cmake b/cmake/modules/contrib/CLML.cmake index e658f15865df..118091696a9f 100644 --- a/cmake/modules/contrib/CLML.cmake +++ b/cmake/modules/contrib/CLML.cmake @@ -77,7 +77,7 @@ if(USE_CLML_GRAPH_EXECUTOR) message(STATUS "Enable OpenCL as fallback to CLML") file(GLOB RUNTIME_OPENCL_SRCS 
src/runtime/opencl/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS}) - set(USE_OPENCL ON) + set(USE_OPENCL ${CLML_PATH}) if(USE_OPENCL_ENABLE_HOST_PTR) add_definitions(-DOPENCL_ENABLE_HOST_PTR) endif(USE_OPENCL_ENABLE_HOST_PTR) diff --git a/cmake/utils/FindOpenCL.cmake b/cmake/utils/FindOpenCL.cmake index 8eb35ab3993e..13ffa7159381 100644 --- a/cmake/utils/FindOpenCL.cmake +++ b/cmake/utils/FindOpenCL.cmake @@ -46,7 +46,7 @@ macro(find_opencl use_opencl) endif() if(__opencl_sdk) - set(OpenCL_INCLUDE_DIRS ${__opencl_sdk}/include) + set(OpenCL_INCLUDE_DIRS ${__opencl_sdk}/include ${__opencl_sdk}) if (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY STREQUAL "ONLY") set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) endif() diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h index f752a487ea7e..e0abd1841b64 100644 --- a/src/runtime/opencl/opencl_common.h +++ b/src/runtime/opencl/opencl_common.h @@ -50,12 +50,17 @@ * files. This also allows us to expose the OpenCL version through * tvm.runtime.Device. */ +#if !defined(CL_TARGET_OPENCL_VERSION) #define CL_TARGET_OPENCL_VERSION 120 +#endif #ifdef __APPLE__ #include #else #include +#ifdef USE_OPENCL_EXTN_QCOM +#include +#endif #endif #include @@ -254,8 +259,13 @@ class OpenCLWorkspace : public DeviceAPI { } // Initialize the device. void Init(const std::string& type_key, const std::string& device_type, - const std::string& platform_name = ""); + const std::string& platform_name = "", cl_context_properties properties[] = nullptr); virtual void Init() { Init(this->type_key, "gpu"); } + virtual bool Init(cl_context_properties ctx_props[]) { + if (!contexts.empty()) return false; + Init(this->type_key, "gpu", "", ctx_props); + return true; + } // Check whether the context is OpenCL or not. 
virtual bool IsOpenCLDevice(Device dev) { return dev.device_type == kDLOpenCL; } // get the queue of the device @@ -314,6 +324,8 @@ class OpenCLWorkspace : public DeviceAPI { void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype, Optional mem_scope = NullOpt) final; void* GetNativePtr(const tvm::runtime::NDArray& narr); + void SetNativePtr(const tvm::runtime::NDArray& narr, void* host_ptr, size_t buf_size); + void SetPerfHint(Device dev, cl_uint perf_hint); void FreeDataSpace(Device dev, void* ptr) final; void StreamSync(Device dev, TVMStreamHandle stream) final; void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final; diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index 0057d0a10102..7b161e8932be 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -277,6 +277,47 @@ void* OpenCLWorkspace::GetNativePtr(const tvm::runtime::NDArray& narr) { return desc->host_ptr; } +void OpenCLWorkspace::SetNativePtr(const tvm::runtime::NDArray& narr, void* host_ptr, + size_t buf_size) { + cl::BufferDescriptor* desc = static_cast(narr.operator->()->data); + + this->Init(); + if (desc->layout == cl::BufferDescriptor::MemoryLayout::kBuffer1D) { +#ifdef USE_OPENCL_EXTN_QCOM + Device dev = narr.operator->()->device; + cl_device_id device_id = GetCLDeviceID(dev.device_id); + auto platform = device_info[device_id].platform_id; + + OPENCL_CALL(clFinish(this->GetQueue(dev))); + if (desc->host_ptr) { + OPENCL_CALL(clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, + reinterpret_cast(desc->host_ptr), 0, nullptr, + nullptr)); + desc->host_ptr = nullptr; + } + OPENCL_CALL(clReleaseMemObject(desc->buffer)); + + cl_int err_code; + desc->buffer = + clCreateBuffer(this->contexts[platform], + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, buf_size, + host_ptr, &err_code); + desc->layout = 
cl::BufferDescriptor::MemoryLayout::kBuffer1D; + OPENCL_CHECK_ERROR(err_code); +#endif + } else { + LOG(FATAL) << "Native Ptr not enabled over image objects"; + } +} + +void OpenCLWorkspace::SetPerfHint(Device dev, cl_uint perf_hint) { +#ifdef CL_CONTEXT_PERF_HINT_QCOM + cl_device_id device_id = GetCLDeviceID(dev.device_id); + auto platform = device_info[device_id].platform_id; + OPENCL_CALL(clSetPerfHintQCOM(this->contexts[platform], perf_hint)); +#endif +} + void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) { // We have to make sure that the memory object is not in the command queue // for some OpenCL platforms. @@ -284,8 +325,9 @@ void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) { cl::BufferDescriptor* desc = static_cast(ptr); if (desc->host_ptr) { - clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, - reinterpret_cast(desc->host_ptr), 0, nullptr, nullptr); + OPENCL_CALL(clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer, + reinterpret_cast(desc->host_ptr), 0, nullptr, + nullptr)); } OPENCL_CALL(clReleaseMemObject(desc->buffer)); delete desc; @@ -473,7 +515,7 @@ bool MatchPlatformInfo(cl_platform_id pid, cl_platform_info param_name, std::str } void OpenCLWorkspace::Init(const std::string& type_key, const std::string& device_type, - const std::string& platform_name) { + const std::string& platform_name, cl_context_properties ctx_props[]) { if (initialized_) return; std::lock_guard lock(this->mu); if (initialized_) return; @@ -539,7 +581,7 @@ void OpenCLWorkspace::Init(const std::string& type_key, const std::string& devic for (auto& [platform, devices] : device_map) { this->platform_ids.push_back(platform); this->contexts[platform] = - clCreateContext(nullptr, devices.size(), &(devices[0]), nullptr, nullptr, &err_code); + clCreateContext(ctx_props, devices.size(), &(devices[0]), nullptr, nullptr, &err_code); this->devices.insert(this->devices.end(), devices.begin(), devices.end()); for (size_t i = 0; i < devices.size(); ++i) { 
cl_device_id did = devices[i]; diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index 2d1c33cbf282..f1768dfd77a8 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -63,6 +63,10 @@ #define TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR "NOT-FOUND" #endif +#ifndef TVM_INFO_USE_OPENCL_EXTN_QCOM +#define TVM_INFO_USE_OPENCL_EXTN_QCOM "NOT-FOUND" +#endif + #ifndef TVM_INFO_USE_OPENCL_GTEST #define TVM_INFO_USE_OPENCL_GTEST "NOT-FOUND" #endif @@ -362,6 +366,7 @@ TVM_DLL Map GetLibInfo() { {"USE_NNPACK", TVM_INFO_USE_NNPACK}, {"USE_OPENCL", TVM_INFO_USE_OPENCL}, {"USE_OPENCL_ENABLE_HOST_PTR", TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR}, + {"USE_OPENCL_EXTN_QCOM", TVM_INFO_USE_OPENCL_EXTN_QCOM}, {"USE_OPENCL_GTEST", TVM_INFO_USE_OPENCL_GTEST}, {"USE_OPENMP", TVM_INFO_USE_OPENMP}, {"USE_PAPI", TVM_INFO_USE_PAPI}, diff --git a/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc b/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc new file mode 100644 index 000000000000..1f3dc2057aee --- /dev/null +++ b/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Note:: This should be first tests to be executed. 
+// hence, crafted the filename accordingly + +#include +#include + +#include "../src/runtime/opencl/opencl_common.h" + +using namespace tvm::runtime; +using namespace tvm::runtime::cl; + +#ifdef USE_OPENCL_EXTN_QCOM +#pragma message("Qualcomm OpenCL Extn GTests: enabled") +TEST(QCOMExtn, ContextPriorityHint) { + OpenCLWorkspace* workspace = OpenCLWorkspace::Global(); + cl_context_properties properties[] = {CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_LOW_QCOM, + 0}; + // Only allow one time + ASSERT_EQ(workspace->Init(properties), true); + // Subsequent calls will be failure + ASSERT_EQ(workspace->Init(properties), false); +} + +TEST(QCOMExtn, ContextPerfHint) { + OpenCLWorkspace* workspace = OpenCLWorkspace::Global(); + auto dev = DLDevice{kDLOpenCL, 0}; + workspace->SetPerfHint(dev, CL_PERF_HINT_HIGH_QCOM); +} +#else +#pragma message("Qualcomm OpenCL Extn GTests: disabled") +#endif diff --git a/tests/scripts/task_build_adreno_bins.sh b/tests/scripts/task_build_adreno_bins.sh index 38eefd93a692..412af4928123 100755 --- a/tests/scripts/task_build_adreno_bins.sh +++ b/tests/scripts/task_build_adreno_bins.sh @@ -50,6 +50,8 @@ echo set\(MACHINE_NAME aarch64-linux-gnu\) >> config.cmake echo set\(USE_OPENCL_GTEST ON\) >> config.cmake +echo set\(USE_OPENCL_EXTN_QCOM ON\) >> config.cmake + cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ -DANDROID_ABI=arm64-v8a \ -DANDROID_PLATFORM=android-28 \