Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sycl/plugins/CMakeLists.txt
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ foreach(plugin ${SYCL_ENABLE_PLUGINS})
add_subdirectory(${plugin})
endforeach()

# level_zero plugin depends today on unified_runtime plugin
# and unified_runtime plugin is not an independent plugin, adding it explicitly
if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS)
add_subdirectory(unified_runtime)
endif()
1 change: 1 addition & 0 deletions sycl/plugins/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ add_sycl_plugin(level_zero
LIBRARIES
"${LEVEL_ZERO_LOADER}"
Threads::Threads
unified_runtime
)

find_package(Python3 REQUIRED)
Expand Down
233 changes: 8 additions & 225 deletions sycl/plugins/level_zero/pi_level_zero.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,6 @@ void disableZeTracing();

namespace {

// Controls Level Zero calls serialization to w/a Level Zero driver being not MT
// ready. Recognized values (can be used as a bit mask):
enum {
ZeSerializeNone =
0, // no locking or blocking (except when SYCL RT requested blocking)
ZeSerializeLock = 1, // locking around each ZE_CALL
ZeSerializeBlock =
2, // blocking ZE calls, where supported (usually in enqueue commands)
};
static const pi_uint32 ZeSerialize = [] {
const char *SerializeMode = std::getenv("ZE_SERIALIZE");
const pi_uint32 SerializeModeValue =
SerializeMode ? std::atoi(SerializeMode) : 0;
return SerializeModeValue;
}();

// This is an experimental option to test performance of device to device copy
// operations on copy engines (versus compute engine)
static const bool UseCopyEngineForD2DCopy = [] {
Expand Down Expand Up @@ -106,30 +90,6 @@ static const bool ReuseDiscardedEvents = [] {
return std::stoi(ReuseDiscardedEventsFlag) > 0;
}();

// This class encapsulates actions taken along with a call to Level Zero API.
class ZeCall {
private:
// The global mutex that is used for total serialization of Level Zero calls.
static std::mutex GlobalLock;

public:
ZeCall() {
if ((ZeSerialize & ZeSerializeLock) != 0) {
GlobalLock.lock();
}
}
~ZeCall() {
if ((ZeSerialize & ZeSerializeLock) != 0) {
GlobalLock.unlock();
}
}

// The non-static version just calls static one.
ze_result_t doCall(ze_result_t ZeResult, const char *ZeName,
const char *ZeArgs, bool TraceError = true);
};
std::mutex ZeCall::GlobalLock;

// Controls PI level tracing prints.
static bool PrintPiTrace = false;

Expand All @@ -139,45 +99,14 @@ static const bool IndirectAccessTrackingEnabled = [] {
nullptr;
}();

// Map Level Zero runtime error code to PI error code.
static pi_result mapError(ze_result_t ZeResult) {
// TODO: these mapping need to be clarified and synced with the PI API return
// values, which is TBD.
static std::unordered_map<ze_result_t, pi_result> ErrorMapping = {
{ZE_RESULT_SUCCESS, PI_SUCCESS},
{ZE_RESULT_ERROR_DEVICE_LOST, PI_ERROR_DEVICE_NOT_FOUND},
{ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, PI_ERROR_INVALID_OPERATION},
{ZE_RESULT_ERROR_NOT_AVAILABLE, PI_ERROR_INVALID_OPERATION},
{ZE_RESULT_ERROR_UNINITIALIZED, PI_ERROR_INVALID_PLATFORM},
{ZE_RESULT_ERROR_INVALID_ARGUMENT, PI_ERROR_INVALID_ARG_VALUE},
{ZE_RESULT_ERROR_INVALID_NULL_POINTER, PI_ERROR_INVALID_VALUE},
{ZE_RESULT_ERROR_INVALID_SIZE, PI_ERROR_INVALID_VALUE},
{ZE_RESULT_ERROR_UNSUPPORTED_SIZE, PI_ERROR_INVALID_VALUE},
{ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, PI_ERROR_INVALID_VALUE},
{ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT, PI_ERROR_INVALID_EVENT},
{ZE_RESULT_ERROR_INVALID_ENUMERATION, PI_ERROR_INVALID_VALUE},
{ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, PI_ERROR_INVALID_VALUE},
{ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, PI_ERROR_INVALID_VALUE},
{ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, PI_ERROR_INVALID_BINARY},
{ZE_RESULT_ERROR_INVALID_KERNEL_NAME, PI_ERROR_INVALID_KERNEL_NAME},
{ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_ERROR_BUILD_PROGRAM_FAILURE},
{ZE_RESULT_ERROR_OVERLAPPING_REGIONS, PI_ERROR_INVALID_OPERATION},
{ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION,
PI_ERROR_INVALID_WORK_GROUP_SIZE},
{ZE_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_ERROR_BUILD_PROGRAM_FAILURE},
{ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, PI_ERROR_OUT_OF_RESOURCES},
{ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, PI_ERROR_OUT_OF_HOST_MEMORY}};

auto It = ErrorMapping.find(ZeResult);
if (It == ErrorMapping.end()) {
return PI_ERROR_UNKNOWN;
}
return It->second;
}

// This will count the calls to Level-Zero
static std::map<const char *, int> *ZeCallCount = nullptr;

// Map from L0 to PI result
static inline pi_result mapError(ze_result_t Result) {
return ur2piResult(ze2urResult(Result));
}

// Trace a call to Level-Zero RT
#define ZE_CALL(ZeName, ZeArgs) \
{ \
Expand All @@ -186,9 +115,6 @@ static std::map<const char *, int> *ZeCallCount = nullptr;
return mapError(Result); \
}

#define ZE_CALL_NOCHECK(ZeName, ZeArgs) \
ZeCall().doCall(ZeName ZeArgs, #ZeName, #ZeArgs, false)

// Trace an internal PI call; returns in case of an error.
#define PI_CALL(Call) \
{ \
Expand All @@ -199,29 +125,6 @@ static std::map<const char *, int> *ZeCallCount = nullptr;
return Result; \
}

enum DebugLevel {
ZE_DEBUG_NONE = 0x0,
ZE_DEBUG_BASIC = 0x1,
ZE_DEBUG_VALIDATION = 0x2,
ZE_DEBUG_CALL_COUNT = 0x4,
ZE_DEBUG_ALL = -1
};

// Controls Level Zero calls tracing.
static const int ZeDebug = [] {
const char *DebugMode = std::getenv("ZE_DEBUG");
return DebugMode ? std::atoi(DebugMode) : ZE_DEBUG_NONE;
}();

static void zePrint(const char *Format, ...) {
if (ZeDebug & ZE_DEBUG_BASIC) {
va_list Args;
va_start(Args, Format);
vfprintf(stderr, Format, Args);
va_end(Args);
}
}

// Controls if we should choose doing eager initialization
// to make it happen on warmup paths and have the reportable
// paths be less likely affected.
Expand Down Expand Up @@ -459,10 +362,6 @@ static sycl::detail::SpinLock *PiPlatformsCacheMutex =
new sycl::detail::SpinLock;
static bool PiPlatformCachePopulated = false;

// Flags which tell whether various Level Zero extensions are available.
static bool PiDriverGlobalOffsetExtensionFound = false;
static bool PiDriverModuleProgramExtensionFound = false;

pi_result
_pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
size_t &Index, bool HostVisible,
Expand Down Expand Up @@ -2313,123 +2212,6 @@ static ze_result_t
checkUnresolvedSymbols(ze_module_handle_t ZeModule,
ze_module_build_log_handle_t *ZeBuildLog);

// This function will ensure compatibility with both Linux and Windows for
// setting environment variables.
static bool setEnvVar(const char *name, const char *value) {
#ifdef _WIN32
int Res = _putenv_s(name, value);
#else
int Res = setenv(name, value, 1);
#endif
if (Res != 0) {
zePrint(
"Level Zero plugin was unable to set the environment variable: %s\n",
name);
return false;
}
return true;
}

static class ZeUSMImportExtension {
// Pointers to functions that import/release host memory into USM
ze_result_t (*zexDriverImportExternalPointer)(ze_driver_handle_t hDriver,
void *, size_t);
ze_result_t (*zexDriverReleaseImportedPointer)(ze_driver_handle_t, void *);

public:
// Whether user has requested Import/Release, and platform supports it.
bool Enabled;

ZeUSMImportExtension() : Enabled{false} {}

void setZeUSMImport(pi_platform Platform) {
// Whether env var SYCL_USM_HOSTPTR_IMPORT has been set requesting
// host ptr import during buffer creation.
const char *USMHostPtrImportStr = std::getenv("SYCL_USM_HOSTPTR_IMPORT");
if (!USMHostPtrImportStr || std::atoi(USMHostPtrImportStr) == 0)
return;

// Check if USM hostptr import feature is available.
ze_driver_handle_t driverHandle = Platform->ZeDriver;
if (ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress,
(driverHandle, "zexDriverImportExternalPointer",
reinterpret_cast<void **>(
&zexDriverImportExternalPointer))) == 0) {
ZE_CALL_NOCHECK(
zeDriverGetExtensionFunctionAddress,
(driverHandle, "zexDriverReleaseImportedPointer",
reinterpret_cast<void **>(&zexDriverReleaseImportedPointer)));
// Hostptr import/release is turned on because it has been requested
// by the env var, and this platform supports the APIs.
Enabled = true;
// Hostptr import is only possible if piMemBufferCreate receives a
// hostptr as an argument. The SYCL runtime passes a host ptr
// only when SYCL_HOST_UNIFIED_MEMORY is enabled. Therefore we turn it on.
setEnvVar("SYCL_HOST_UNIFIED_MEMORY", "1");
}
}
void doZeUSMImport(ze_driver_handle_t driverHandle, void *HostPtr,
size_t Size) {
ZE_CALL_NOCHECK(zexDriverImportExternalPointer,
(driverHandle, HostPtr, Size));
}
void doZeUSMRelease(ze_driver_handle_t driverHandle, void *HostPtr) {
ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (driverHandle, HostPtr));
}
} ZeUSMImport;

pi_result _pi_platform::initialize() {
// Cache driver properties
ZeStruct<ze_driver_properties_t> ZeDriverProperties;
ZE_CALL(zeDriverGetProperties, (ZeDriver, &ZeDriverProperties));
uint32_t DriverVersion = ZeDriverProperties.driverVersion;
// Intel Level-Zero GPU driver stores version as:
// | 31 - 24 | 23 - 16 | 15 - 0 |
// | Major | Minor | Build |
auto VersionMajor = std::to_string((DriverVersion & 0xFF000000) >> 24);
auto VersionMinor = std::to_string((DriverVersion & 0x00FF0000) >> 16);
auto VersionBuild = std::to_string(DriverVersion & 0x0000FFFF);
ZeDriverVersion = VersionMajor + "." + VersionMinor + "." + VersionBuild;

ZE_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion));
ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." +
std::to_string(ZE_MINOR_VERSION(ZeApiVersion));

// Cache driver extension properties
uint32_t Count = 0;
ZE_CALL(zeDriverGetExtensionProperties, (ZeDriver, &Count, nullptr));

std::vector<ze_driver_extension_properties_t> zeExtensions(Count);

ZE_CALL(zeDriverGetExtensionProperties,
(ZeDriver, &Count, zeExtensions.data()));

for (auto extension : zeExtensions) {
// Check if global offset extension is available
if (strncmp(extension.name, ZE_GLOBAL_OFFSET_EXP_NAME,
strlen(ZE_GLOBAL_OFFSET_EXP_NAME) + 1) == 0) {
if (extension.version == ZE_GLOBAL_OFFSET_EXP_VERSION_1_0) {
PiDriverGlobalOffsetExtensionFound = true;
}
}
// Check if extension is available for "static linking" (compiling multiple
// SPIR-V modules together into one Level Zero module).
if (strncmp(extension.name, ZE_MODULE_PROGRAM_EXP_NAME,
strlen(ZE_MODULE_PROGRAM_EXP_NAME) + 1) == 0) {
if (extension.version == ZE_MODULE_PROGRAM_EXP_VERSION_1_0) {
PiDriverModuleProgramExtensionFound = true;
}
}
zeDriverExtensionMap[extension.name] = extension.version;
}

// Check if import user ptr into USM feature has been requested.
// If yes, then set up L0 API pointers if the platform supports it.
ZeUSMImport.setZeUSMImport(this);

return PI_SUCCESS;
}

pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
pi_uint32 *NumPlatforms) {

Expand Down Expand Up @@ -4886,7 +4668,8 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices,
// input module.
//
// TODO: Remove this workaround when the driver is fixed.
if (!PiDriverModuleProgramExtensionFound || (NumInputPrograms == 1)) {
if (!DeviceList[0]->Platform->ZeDriverModuleProgramExtensionFound ||
(NumInputPrograms == 1)) {
if (NumInputPrograms == 1) {
ZeModuleDesc.pNext = nullptr;
ZeModuleDesc.inputSize = ZeExtModuleDesc.inputSizes[0];
Expand Down Expand Up @@ -5525,7 +5308,7 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
std::scoped_lock<pi_shared_mutex, pi_shared_mutex, pi_shared_mutex> Lock(
Queue->Mutex, Kernel->Mutex, Kernel->Program->Mutex);
if (GlobalWorkOffset != NULL) {
if (!PiDriverGlobalOffsetExtensionFound) {
if (!Queue->Device->Platform->ZeDriverGlobalOffsetExtensionFound) {
zePrint("No global offset extension found on this driver\n");
return PI_ERROR_INVALID_VALUE;
}
Expand Down
Loading