Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions layers/10_cmdbufemu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ The following environment variables can modify the behavior of the command buffe
| Environment Variable | Behavior | Example Format |
|----------------------|----------|-----------------|
| `CMDBUFEMU_EnhancedErrorChecking` | Enables additional error checking when commands are added to a command buffer using a command buffer "test queue". By default, the additional error checking is disabled. | `export CMDBUFEMU_EnhancedErrorChecking=1`<br/><br/>`set CMDBUFEMU_EnhancedErrorChecking=1` |
| `CMDBUFEMU_KernelForProfiling` | Enables use of an empty kernel for event profiling instead of event profiling on a command-queue barrier. By default, to minimize overhead, the empty kernel is not used. | `export CMDBUFEMU_KernelForProfiling=1`<br/><br/>`set CMDBUFEMU_KernelForProfiling=1` |

## Known Limitations

Expand Down
112 changes: 100 additions & 12 deletions layers/10_cmdbufemu/emulate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;

static cl_int enqueueProfilingKernel(
cl_command_queue queue,
cl_kernel kernel,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event )
{
const size_t one = 1;
return g_pNextDispatch->clEnqueueNDRangeKernel(
queue,
kernel,
1,
nullptr,
&one,
nullptr,
num_events_in_wait_list,
event_wait_list,
event );
}

typedef struct _cl_mutable_command_khr
{
static bool isValid( cl_mutable_command_khr command )
Expand Down Expand Up @@ -1229,6 +1249,7 @@ typedef struct _cl_command_buffer_khr
(props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );

cmdbuf->setupTestQueue(queue);
cmdbuf->setupProfilingKernel(queue);
}
}

Expand All @@ -1254,6 +1275,11 @@ typedef struct _cl_command_buffer_khr
{
g_pNextDispatch->clReleaseCommandQueue(queue);
}

for( auto kernel : ProfilingKernels )
{
g_pNextDispatch->clReleaseKernel(kernel);
}
}

static bool isValid( cl_command_buffer_khr cmdbuf )
Expand Down Expand Up @@ -1297,20 +1323,17 @@ typedef struct _cl_command_buffer_khr

cl_command_queue getQueue() const
{
if( Queues.size() > 0 )
{
return Queues[0];
}
return nullptr;
return Queues.empty() ? nullptr : Queues[0];
}

cl_command_queue getTestQueue() const
{
if( TestQueues.size() > 0 )
{
return TestQueues[0];
}
return nullptr;
return TestQueues.empty() ? nullptr : TestQueues[0];
}

cl_kernel getProfilingKernel() const
{
return ProfilingKernels.empty() ? nullptr : ProfilingKernels[0];
}

cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts() const
Expand Down Expand Up @@ -1671,6 +1694,7 @@ typedef struct _cl_command_buffer_khr
std::vector<bool> IsInOrder;
std::vector<cl_command_queue> TestQueues;
std::vector<cl_event> BlockingEvents;
std::vector<cl_kernel> ProfilingKernels;

std::vector<std::unique_ptr<Command>> Commands;
std::atomic<uint32_t> NextSyncPoint;
Expand Down Expand Up @@ -1747,6 +1771,52 @@ typedef struct _cl_command_buffer_khr
}
}

void setupProfilingKernel(cl_command_queue queue)
{
if( g_KernelForProfiling )
{
cl_context context = nullptr;
g_pNextDispatch->clGetCommandQueueInfo(
queue,
CL_QUEUE_CONTEXT,
sizeof(context),
&context,
nullptr );

cl_device_id device = nullptr;
g_pNextDispatch->clGetCommandQueueInfo(
queue,
CL_QUEUE_DEVICE,
sizeof(device),
&device,
nullptr );

const char* kernelString = "kernel void Empty() {}";
cl_program program = g_pNextDispatch->clCreateProgramWithSource(
context,
1,
&kernelString,
nullptr,
nullptr );
g_pNextDispatch->clBuildProgram(
program,
1,
&device,
nullptr,
nullptr,
nullptr );

cl_kernel kernel = g_pNextDispatch->clCreateKernel(
program,
"Empty",
nullptr );
g_pNextDispatch->clReleaseProgram(
program );

ProfilingKernels.push_back(kernel);
}
}

_cl_command_buffer_khr(
cl_command_buffer_flags_khr flags,
cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
Expand Down Expand Up @@ -1993,7 +2063,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
queue,
num_events_in_wait_list,
event_wait_list,
event ? &startEvent : nullptr);
event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
if( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
{
errorCode = enqueueProfilingKernel(
queue,
cmdbuf->getProfilingKernel(),
0,
nullptr,
&startEvent );
}
}

if( errorCode == CL_SUCCESS )
Expand All @@ -2007,7 +2086,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
queue,
0,
nullptr,
event );
g_KernelForProfiling ? nullptr : event );
if( errorCode == CL_SUCCESS && g_KernelForProfiling )
{
errorCode = enqueueProfilingKernel(
queue,
cmdbuf->getProfilingKernel(),
0,
nullptr,
event );
}
}

if( event )
Expand Down
1 change: 1 addition & 0 deletions layers/10_cmdbufemu/emulate.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <map>

extern bool g_EnhancedErrorChecking;
extern bool g_KernelForProfiling;

extern const struct _cl_icd_dispatch* g_pNextDispatch;

Expand Down
6 changes: 6 additions & 0 deletions layers/10_cmdbufemu/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@

bool g_EnhancedErrorChecking = false;

// Using kernels for profiling can fix issues with some implementations
// that do not properly support event profiling on barrkers.

bool g_KernelForProfiling = false;

const struct _cl_icd_dispatch* g_pNextDispatch = NULL;

static cl_int CL_API_CALL
Expand Down Expand Up @@ -283,6 +288,7 @@ CL_API_ENTRY cl_int CL_API_CALL clInitLayer(
_init_dispatch();

getControl("CMDBUFEMU_EnhancedErrorChecking", g_EnhancedErrorChecking);
getControl("CMDBUFEMU_KernelForProfiling", g_KernelForProfiling);

g_pNextDispatch = target_dispatch;

Expand Down
Loading