127 changes: 68 additions & 59 deletions src/runtime/contrib/clml/clml_runtime.cc
@@ -140,15 +140,17 @@ class CLMLRuntime : public JSONRuntimeBase {
void InitCLML() {
// Setup CLML Context
cl_int result = 0;

workspace = cl::OpenCLWorkspace::Global();
workspace->Init();
tentry = workspace->GetThreadEntry();

if (!ExtensionStringPresent()) {
- LOG(WARNING) << "CLML Runtime Init: Qualcomm extn not present.\n";
+ LOG(FATAL) << "CLML Runtime Init: Qualcomm extn not present.\n";
return;
}
+ device_id = workspace->GetCLDeviceID(tentry->device.device_id);
+ platform_id = workspace->device_to_platform[device_id];

// Query and Get CLML Interface
static const cl_uint MAX_VERSIONS = 256;
cl_int majorVersions[MAX_VERSIONS];
@@ -492,7 +494,7 @@ class CLMLRuntime : public JSONRuntimeBase {
JSONGraphNode node = it->second.second;
void* node_data = nullptr;

- allocateTensorMemory(h_ClmlIntf, workspace->context, tensor_desc);
+ allocateTensorMemory(h_ClmlIntf, workspace->contexts[platform_id], tensor_desc);

if (node.GetOpType() == "const") {
node_data = data_entry_[EntryID(it->first, 0)]->data;
@@ -581,11 +583,9 @@ class CLMLRuntime : public JSONRuntimeBase {

bool ExtensionStringPresent(void) {
cl_int result = 0;
- if (workspace->platform_id == nullptr) {
- return 0;
- }
size_t reqd_size = 0;
- cl_device_id device_id = workspace->devices[workspace->GetThreadEntry()->device.device_id];
+ cl_device_id device_id =
+ workspace->GetCLDeviceID(workspace->GetThreadEntry()->device.device_id);
result = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, 0, NULL, &reqd_size);
ICHECK(reqd_size > 0u && result == CL_SUCCESS) << "clGetDeviceInfo:" << result;

@@ -607,7 +607,8 @@

cl_ml_tensor_desc_qcom desc = {
dtype, layout, dims.n, dims.c, dims.h, dims.w, 0, CL_TENSOR_DIMENSIONS_4D_QCOM, { 0 }};
- result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &tensor);
+ result =
+ h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc, &tensor);
ICHECK(tensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
(void)result;
return tensor;
@@ -619,11 +620,12 @@
cl_int result = CL_OUT_OF_HOST_MEMORY;
cl_mem buffer = NULL;

- result =
- h_ClmlIntf->clGetMLTensorMemorySizeQCOM(workspace->context, pTensorMemDesc->tensor, &size);
+ result = h_ClmlIntf->clGetMLTensorMemorySizeQCOM(workspace->contexts[platform_id],
+ pTensorMemDesc->tensor, &size);
ICHECK(result == CL_SUCCESS) << "clGetMLTensorMemorySizeQCOM:" << result;

- buffer = clCreateBuffer(workspace->context, CL_MEM_READ_WRITE, size, NULL, &result);
+ buffer =
+ clCreateBuffer(workspace->contexts[platform_id], CL_MEM_READ_WRITE, size, NULL, &result);
ICHECK(result == CL_SUCCESS) << "clCreateBuffer:" << result;

pTensorMemDesc->memory = buffer;
@@ -686,7 +688,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_channel_type cl_dtype = MakeCLDataType(tvm_dtype);

auto tensor_dsc = std::make_shared<cl_ml_tensor_memory_desc_qcom>();
- tensor_dsc->tensor = DeviceMakeCLMLTensor(workspace->context, dims, layout, cl_dtype);
+ tensor_dsc->tensor =
+ DeviceMakeCLMLTensor(workspace->contexts[platform_id], dims, layout, cl_dtype);
return tensor_dsc;
}

@@ -800,8 +803,8 @@ class CLMLRuntime : public JSONRuntimeBase {
} else {
cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
bias->tensor = layer_.unusedTensor;
}
@@ -821,13 +824,13 @@
if (!has_bn) {
if (!has_act) {
result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
- workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
- output->tensor, &op, NULL);
+ workspace->contexts[platform_id], 0, &conv_desc, input->tensor, weight->tensor,
+ bias->tensor, output->tensor, &op, NULL);
ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
} else {
result = h_ClmlIntf->clCreateMLOpFusedConvolutionActivationForwardQCOM(
- workspace->context, 0, &conv_desc, &act_desc, input->tensor, weight->tensor,
- bias->tensor, NULL, output->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &conv_desc, &act_desc, input->tensor,
+ weight->tensor, bias->tensor, NULL, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
}
layer_.func_ins.push_back(input);
@@ -854,15 +857,15 @@
cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};
if (!has_act) {
result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormForwardQCOM(
- workspace->context, 0, &conv_desc, &bn_desc, input->tensor, weight->tensor,
- bias->tensor, output->tensor, bn_mean->tensor, bn_var->tensor, bn_scale->tensor,
- bn_bias->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &conv_desc, &bn_desc, input->tensor,
+ weight->tensor, bias->tensor, output->tensor, bn_mean->tensor, bn_var->tensor,
+ bn_scale->tensor, bn_bias->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
} else {
result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormActivationForwardQCOM(
- workspace->context, 0, &conv_desc, &bn_desc, &act_desc, input->tensor, weight->tensor,
- bias->tensor, output->tensor, NULL, bn_mean->tensor, bn_var->tensor, bn_scale->tensor,
- bn_bias->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &conv_desc, &bn_desc, &act_desc, input->tensor,
+ weight->tensor, bias->tensor, output->tensor, NULL, bn_mean->tensor, bn_var->tensor,
+ bn_scale->tensor, bn_bias->tensor, &op, tuning_cache);

ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
}
@@ -895,13 +898,13 @@ class CLMLRuntime : public JSONRuntimeBase {

cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << ":" << result;

- result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM(workspace->context, 0, &act_desc,
- input->tensor, layer_.unusedTensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM(
+ workspace->contexts[platform_id], 0, &act_desc, input->tensor, layer_.unusedTensor,
+ output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Activation Error:" << result;

layer_.func_ins.push_back(input);
@@ -947,8 +950,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpBatchNormForwardQCOM(
- workspace->context, 0, &bn_desc, input->tensor, bn_mean->tensor, bn_var->tensor,
- bn_scale->tensor, bn_bias->tensor, output->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &bn_desc, input->tensor, bn_mean->tensor,
+ bn_var->tensor, bn_scale->tensor, bn_bias->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Batchnorm Error:" << result;

layer->function.push_back(op);
@@ -997,12 +1000,13 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_tensor_desc_qcom desc = {};
cl_ml_tensor_qcom unusedTensor = NULL;
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &unusedTensor);
ICHECK(unusedTensor && result == CL_SUCCESS) << ":" << result;

- result =
- h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->context, 0, &pool_desc, input->tensor,
- unusedTensor, output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->contexts[platform_id], 0,
+ &pool_desc, input->tensor, unusedTensor,
+ output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Pooling Error:" << result;

layer_.func_ins.push_back(input);
@@ -1043,13 +1047,13 @@ class CLMLRuntime : public JSONRuntimeBase {

cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << ":" << result;

- result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->context, 0, &pool_desc,
- input->tensor, layer_.unusedTensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(
+ workspace->contexts[platform_id], 0, &pool_desc, input->tensor, layer_.unusedTensor,
+ output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Pooling Error:" << result;

layer_.func_ins.push_back(input);
@@ -1079,7 +1083,7 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_softmax_desc_qcom softmax_desc = {CL_SOFTMAX_ALGORITHM_ACCURATE_QCOM,
CL_SOFTMAX_MODE_INSTANCE_QCOM, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM(workspace->context, 0, &softmax_desc,
+ result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM(workspace->contexts[platform_id], 0, &softmax_desc,
input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "SoftMax Error:" << result;

@@ -1125,8 +1129,8 @@ class CLMLRuntime : public JSONRuntimeBase {
{clml_padding[0], clml_padding[1], clml_padding[2], clml_padding[3], 0, 0, 0, 0},
cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpPadQCOM(workspace->context, 0, &pad_desc, input->tensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpPadQCOM(workspace->contexts[platform_id], 0, &pad_desc,
+ input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Pad Error:" << result;

layer_.func_ins.push_back(input);
@@ -1150,7 +1154,7 @@
cl_dtype);
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

- result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->context, 0, input->tensor,
+ result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->contexts[platform_id], 0, input->tensor,
output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Reshape Error:" << result;

@@ -1175,7 +1179,7 @@
cl_dtype);
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

- result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->context, 0, input->tensor,
+ result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->contexts[platform_id], 0, input->tensor,
output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Reshape Error:" << result;

@@ -1210,8 +1214,8 @@ class CLMLRuntime : public JSONRuntimeBase {
}
cl_ml_op_concat_desc_qcom concatDesc = {axis, (cl_uint)inputSize, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpConcatQCOM(workspace->context, 0, &concatDesc, concatInputs,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpConcatQCOM(workspace->contexts[platform_id], 0, &concatDesc,
+ concatInputs, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Concat Error:" << result;

layer->function.push_back(op);
@@ -1250,8 +1254,8 @@ class CLMLRuntime : public JSONRuntimeBase {
} else {
cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
bias->tensor = layer_.unusedTensor;
}
@@ -1269,8 +1273,8 @@
cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
- workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
- output->tensor, &op, NULL);
+ workspace->contexts[platform_id], 0, &conv_desc, input->tensor, weight->tensor,
+ bias->tensor, output->tensor, &op, NULL);
ICHECK(op && result == CL_SUCCESS) << "Fully Connected Error:" << result;

layer->function.push_back(op);
@@ -1300,8 +1304,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_clip_desc_qcom clip_desc = {
CL_CLIP_BY_VALUE_QCOM, {{a_max}, CL_FLOAT}, {{a_min}, CL_FLOAT}, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpClipQCOM(workspace->context, 0, &clip_desc, input->tensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpClipQCOM(workspace->contexts[platform_id], 0, &clip_desc,
+ input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Clip Error:" << result;

layer_.func_ins.push_back(input);
@@ -1342,8 +1346,9 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_binary_desc_qcom add_desc = {
binary_op, {{1.0}, CL_FLOAT}, {{1.0}, CL_FLOAT}, {{0.0}, CL_FLOAT}, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpBinaryQCOM(workspace->context, 0, &add_desc, input_a->tensor,
- input_b->tensor, output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpBinaryQCOM(workspace->contexts[platform_id], 0, &add_desc,
+ input_a->tensor, input_b->tensor, output->tensor,
+ &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << op_name << " Node Error:" << result;

layer_.func_ins.push_back(input_a);
@@ -1371,8 +1376,9 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_uint block_size = std::stoi(node.GetAttr<std::vector<std::string>>("block_size")[0]);

cl_ml_op_depthtospace_desc_qcom dtos_desc = {block_size, cl_arithmetic_mode};
- result = h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM(
- workspace->context, 0, &dtos_desc, input->tensor, output->tensor, &op, tuning_cache);
+ result =
+ h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM(workspace->contexts[platform_id], 0, &dtos_desc,
+ input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "DepthToSpace Layer Error:" << result;

layer_.func_ins.push_back(input);
@@ -1399,8 +1405,9 @@
cl_bool align_corners = std::stoi(node.GetAttr<std::vector<std::string>>("align_corners")[0]);

cl_ml_op_resize_bilinear_desc_qcom resize_desc = {align_corners, false, cl_arithmetic_mode};
- result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM(
- workspace->context, 0, &resize_desc, input->tensor, output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM(workspace->contexts[platform_id], 0,
+ &resize_desc, input->tensor, output->tensor,
+ &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Resize Layer Error:" << result;

layer_.func_ins.push_back(input);
@@ -1418,6 +1425,8 @@
GET_ML_API_INTERFACE* h_ClmlIntf = NULL;
cl::OpenCLWorkspace* workspace = NULL;
cl::OpenCLThreadEntry* tentry = NULL;
+ cl_device_id device_id;
+ cl_platform_id platform_id;
cl_ml_tuningcache_qcom tuning_cache = NULL;
bool is_tuning_run;
char* tuning_file;
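Taken together, the clml_runtime.cc changes swap every use of the single workspace->context for a per-platform lookup, with the device and platform resolved once in InitCLML() and cached in the new device_id / platform_id members. Below is a minimal sketch of that pattern, assuming only the OpenCLWorkspace members that appear in this diff (GetCLDeviceID, device_to_platform, contexts); the local names dev, plat, and ctx are illustrative, not part of the PR.

// Resolve the OpenCL device backing the current TVM device, map it to its
// platform, then pick the context created for that platform (sketch only).
cl_device_id dev = workspace->GetCLDeviceID(tentry->device.device_id);
cl_platform_id plat = workspace->device_to_platform[dev];
cl_context ctx = workspace->contexts[plat];
// Every CLML/OpenCL call then takes ctx instead of the old workspace->context.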
2 changes: 2 additions & 0 deletions src/runtime/opencl/opencl_common.h
@@ -282,6 +282,8 @@ class OpenCLWorkspace : public DeviceAPI {

return prop & CL_QUEUE_PROFILING_ENABLE;
}
+ // Check if the device is present or not
+ bool IsDeviceExists(unsigned int device_id) { return device_id < devices.size(); }
// Enable queue profiling, recreate if required
void EnableQueueProfiling(Device dev, bool enable) {
bool is_enabled = cl::OpenCLWorkspace::Global()->IsProfiling(dev);
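The new helper exposes a simple bounds check on the workspace device table. A hypothetical caller-side guard, written in the style of the runtime code above; dev is assumed to be a TVM Device, and nothing here beyond IsDeviceExists itself comes from this PR.

// Illustrative guard before touching an OpenCL device (not part of this PR).
cl::OpenCLWorkspace* ws = cl::OpenCLWorkspace::Global();
ICHECK(ws->IsDeviceExists(dev.device_id))
    << "OpenCL device " << dev.device_id << " is not available";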