127 changes: 68 additions & 59 deletions src/runtime/contrib/clml/clml_runtime.cc
@@ -140,15 +140,17 @@ class CLMLRuntime : public JSONRuntimeBase {
void InitCLML() {
// Setup CLML Context
cl_int result = 0;

workspace = cl::OpenCLWorkspace::Global();
workspace->Init();
tentry = workspace->GetThreadEntry();

if (!ExtensionStringPresent()) {
- LOG(WARNING) << "CLML Runtime Init: Qualcomm extn not present.\n";
+ LOG(FATAL) << "CLML Runtime Init: Qualcomm extn not present.\n";
return;
}
+ device_id = workspace->GetCLDeviceID(tentry->device.device_id);
+ platform_id = workspace->device_to_platform[device_id];

// Query and Get CLML Interface
static const cl_uint MAX_VERSIONS = 256;
cl_int majorVersions[MAX_VERSIONS];
@@ -492,7 +494,7 @@ class CLMLRuntime : public JSONRuntimeBase {
JSONGraphNode node = it->second.second;
void* node_data = nullptr;

- allocateTensorMemory(h_ClmlIntf, workspace->context, tensor_desc);
+ allocateTensorMemory(h_ClmlIntf, workspace->contexts[platform_id], tensor_desc);

if (node.GetOpType() == "const") {
node_data = data_entry_[EntryID(it->first, 0)]->data;
@@ -581,11 +583,9 @@ class CLMLRuntime : public JSONRuntimeBase {

bool ExtensionStringPresent(void) {
cl_int result = 0;
- if (workspace->platform_id == nullptr) {
- return 0;
- }
size_t reqd_size = 0;
- cl_device_id device_id = workspace->devices[workspace->GetThreadEntry()->device.device_id];
+ cl_device_id device_id =
+ workspace->GetCLDeviceID(workspace->GetThreadEntry()->device.device_id);
result = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, 0, NULL, &reqd_size);
ICHECK(reqd_size > 0u && result == CL_SUCCESS) << "clGetDeviceInfo:" << result;

@@ -607,7 +607,8 @@

cl_ml_tensor_desc_qcom desc = {
dtype, layout, dims.n, dims.c, dims.h, dims.w, 0, CL_TENSOR_DIMENSIONS_4D_QCOM, { 0 }};
- result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &tensor);
+ result =
+ h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc, &tensor);
ICHECK(tensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
(void)result;
return tensor;
@@ -619,11 +620,12 @@
cl_int result = CL_OUT_OF_HOST_MEMORY;
cl_mem buffer = NULL;

- result =
- h_ClmlIntf->clGetMLTensorMemorySizeQCOM(workspace->context, pTensorMemDesc->tensor, &size);
+ result = h_ClmlIntf->clGetMLTensorMemorySizeQCOM(workspace->contexts[platform_id],
+ pTensorMemDesc->tensor, &size);
ICHECK(result == CL_SUCCESS) << "clGetMLTensorMemorySizeQCOM:" << result;

- buffer = clCreateBuffer(workspace->context, CL_MEM_READ_WRITE, size, NULL, &result);
+ buffer =
+ clCreateBuffer(workspace->contexts[platform_id], CL_MEM_READ_WRITE, size, NULL, &result);
ICHECK(result == CL_SUCCESS) << "clCreateBuffer:" << result;

pTensorMemDesc->memory = buffer;
@@ -686,7 +688,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_channel_type cl_dtype = MakeCLDataType(tvm_dtype);

auto tensor_dsc = std::make_shared<cl_ml_tensor_memory_desc_qcom>();
- tensor_dsc->tensor = DeviceMakeCLMLTensor(workspace->context, dims, layout, cl_dtype);
+ tensor_dsc->tensor =
+ DeviceMakeCLMLTensor(workspace->contexts[platform_id], dims, layout, cl_dtype);
return tensor_dsc;
}

@@ -800,8 +803,8 @@ class CLMLRuntime : public JSONRuntimeBase {
} else {
cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
bias->tensor = layer_.unusedTensor;
}
@@ -821,13 +824,13 @@
if (!has_bn) {
if (!has_act) {
result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
- workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
- output->tensor, &op, NULL);
+ workspace->contexts[platform_id], 0, &conv_desc, input->tensor, weight->tensor,
+ bias->tensor, output->tensor, &op, NULL);
ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
} else {
result = h_ClmlIntf->clCreateMLOpFusedConvolutionActivationForwardQCOM(
- workspace->context, 0, &conv_desc, &act_desc, input->tensor, weight->tensor,
- bias->tensor, NULL, output->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &conv_desc, &act_desc, input->tensor,
+ weight->tensor, bias->tensor, NULL, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
}
layer_.func_ins.push_back(input);
@@ -854,15 +857,15 @@
cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};
if (!has_act) {
result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormForwardQCOM(
- workspace->context, 0, &conv_desc, &bn_desc, input->tensor, weight->tensor,
- bias->tensor, output->tensor, bn_mean->tensor, bn_var->tensor, bn_scale->tensor,
- bn_bias->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &conv_desc, &bn_desc, input->tensor,
+ weight->tensor, bias->tensor, output->tensor, bn_mean->tensor, bn_var->tensor,
+ bn_scale->tensor, bn_bias->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
} else {
result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormActivationForwardQCOM(
- workspace->context, 0, &conv_desc, &bn_desc, &act_desc, input->tensor, weight->tensor,
- bias->tensor, output->tensor, NULL, bn_mean->tensor, bn_var->tensor, bn_scale->tensor,
- bn_bias->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &conv_desc, &bn_desc, &act_desc, input->tensor,
+ weight->tensor, bias->tensor, output->tensor, NULL, bn_mean->tensor, bn_var->tensor,
+ bn_scale->tensor, bn_bias->tensor, &op, tuning_cache);

ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
}
@@ -895,13 +898,13 @@ class CLMLRuntime : public JSONRuntimeBase {

cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << ":" << result;

- result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM(workspace->context, 0, &act_desc,
- input->tensor, layer_.unusedTensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM(
+ workspace->contexts[platform_id], 0, &act_desc, input->tensor, layer_.unusedTensor,
+ output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Activation Error:" << result;

layer_.func_ins.push_back(input);
@@ -947,8 +950,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpBatchNormForwardQCOM(
- workspace->context, 0, &bn_desc, input->tensor, bn_mean->tensor, bn_var->tensor,
- bn_scale->tensor, bn_bias->tensor, output->tensor, &op, tuning_cache);
+ workspace->contexts[platform_id], 0, &bn_desc, input->tensor, bn_mean->tensor,
+ bn_var->tensor, bn_scale->tensor, bn_bias->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Batchnorm Error:" << result;

layer->function.push_back(op);
@@ -997,12 +1000,13 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_tensor_desc_qcom desc = {};
cl_ml_tensor_qcom unusedTensor = NULL;
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &unusedTensor);
ICHECK(unusedTensor && result == CL_SUCCESS) << ":" << result;

- result =
- h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->context, 0, &pool_desc, input->tensor,
- unusedTensor, output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->contexts[platform_id], 0,
+ &pool_desc, input->tensor, unusedTensor,
+ output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Pooling Error:" << result;

layer_.func_ins.push_back(input);
@@ -1043,13 +1047,13 @@ class CLMLRuntime : public JSONRuntimeBase {

cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << ":" << result;

- result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->context, 0, &pool_desc,
- input->tensor, layer_.unusedTensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(
+ workspace->contexts[platform_id], 0, &pool_desc, input->tensor, layer_.unusedTensor,
+ output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Pooling Error:" << result;

layer_.func_ins.push_back(input);
@@ -1079,7 +1083,7 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_softmax_desc_qcom softmax_desc = {CL_SOFTMAX_ALGORITHM_ACCURATE_QCOM,
CL_SOFTMAX_MODE_INSTANCE_QCOM, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM(workspace->context, 0, &softmax_desc,
+ result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM(workspace->contexts[platform_id], 0, &softmax_desc,
input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "SoftMax Error:" << result;

@@ -1125,8 +1129,8 @@ class CLMLRuntime : public JSONRuntimeBase {
{clml_padding[0], clml_padding[1], clml_padding[2], clml_padding[3], 0, 0, 0, 0},
cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpPadQCOM(workspace->context, 0, &pad_desc, input->tensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpPadQCOM(workspace->contexts[platform_id], 0, &pad_desc,
+ input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Pad Error:" << result;

layer_.func_ins.push_back(input);
@@ -1150,7 +1154,7 @@
cl_dtype);
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

- result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->context, 0, input->tensor,
+ result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->contexts[platform_id], 0, input->tensor,
output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Reshape Error:" << result;

@@ -1175,7 +1179,7 @@
cl_dtype);
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

- result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->context, 0, input->tensor,
+ result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->contexts[platform_id], 0, input->tensor,
output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Reshape Error:" << result;

@@ -1210,8 +1214,8 @@ class CLMLRuntime : public JSONRuntimeBase {
}
cl_ml_op_concat_desc_qcom concatDesc = {axis, (cl_uint)inputSize, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpConcatQCOM(workspace->context, 0, &concatDesc, concatInputs,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpConcatQCOM(workspace->contexts[platform_id], 0, &concatDesc,
+ concatInputs, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Concat Error:" << result;

layer->function.push_back(op);
@@ -1250,8 +1254,8 @@ class CLMLRuntime : public JSONRuntimeBase {
} else {
cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
- result =
- h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+ result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+ &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
bias->tensor = layer_.unusedTensor;
}
@@ -1269,8 +1273,8 @@
cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
- workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
- output->tensor, &op, NULL);
+ workspace->contexts[platform_id], 0, &conv_desc, input->tensor, weight->tensor,
+ bias->tensor, output->tensor, &op, NULL);
ICHECK(op && result == CL_SUCCESS) << "Fully Connected Error:" << result;

layer->function.push_back(op);
@@ -1300,8 +1304,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_clip_desc_qcom clip_desc = {
CL_CLIP_BY_VALUE_QCOM, {{a_max}, CL_FLOAT}, {{a_min}, CL_FLOAT}, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpClipQCOM(workspace->context, 0, &clip_desc, input->tensor,
- output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpClipQCOM(workspace->contexts[platform_id], 0, &clip_desc,
+ input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Clip Error:" << result;

layer_.func_ins.push_back(input);
@@ -1342,8 +1346,9 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_op_binary_desc_qcom add_desc = {
binary_op, {{1.0}, CL_FLOAT}, {{1.0}, CL_FLOAT}, {{0.0}, CL_FLOAT}, cl_arithmetic_mode};

- result = h_ClmlIntf->clCreateMLOpBinaryQCOM(workspace->context, 0, &add_desc, input_a->tensor,
- input_b->tensor, output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpBinaryQCOM(workspace->contexts[platform_id], 0, &add_desc,
+ input_a->tensor, input_b->tensor, output->tensor,
+ &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << op_name << " Node Error:" << result;

layer_.func_ins.push_back(input_a);
@@ -1371,8 +1376,9 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_uint block_size = std::stoi(node.GetAttr<std::vector<std::string>>("block_size")[0]);

cl_ml_op_depthtospace_desc_qcom dtos_desc = {block_size, cl_arithmetic_mode};
- result = h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM(
- workspace->context, 0, &dtos_desc, input->tensor, output->tensor, &op, tuning_cache);
+ result =
+ h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM(workspace->contexts[platform_id], 0, &dtos_desc,
+ input->tensor, output->tensor, &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "DepthToSpace Layer Error:" << result;

layer_.func_ins.push_back(input);
@@ -1399,8 +1405,9 @@
cl_bool align_corners = std::stoi(node.GetAttr<std::vector<std::string>>("align_corners")[0]);

cl_ml_op_resize_bilinear_desc_qcom resize_desc = {align_corners, false, cl_arithmetic_mode};
- result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM(
- workspace->context, 0, &resize_desc, input->tensor, output->tensor, &op, tuning_cache);
+ result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM(workspace->contexts[platform_id], 0,
+ &resize_desc, input->tensor, output->tensor,
+ &op, tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Resize Layer Error:" << result;

layer_.func_ins.push_back(input);
@@ -1418,6 +1425,8 @@
GET_ML_API_INTERFACE* h_ClmlIntf = NULL;
cl::OpenCLWorkspace* workspace = NULL;
cl::OpenCLThreadEntry* tentry = NULL;
+ cl_device_id device_id;
+ cl_platform_id platform_id;
cl_ml_tuningcache_qcom tuning_cache = NULL;
bool is_tuning_run;
char* tuning_file;
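Taken together, the clml_runtime.cc changes swap every use of the single workspace->context for a per-platform lookup, with the device and platform resolved once in InitCLML() and cached in the new device_id / platform_id members. Below is a minimal sketch of that pattern, assuming only the OpenCLWorkspace members that appear in this diff (GetCLDeviceID, device_to_platform, contexts); the local names dev, plat, and ctx are illustrative, not part of the PR.

// Resolve the OpenCL device backing the current TVM device, map it to its
// platform, then pick the context created for that platform (sketch only).
cl_device_id dev = workspace->GetCLDeviceID(tentry->device.device_id);
cl_platform_id plat = workspace->device_to_platform[dev];
cl_context ctx = workspace->contexts[plat];
// Every CLML/OpenCL call then takes ctx instead of the old workspace->context.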
2 changes: 2 additions & 0 deletions src/runtime/opencl/opencl_common.h
@@ -282,6 +282,8 @@ class OpenCLWorkspace : public DeviceAPI {

return prop & CL_QUEUE_PROFILING_ENABLE;
}
+ // Check if the device is present or not
+ bool IsDeviceExists(unsigned int device_id) { return device_id < devices.size(); }
// Enable queue profiling, recreate if required
void EnableQueueProfiling(Device dev, bool enable) {
bool is_enabled = cl::OpenCLWorkspace::Global()->IsProfiling(dev);
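The new helper exposes a simple bounds check on the workspace device table. A hypothetical caller-side guard, written in the style of the runtime code above; dev is assumed to be a TVM Device, and nothing here beyond IsDeviceExists itself comes from this PR.

// Illustrative guard before touching an OpenCL device (not part of this PR).
cl::OpenCLWorkspace* ws = cl::OpenCLWorkspace::Global();
ICHECK(ws->IsDeviceExists(dev.device_id))
    << "OpenCL device " << dev.device_id << " is not available";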