diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index f1ea1df7ff9..e0a75cde814 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -17,11 +17,10 @@ namespace vkcompute { api::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) { - std::stringstream noop_shader_name; - noop_shader_name << "no_op"; - apply_ndim_suffix(noop_shader_name, graph.get_val(packed).toTensor()); - apply_dtype_suffix(noop_shader_name, graph.get_val(packed).toTensor()); - return VK_KERNEL_FROM_STR(noop_shader_name.str()); + std::string noop_shader_name("no_op"); + add_ndim_suffix(noop_shader_name, graph.get_val(packed).toTensor()); + add_dtype_suffix(noop_shader_name, graph.get_val(packed).toTensor()); + return VK_KERNEL_FROM_STR(noop_shader_name); } PrepackNode::PrepackNode( diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp index 28988433974..8f23286ff58 100644 --- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp @@ -75,14 +75,15 @@ void add_binary_op_node( const api::utils::ivec2 broadcast_params = create_broadcast_params(t_in1, t_in2); - std::stringstream kernel_name; - kernel_name << "binary_" << op_name; - apply_memory_layout_suffix(kernel_name, t_out); - apply_dtype_suffix(kernel_name, t_out); + std::string kernel_name("binary_"); + kernel_name.reserve(kShaderNameReserve); + kernel_name += op_name; + add_memory_layout_suffix(kernel_name, t_out); + add_dtype_suffix(kernel_name, t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), global_size, local_size, // Inputs and Outputs diff --git a/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp b/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp index 5b83e7d31fa..2ca2749baed 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp @@ -90,40 +90,41 @@ api::ShaderInfo get_conv2d_shader( const bool prepack_weights, const Conv2dMethod method, const ValueRef weight) { - std::stringstream kernel_name; + std::string kernel_name; + kernel_name.reserve(kShaderNameReserve); switch (method) { case Conv2dMethod::Depthwise: - kernel_name << "conv2d_dw"; + kernel_name = "conv2d_dw"; if (!prepack_weights) { const auto& weight_sizes = graph.get_val(weight).toTensorRef().sizes; if (weight_sizes.at(2) == 3 && weight_sizes.at(3) == 3) { - kernel_name << "_output_tile_3x3"; + kernel_name += "_output_tile_3x3"; } if (weight_sizes.at(2) == 5 && weight_sizes.at(3) == 5) { - kernel_name << "_output_tile_5x5"; + kernel_name += "_output_tile_5x5"; } } break; case Conv2dMethod::Pointwise: if (prepack_weights) { - kernel_name << "conv2d"; + kernel_name = "conv2d"; } else { - kernel_name << "conv2d_pw"; + kernel_name = "conv2d_pw"; } break; case Conv2dMethod::SlidingWindow: - kernel_name << "conv2d"; + kernel_name = "conv2d"; break; case Conv2dMethod::Transposed: - kernel_name << "conv_transpose2d"; + kernel_name = "conv_transpose2d"; break; } if (prepack_weights) { - kernel_name << "_prepack_weights"; + kernel_name += "_prepack_weights"; } - apply_dtype_suffix(kernel_name, t_out); + add_dtype_suffix(kernel_name, t_out); - return VK_KERNEL_FROM_STR(kernel_name.str()); + return VK_KERNEL_FROM_STR(kernel_name); } std::vector get_final_sizes( diff --git a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp index ed49926baef..32b5d613b1b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp @@ -78,15 +78,15 @@ void add_matmul_node( api::utils::uvec3 global_size = t_out.virtual_extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); - std::stringstream kernel_name; - kernel_name << "matmul"; - apply_memory_layout_suffix(kernel_name, t_mat1); - apply_memory_layout_suffix(kernel_name, t_mat2); - apply_dtype_suffix(kernel_name, t_out); + std::string kernel_name("matmul"); + kernel_name.reserve(kShaderNameReserve); + add_memory_layout_suffix(kernel_name, t_mat1); + add_memory_layout_suffix(kernel_name, t_mat2); + add_dtype_suffix(kernel_name, t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), global_size, local_size, // Inputs and Outputs diff --git a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp index e5fe1954715..632d540334d 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp @@ -73,9 +73,8 @@ void add_max_pool2d_node( api::utils::uvec3 global_size = t_out.virtual_extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); - std::stringstream kernel_name; - kernel_name << "max_pool2d"; - apply_dtype_suffix(kernel_name, t_out); + std::string kernel_name("max_pool2d"); + add_dtype_suffix(kernel_name, t_out); KernelParams kernel_params = create_kernel_params( graph, @@ -87,7 +86,7 @@ void add_max_pool2d_node( graph.execute_nodes().emplace_back(new ExecuteNode( graph, - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), global_size, local_size, // Inputs and Outputs diff --git a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp index ff235a2357f..9b25345fcc1 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp @@ -71,17 +71,17 @@ void add_sum_dim_node( api::utils::uvec3 global_size = t_out.virtual_extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); - std::stringstream kernel_name; - kernel_name << "sum_dim"; + std::string kernel_name("sum_dim"); + kernel_name.reserve(kShaderNameReserve); if (keepdim) { - kernel_name << "_keepdim"; + kernel_name += "_keepdim"; } - apply_dtype_suffix(kernel_name, t_out); + add_dtype_suffix(kernel_name, t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), global_size, local_size, // Inputs and Outputs diff --git a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp index 9960b6054d6..2ea14a41237 100644 --- a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp @@ -44,13 +44,12 @@ void add_unary_op_node( api::utils::uvec3 global_size = t_out.virtual_extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); - std::stringstream kernel_name; - kernel_name << op_name; - apply_dtype_suffix(kernel_name, t_out); + std::string kernel_name(op_name); + add_dtype_suffix(kernel_name, t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), global_size, local_size, // Inputs and Outputs diff --git a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp index e472690327b..0bca0b4f055 100644 --- a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp @@ -10,47 +10,45 @@ namespace vkcompute { -void apply_dtype_suffix(std::stringstream& kernel_name, const vTensor& tensor) { +void add_dtype_suffix(std::string& kernel_name, const vTensor& tensor) { switch (tensor.image().format()) { case VK_FORMAT_R32G32B32A32_SFLOAT: - kernel_name << "_float"; + kernel_name += "_float"; break; case VK_FORMAT_R16G16B16A16_SFLOAT: - kernel_name << "_half"; + kernel_name += "_half"; break; case VK_FORMAT_R32G32B32A32_SINT: - kernel_name << "_int"; + kernel_name += "_int"; break; default: break; } } -void apply_ndim_suffix(std::stringstream& kernel_name, const vTensor& tensor) { +void add_ndim_suffix(std::string& kernel_name, const vTensor& tensor) { switch (tensor.storage_type()) { case api::kTexture3D: - kernel_name << "_3d"; + kernel_name += "_3d"; break; case api::kTexture2D: - kernel_name << "_2d"; + kernel_name += "_2d"; break; default: break; } } -void apply_memory_layout_suffix( - std::stringstream& kernel_name, - const vTensor& tensor) { +void add_memory_layout_suffix(std::string& kernel_name, const vTensor& tensor) { switch (tensor.gpu_memory_layout()) { case api::kChannelsPacked: - kernel_name << "_C_packed"; + kernel_name += "_C_packed"; break; case api::kHeightPacked: - kernel_name << "_H_packed"; + kernel_name += "_H_packed"; break; case api::kWidthPacked: - kernel_name << "_W_packed"; + kernel_name += "_W_packed"; break; default: break; diff --git a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h index 3da972b224c..a784a4acb4c 100644 --- a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h +++ b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h @@ -10,16 +10,16 @@ #include -#include +#include namespace vkcompute { -void apply_dtype_suffix(std::stringstream& kernel_name, const vTensor& tensor); +constexpr size_t kShaderNameReserve = 64u; -void apply_ndim_suffix(std::stringstream& kernel_name, const vTensor& tensor); +void add_dtype_suffix(std::string& kernel_name, const vTensor& tensor); -void apply_memory_layout_suffix( - std::stringstream& kernel_name, - const vTensor& tensor); +void add_ndim_suffix(std::string& kernel_name, const vTensor& tensor); + +void add_memory_layout_suffix(std::string& kernel_name, const vTensor& tensor); } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp index 94228321f79..71819f73e9c 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp @@ -94,23 +94,24 @@ api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst) { VK_THROW("Quantized Tensors are currently not supported!"); } - std::stringstream kernel_name; + std::string kernel_name; + kernel_name.reserve(kShaderNameReserve); switch (v_dst.storage_type()) { case api::kTexture3D: - kernel_name << "nchw_to_image3d"; + kernel_name = "nchw_to_image3d"; break; case api::kTexture2D: - kernel_name << "nchw_to_image2d"; + kernel_name = "nchw_to_image2d"; break; default: VK_THROW("No kernel available!"); } - apply_memory_layout_suffix(kernel_name, v_dst); - apply_dtype_suffix(kernel_name, v_dst); + add_memory_layout_suffix(kernel_name, v_dst); + add_dtype_suffix(kernel_name, v_dst); - return VK_KERNEL_FROM_STR(kernel_name.str()); + return VK_KERNEL_FROM_STR(kernel_name); } api::ShaderInfo get_image_to_nchw_shader(const vTensor& v_src) { @@ -118,23 +119,24 @@ api::ShaderInfo get_image_to_nchw_shader(const vTensor& v_src) { VK_THROW("Quantized Tensors are currently not supported!"); } - std::stringstream kernel_name; + std::string kernel_name; + kernel_name.reserve(kShaderNameReserve); switch (v_src.storage_type()) { case api::kTexture3D: - kernel_name << "image3d_to_nchw"; + kernel_name = "image3d_to_nchw"; break; case api::kTexture2D: - kernel_name << "image2d_to_nchw"; + kernel_name = "image2d_to_nchw"; break; default: VK_THROW("No kernel available!"); } - apply_memory_layout_suffix(kernel_name, v_src); - apply_dtype_suffix(kernel_name, v_src); + add_memory_layout_suffix(kernel_name, v_src); + add_dtype_suffix(kernel_name, v_src); - return VK_KERNEL_FROM_STR(kernel_name.str()); + return VK_KERNEL_FROM_STR(kernel_name); } } // namespace vkcompute diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index caa94dd8f02..bab054b3d8d 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -63,15 +63,15 @@ void record_conv2d_prepack_weights_op( const bool transposed) { api::PipelineBarrier pipeline_barrier{}; - std::stringstream kernel_name; + std::string kernel_name; if (transposed) { - kernel_name << "conv_transpose2d"; + kernel_name = "conv_transpose2d"; } else { - kernel_name << "conv2d"; + kernel_name = "conv2d"; } - kernel_name << "_prepack_weights"; - apply_dtype_suffix(kernel_name, v_dst); - api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name.str()); + kernel_name += "_prepack_weights"; + add_dtype_suffix(kernel_name, v_dst); + api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name); api::UniformParamsBuffer original_sizes_ubo( context, api::utils::make_ivec4(original_sizes, /*reverse = */ true)); @@ -100,13 +100,12 @@ void record_binary_op( vTensor& v_in1, vTensor& v_in2, vTensor& v_dst) { - std::stringstream kernel_name; - kernel_name << "binary_" << op_name << "_nobroadcast__test"; - apply_dtype_suffix(kernel_name, v_dst); + std::string kernel_name = "binary_" + op_name + "_nobroadcast__test"; + add_dtype_suffix(kernel_name, v_dst); api::PipelineBarrier pipeline_barrier{}; context->submit_compute_job( - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), pipeline_barrier, v_dst.virtual_extents(), adaptive_work_group_size(v_dst.virtual_extents()), diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 88e0e68120d..bfbf9d68c29 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -49,9 +49,8 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) { std::vector sizes = {4, 4, 2}; vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true); - std::stringstream kernel_name; - kernel_name << "fill_texture__test"; - apply_dtype_suffix(kernel_name, a); + std::string kernel_name("fill_texture__test"); + add_dtype_suffix(kernel_name, a); struct Params final { api::utils::ivec3 size; @@ -70,7 +69,7 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) { { api::PipelineBarrier pipeline_barrier{}; api::context()->submit_compute_job( - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), pipeline_barrier, {4, 4, 4}, {4, 4, 4}, @@ -748,15 +747,14 @@ void run_from_gpu_test( vTensor vten = vTensor(api::context(), sizes, api::kFloat, storage_type, memory_layout); - std::stringstream kernel_name; - kernel_name << "idx_fill_texture"; - apply_memory_layout_suffix(kernel_name, vten); - apply_dtype_suffix(kernel_name, vten); + std::string kernel_name("idx_fill_texture"); + add_memory_layout_suffix(kernel_name, vten); + add_dtype_suffix(kernel_name, vten); { api::PipelineBarrier pipeline_barrier{}; api::context()->submit_compute_job( - VK_KERNEL_FROM_STR(kernel_name.str()), + VK_KERNEL_FROM_STR(kernel_name), pipeline_barrier, vten.virtual_extents(), {4, 4, 4},