@@ -4079,7 +4079,33 @@ static vk_submission ggml_vk_begin_submission(vk_device& device, vk_queue& q, bo
40794079 return s;
40804080}
40814081
4082- static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const& descriptor_buffer_infos, size_t push_constant_size, const void* push_constants, std::array<uint32_t, 3> elements) {
// Byte size of a push-constant block supplied as a single struct/class object.
// Only the static type matters, so the argument is left unnamed; non-class
// types (scalars, raw pointers, ...) are rejected at compile time.
template <typename T> size_t push_constant_size(const T &) {
    static_assert(std::is_class<T>::value, "T must be a struct/class");
    return sizeof(T);
}
// Byte size of a push-constant block supplied as a std::vector: the element
// size multiplied by the current element count (0 for an empty vector).
// The argument is genuinely read here, so it carries no "unused" marker.
template <typename T> size_t push_constant_size(const std::vector<T> &t) {
    return sizeof(T) * t.size();
}
// Byte size of a push-constant block supplied as a std::array: the element
// size multiplied by the compile-time extent.
// N must be declared as size_t because that is the type of std::array's extent
// parameter; with a different type (e.g. uint32_t) template argument deduction
// fails on conforming compilers and this overload is never selected.
template <typename T, size_t N> size_t push_constant_size(const std::array<T, N> &) {
    return sizeof(T) * N;
}
4095+
// Address of a push-constant block supplied as a single struct/class object.
// The returned pointer refers to the caller's object, so it is only valid for
// as long as that object lives. Non-class types are rejected at compile time.
template <typename T> auto push_constant_data(const T &t) -> const T * {
    static_assert(std::is_class<T>::value, "T must be a struct/class");
    return &t;
}
// Pointer to the first element of a push-constant block supplied as a
// std::vector; simply forwards to the vector's own data() accessor.
template <typename T> auto push_constant_data(const std::vector<T> &values) -> const T * {
    return values.data();
}
// Pointer to the first element of a push-constant block supplied as a
// std::array.
// N must be declared as size_t because that is the type of std::array's extent
// parameter; with a different type (e.g. uint32_t) template argument deduction
// fails on conforming compilers, so callers would not get a `const T *` from
// this overload.
template <typename T, size_t N> const T *push_constant_data(const std::array<T, N> &t) {
    return t.data();
}
4106+
4107+ template <typename T>
4108+ static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const& descriptor_buffer_infos, const T &push_constants, std::array<uint32_t, 3> elements) {
40834109 const uint32_t wg0 = CEIL_DIV(elements[0], pipeline->wg_denoms[0]);
40844110 const uint32_t wg1 = CEIL_DIV(elements[1], pipeline->wg_denoms[1]);
40854111 const uint32_t wg2 = CEIL_DIV(elements[2], pipeline->wg_denoms[2]);
@@ -4095,7 +4121,7 @@ static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context&
40954121 vk::WriteDescriptorSet write_descriptor_set{ descriptor_set, 0, 0, pipeline->parameter_count, vk::DescriptorType::eStorageBuffer, nullptr, descriptor_buffer_infos.begin() };
40964122 ctx->device->device.updateDescriptorSets({ write_descriptor_set }, {});
40974123
4098- subctx->s->buffer.pushConstants(pipeline->layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size, push_constants);
4124+ subctx->s->buffer.pushConstants(pipeline->layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size(push_constants), push_constant_data( push_constants) );
40994125 subctx->s->buffer.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->pipeline);
41004126 subctx->s->buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
41014127 pipeline->layout,
@@ -4558,18 +4584,18 @@ static void ggml_vk_matmul(
45584584 ggml_vk_sync_buffers(subctx);
45594585 if (split_k == 1) {
45604586 const vk_mat_mat_push_constants pc = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d, k, ne02, ne12, broadcast2, broadcast3, padded_n };
4561- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d }, sizeof(vk_mat_mat_push_constants), & pc, { m, n, batch });
4587+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d }, pc, { m, n, batch });
45624588 return;
45634589 }
45644590
45654591 GGML_ASSERT(batch_stride_d == m * n);
45664592
45674593 const vk_mat_mat_push_constants pc1 = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d, CEIL_DIV(k, split_k), ne02, ne12, broadcast2, broadcast3, padded_n };
45684594 // Make sure enough workgroups get assigned for split k to work
4569- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, split_k_buffer }, sizeof(vk_mat_mat_push_constants), & pc1, { (CEIL_DIV(m, pipeline->wg_denoms[0]) * pipeline->wg_denoms[0]) * split_k, n, batch });
4595+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, split_k_buffer }, pc1, { (CEIL_DIV(m, pipeline->wg_denoms[0]) * pipeline->wg_denoms[0]) * split_k, n, batch });
45704596 ggml_vk_sync_buffers(subctx);
45714597 const std::array<uint32_t, 2> pc2 = { (uint32_t)(m * n * batch), split_k };
4572- ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2.size() * sizeof(uint32_t), pc2.data() , { m * n * batch, 1, 1 });
4598+ ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2, { m * n * batch, 1, 1 });
45734599}
45744600
45754601static vk_pipeline ggml_vk_guess_matmul_id_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, uint32_t m, uint32_t n, bool aligned, ggml_type src0_type) {
@@ -4617,7 +4643,7 @@ static void ggml_vk_matmul_id(
46174643 ggml_vk_sync_buffers(subctx);
46184644 const vk_mat_mat_id_push_constants pc = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d,
46194645 nei0, nei1, nbi1, ne11, padded_n };
4620- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d, ids }, sizeof(vk_mat_mat_id_push_constants), & pc, { m, nei1, n_as });
4646+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d, ids }, pc, { m, nei1, n_as });
46214647}
46224648
46234649static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) {
@@ -4738,7 +4764,7 @@ static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context&
47384764 };
47394765 init_pushconst_fastdiv(pc);
47404766 ggml_vk_sync_buffers(subctx);
4741- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof(vk_op_unary_push_constants), & pc, elements);
4767+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, pc, elements);
47424768}
47434769
47444770static vk_pipeline ggml_vk_get_quantize_pipeline(ggml_backend_vk_context * ctx, ggml_type type) {
@@ -4757,7 +4783,7 @@ static void ggml_vk_quantize_q8_1(ggml_backend_vk_context * ctx, vk_context& sub
47574783 vk_pipeline pipeline = ggml_vk_get_quantize_pipeline(ctx, GGML_TYPE_Q8_1);
47584784
47594785 ggml_vk_sync_buffers(subctx);
4760- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof( uint32_t), &ne , { ne, 1, 1 });
4786+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, std::array< uint32_t, 1>{ne} , { ne, 1, 1 });
47614787}
47624788
47634789static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -4957,7 +4983,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
49574983 } else if (qx_needs_dequant) {
49584984 const std::vector<uint32_t> pc = { (uint32_t)ne01, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)(ggml_nelements(src0)) };
49594985 ggml_vk_sync_buffers(subctx);
4960- ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(uint32_t), pc.data() , { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
4986+ ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
49614987 }
49624988 if (y_non_contig) {
49634989 ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
@@ -5173,7 +5199,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
51735199 ggml_vk_sync_buffers(subctx);
51745200 ggml_vk_dispatch_pipeline(ctx, subctx, dmmv,
51755201 { vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 }, vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23} },
5176- sizeof(vk_mat_vec_push_constants), & pc, { groups_x, (uint32_t)(ne12 * ne13), groups_z });
5202+ pc, { groups_x, (uint32_t)(ne12 * ne13), groups_z });
51775203}
51785204
51795205static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5261,7 +5287,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
52615287 }
52625288
52635289 ggml_vk_sync_buffers(subctx);
5264- ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_p021_f16_f32[gqa_ratio - 1], { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), & pc, { 1, (uint32_t)ne01, workgroups_z });
5290+ ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_p021_f16_f32[gqa_ratio - 1], { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, pc, { 1, (uint32_t)ne01, workgroups_z });
52655291}
52665292
52675293static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5344,7 +5370,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
53445370 const std::array<uint32_t, 9> pc = { (uint32_t)ne00, (uint32_t)ne01, row_stride_x, channel_stride_x, channel_stride_y, (uint32_t)(ne12 / ne02), (uint32_t)ne12, (uint32_t)(qy_shader_offset / ggml_type_size(src1->type)), (uint32_t)(d_shader_offset / ggml_type_size(dst->type)) };
53455371 ggml_vk_sync_buffers(subctx);
53465372 ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_nc_f16_f32,
5347- { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), & pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
5373+ { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
53485374}
53495375
53505376static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5560,7 +5586,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
55605586 const std::vector<uint32_t> pc = { (uint32_t)ne01, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)(ggml_nelements(src0)) };
55615587 ggml_vk_sync_buffers(subctx);
55625588 ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0,
5563- { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(uint32_t), pc.data() , { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
5589+ { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
55645590 }
55655591 if (y_non_contig) {
55665592 ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
@@ -5780,7 +5806,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
57805806 ggml_vk_dispatch_pipeline(ctx, subctx, dmmv,
57815807 { vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 },
57825808 vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23}, vk_subbuffer{ d_ids, ids_buf_offset, ids_sz } },
5783- sizeof(vk_mat_vec_id_push_constants), & pc, { groups_x, (uint32_t)nei0, groups_z });
5809+ pc, { groups_x, (uint32_t)nei0, groups_z });
57845810}
57855811
57865812static void ggml_vk_mul_mat_id(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * src2, ggml_tensor * dst, bool dryrun = false) {
@@ -6130,7 +6156,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61306156 // there's no more than one tile of rows (i.e. workgroups_x would have been
61316157 // one). We reuse workgroups_x to mean the number of splits, so we need to
61326158 // cancel out the divide by wg_denoms[0].
6133- sizeof(vk_flash_attn_push_constants), & pc, { workgroups_x * pipeline->wg_denoms[0], workgroups_y, workgroups_z });
6159+ pc, { workgroups_x * pipeline->wg_denoms[0], workgroups_y, workgroups_z });
61346160
61356161 ggml_vk_sync_buffers(subctx);
61366162 const std::array<uint32_t, 3> pc2 = { D, (uint32_t)ne1, split_k };
@@ -6139,7 +6165,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61396165 vk_subbuffer{ctx->prealloc_split_k, 0, VK_WHOLE_SIZE},
61406166 vk_subbuffer{d_D, d_buf_offset, VK_WHOLE_SIZE},
61416167 },
6142- pc2.size() * uint32_t{sizeof(uint32_t)}, pc2.data() , { (uint32_t)ne1, 1, 1 });
6168+ pc2, { (uint32_t)ne1, 1, 1 });
61436169 } else {
61446170 ggml_vk_dispatch_pipeline(ctx, subctx, pipeline,
61456171 {
@@ -6149,7 +6175,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61496175 vk_subbuffer{d_M, m_buf_offset, VK_WHOLE_SIZE},
61506176 vk_subbuffer{d_D, d_buf_offset, VK_WHOLE_SIZE},
61516177 },
6152- sizeof(vk_flash_attn_push_constants), & pc, { workgroups_x, workgroups_y, workgroups_z });
6178+ pc, { workgroups_x, workgroups_y, workgroups_z });
61536179 }
61546180}
61556181
@@ -6827,7 +6853,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
68276853 }
68286854
68296855 ggml_vk_sync_buffers(subctx);
6830- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), & pc, elements);
6856+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68316857 } else if (op == GGML_OP_ROPE || op == GGML_OP_ROPE_BACK) {
68326858 // Empty src2 is possible in rope, but the shader needs a buffer
68336859 vk_subbuffer subbuf_z;
@@ -6838,26 +6864,26 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
68386864 }
68396865
68406866 ggml_vk_sync_buffers(subctx);
6841- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), & pc, elements);
6867+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68426868 } else if (op == GGML_OP_IM2COL) {
68436869 // im2col uses only src1 and dst buffers
68446870 ggml_vk_sync_buffers(subctx);
6845- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), & pc, elements);
6871+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68466872 } else if (op == GGML_OP_COUNT_EQUAL) {
68476873 ggml_vk_sync_buffers(subctx);
68486874 // count_equal assumes that destination buffer is initialized with zeroes
68496875 ggml_vk_buffer_memset_async(subctx, d_D, d_buf_offset, 0, d_sz);
68506876 ggml_vk_sync_buffers(subctx);
6851- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), & pc, elements);
6877+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68526878 } else if (use_src2) {
68536879 ggml_vk_sync_buffers(subctx);
6854- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_Z, z_buf_offset, z_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), & pc, elements);
6880+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_Z, z_buf_offset, z_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68556881 } else if (use_src1) {
68566882 ggml_vk_sync_buffers(subctx);
6857- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), & pc, elements);
6883+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68586884 } else {
68596885 ggml_vk_sync_buffers(subctx);
6860- ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), & pc, elements);
6886+ ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68616887 }
68626888}
68636889
@@ -7026,7 +7052,7 @@ static void ggml_vk_op_f32_wkv(ggml_backend_vk_context * ctx, vk_context& subctx
70267052 vk_subbuffer{ d_srcs[4], src_offsets[4], src_sizes[4] },
70277053 vk_subbuffer{ d_srcs[5], src_offsets[5], src_sizes[5] },
70287054 vk_subbuffer{ d_D, dst_offset, dst_size }
7029- }, sizeof(vk_op_rwkv_wkv6_push_constants), & pc, elements);
7055+ }, pc, elements);
70307056 } else if (version == 7) {
70317057 ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, {
70327058 vk_subbuffer{ d_srcs[0], src_offsets[0], src_sizes[0] },
@@ -7037,7 +7063,7 @@ static void ggml_vk_op_f32_wkv(ggml_backend_vk_context * ctx, vk_context& subctx
70377063 vk_subbuffer{ d_srcs[5], src_offsets[5], src_sizes[5] },
70387064 vk_subbuffer{ d_srcs[6], src_offsets[6], src_sizes[6] },
70397065 vk_subbuffer{ d_D, dst_offset, dst_size }
7040- }, sizeof(vk_op_rwkv_wkv7_push_constants), & pc, elements);
7066+ }, pc, elements);
70417067 } else {
70427068 // shouldn't happen
70437069 GGML_ASSERT(false);
@@ -7174,7 +7200,7 @@ static void ggml_vk_op_f32_opt_step_adamw(ggml_backend_vk_context * ctx, vk_cont
71747200 vk_subbuffer{ d_GM, gm_offset, gm_size },
71757201 vk_subbuffer{ d_GV, gv_offset, gv_size },
71767202 vk_subbuffer{ d_P, p_offset, p_size },
7177- }, sizeof(vk_op_push_constants), & pc, elements);
7203+ }, pc, elements);
71787204}
71797205
71807206static void ggml_vk_opt_step_adamw(ggml_backend_vk_context * ctx, vk_context& subctx, ggml_tensor * dst, bool dryrun = false) {
@@ -8063,7 +8089,7 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
80638089 vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
80648090 ggml_vk_ctx_begin(ctx->device, subctx);
80658091 const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
8066- ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data() , { (uint32_t)ne, 1, 1});
8092+ ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc, { (uint32_t)ne, 1, 1});
80678093 ggml_vk_ctx_end(subctx);
80688094
80698095 auto begin = std::chrono::high_resolution_clock::now();
0 commit comments