Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 34 additions & 22 deletions ggml/src/ggml-opencl/ggml-opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4453,7 +4453,8 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg

ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;

const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
const int mode_flags = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
cl_kernel kernel = nullptr;

if (mode == GGML_SCALE_MODE_NEAREST) {
Expand Down Expand Up @@ -4484,18 +4485,22 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg
const cl_ulong nb02 = src0->nb[2];
const cl_ulong nb03 = src0->nb[3];

const int ne00_src = src0->ne[0];
const int ne01_src = src0->ne[1];
const int ne00 = src0->ne[0];
const int ne01 = src0->ne[1];
const int ne02 = src0->ne[2];
const int ne03 = src0->ne[3];

const int ne10_dst = dst->ne[0];
const int ne11_dst = dst->ne[1];
const int ne12_dst = dst->ne[2];
const int ne13_dst = dst->ne[3];
const int ne0 = dst->ne[0];
const int ne1 = dst->ne[1];
const int ne2 = dst->ne[2];
const int ne3 = dst->ne[3];

float sf0 = (float)ne0 / ne00;
float sf1 = (float)ne1 / ne01;
float sf2 = (float)ne2 / ne02;
float sf3 = (float)ne3 / ne03;

const float sf0 = (float)dst->ne[0] / src0->ne[0];
const float sf1 = (float)dst->ne[1] / src0->ne[1];
const float sf2 = (float)dst->ne[2] / src0->ne[2];
const float sf3 = (float)dst->ne[3] / src0->ne[3];
float pixel_offset = 0.5f;

CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra_src0->data_device));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &off_src0));
Expand All @@ -4507,29 +4512,36 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb03));

if (mode == GGML_SCALE_MODE_NEAREST) {
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne10_dst));
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne11_dst));
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne12_dst));
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne13_dst));
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne0));
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne1));
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne2));
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne3));
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(float), &sf0));
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(float), &sf1));
CL_CHECK(clSetKernelArg(kernel, 14, sizeof(float), &sf2));
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf3));
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne00_src));
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne01_src));
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne10_dst));
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne11_dst));
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne12_dst));
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne13_dst));
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
sf0 = (float)(ne0 - 1) / (ne00 - 1);
sf1 = (float)(ne1 - 1) / (ne01 - 1);
pixel_offset = 0.0f;
}

CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne00));
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne01));
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne0));
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne1));
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne2));
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne3));
CL_CHECK(clSetKernelArg(kernel, 14, sizeof(float), &sf0));
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf1));
CL_CHECK(clSetKernelArg(kernel, 16, sizeof(float), &sf2));
CL_CHECK(clSetKernelArg(kernel, 17, sizeof(float), &sf3));
CL_CHECK(clSetKernelArg(kernel, 18, sizeof(float), &pixel_offset));
}


size_t dst_total_elements = (size_t)ne10_dst * ne11_dst * ne12_dst * ne13_dst;
size_t dst_total_elements = (size_t)ne0 * ne1 * ne2 * ne3;
if (dst_total_elements == 0) {
return;
}
Expand Down
5 changes: 2 additions & 3 deletions ggml/src/ggml-opencl/kernels/upscale.cl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ kernel void kernel_upscale_bilinear(
float sf0,
float sf1,
float sf2,
float sf3
float sf3,
float pixel_offset
) {
global const char * src_base = (global const char *)p_src0 + off_src0;
global float * dst_base = (global float *)((global char *)p_dst + off_dst);
Expand All @@ -80,8 +81,6 @@ kernel void kernel_upscale_bilinear(
int i02_src = (int)(i12_dst / sf2);
int i03_src = (int)(i13_dst / sf3);

const float pixel_offset = 0.5f;

float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset;
long y0_src = (long)floor(y_src_f);
long y1_src = y0_src + 1;
Expand Down
Loading