Skip to content

Commit 603e43d

Browse files
authored
opencl : update upscale to support align corners (#14488)
1 parent 611ba4b commit 603e43d

File tree

2 files changed: 36 additions and 25 deletions

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 34 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -4453,7 +4453,8 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg
44534453

44544454
ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
44554455

4456-
const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
4456+
const int mode_flags = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
4457+
const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
44574458
cl_kernel kernel = nullptr;
44584459

44594460
if (mode == GGML_SCALE_MODE_NEAREST) {
@@ -4484,18 +4485,22 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg
44844485
const cl_ulong nb02 = src0->nb[2];
44854486
const cl_ulong nb03 = src0->nb[3];
44864487

4487-
const int ne00_src = src0->ne[0];
4488-
const int ne01_src = src0->ne[1];
4488+
const int ne00 = src0->ne[0];
4489+
const int ne01 = src0->ne[1];
4490+
const int ne02 = src0->ne[2];
4491+
const int ne03 = src0->ne[3];
44894492

4490-
const int ne10_dst = dst->ne[0];
4491-
const int ne11_dst = dst->ne[1];
4492-
const int ne12_dst = dst->ne[2];
4493-
const int ne13_dst = dst->ne[3];
4493+
const int ne0 = dst->ne[0];
4494+
const int ne1 = dst->ne[1];
4495+
const int ne2 = dst->ne[2];
4496+
const int ne3 = dst->ne[3];
4497+
4498+
float sf0 = (float)ne0 / ne00;
4499+
float sf1 = (float)ne1 / ne01;
4500+
float sf2 = (float)ne2 / ne02;
4501+
float sf3 = (float)ne3 / ne03;
44944502

4495-
const float sf0 = (float)dst->ne[0] / src0->ne[0];
4496-
const float sf1 = (float)dst->ne[1] / src0->ne[1];
4497-
const float sf2 = (float)dst->ne[2] / src0->ne[2];
4498-
const float sf3 = (float)dst->ne[3] / src0->ne[3];
4503+
float pixel_offset = 0.5f;
44994504

45004505
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra_src0->data_device));
45014506
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &off_src0));
@@ -4507,29 +4512,36 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg
45074512
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb03));
45084513

45094514
if (mode == GGML_SCALE_MODE_NEAREST) {
4510-
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne10_dst));
4511-
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne11_dst));
4512-
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne12_dst));
4513-
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne13_dst));
4515+
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne0));
4516+
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne1));
4517+
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne2));
4518+
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne3));
45144519
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(float), &sf0));
45154520
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(float), &sf1));
45164521
CL_CHECK(clSetKernelArg(kernel, 14, sizeof(float), &sf2));
45174522
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf3));
45184523
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
4519-
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne00_src));
4520-
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne01_src));
4521-
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne10_dst));
4522-
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne11_dst));
4523-
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne12_dst));
4524-
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne13_dst));
4524+
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
4525+
sf0 = (float)(ne0 - 1) / (ne00 - 1);
4526+
sf1 = (float)(ne1 - 1) / (ne01 - 1);
4527+
pixel_offset = 0.0f;
4528+
}
4529+
4530+
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne00));
4531+
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne01));
4532+
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne0));
4533+
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne1));
4534+
CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne2));
4535+
CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne3));
45254536
CL_CHECK(clSetKernelArg(kernel, 14, sizeof(float), &sf0));
45264537
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf1));
45274538
CL_CHECK(clSetKernelArg(kernel, 16, sizeof(float), &sf2));
45284539
CL_CHECK(clSetKernelArg(kernel, 17, sizeof(float), &sf3));
4540+
CL_CHECK(clSetKernelArg(kernel, 18, sizeof(float), &pixel_offset));
45294541
}
45304542

45314543

4532-
size_t dst_total_elements = (size_t)ne10_dst * ne11_dst * ne12_dst * ne13_dst;
4544+
size_t dst_total_elements = (size_t)ne0 * ne1 * ne2 * ne3;
45334545
if (dst_total_elements == 0) {
45344546
return;
45354547
}

ggml/src/ggml-opencl/kernels/upscale.cl

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@ kernel void kernel_upscale_bilinear(
6060
float sf0,
6161
float sf1,
6262
float sf2,
63-
float sf3
63+
float sf3,
64+
float pixel_offset
6465
) {
6566
global const char * src_base = (global const char *)p_src0 + off_src0;
6667
global float * dst_base = (global float *)((global char *)p_dst + off_dst);
@@ -80,8 +81,6 @@ kernel void kernel_upscale_bilinear(
8081
int i02_src = (int)(i12_dst / sf2);
8182
int i03_src = (int)(i13_dst / sf3);
8283

83-
const float pixel_offset = 0.5f;
84-
8584
float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset;
8685
long y0_src = (long)floor(y_src_f);
8786
long y1_src = y0_src + 1;

0 commit comments

Comments
 (0)