Commit 4e3f47c

Conv2D: Add CPU version
1 parent b23fa0b commit 4e3f47c

5 files changed: +216 −2 lines changed

ggml/include/ggml.h

Lines changed: 12 additions & 0 deletions
@@ -481,6 +481,7 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,

@@ -1723,6 +1724,17 @@ extern "C" {
             struct ggml_tensor  * b,
             int                   stride);

+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor  * b,  // input data [W, H, C, N]
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
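
For orientation, a minimal usage sketch of the new entry point (not part of the commit). The context and graph calls are the usual ggml CPU flow; the ggml-cpu.h include, memory size, and tensor shapes here are illustrative assumptions:

    #include "ggml.h"
    #include "ggml-cpu.h" // assumption: ggml_graph_compute_with_ctx lives here in current trees

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 64u*1024*1024, // generous scratch for tensors + graph
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        // kernel [KW=3, KH=3, IC=1, OC=8], input [W=64, H=64, C=1, N=1]
        struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 1, 8);
        struct ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 1, 1);
        // ... fill k->data and x->data with f32 values here ...

        // stride 1, padding 1, dilation 1 -> output [64, 64, 8, 1]
        struct ggml_tensor * y = ggml_conv_2d_direct(ctx, k, x, 1, 1, 1, 1, 1, 1);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, y);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 4); // dispatches GGML_OP_CONV_2D

        ggml_free(ctx);
        return 0;
    }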

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 5 additions & 0 deletions
@@ -1858,6 +1858,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);

@@ -2203,6 +2207,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
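
A note on the second hunk, whose branch body is not shown in the diff: assuming GGML_OP_CONV_2D resolves the same way as the neighboring conv ops in ggml_get_n_tasks, the shared branch sets n_tasks = n_threads, so every worker thread calls into the kernel and the kernel divides the work itself (by output row, as the ops.cpp change below shows). A sketch of the assumed branch:

    case GGML_OP_IM2COL:
    case GGML_OP_IM2COL_BACK:
    case GGML_OP_CONV_2D:
    case GGML_OP_CONV_2D_DW:
    case GGML_OP_CONV_TRANSPOSE_1D:
    case GGML_OP_CONV_TRANSPOSE_2D:
        {
            n_tasks = n_threads; // every thread participates; the kernel claims its slice
        } break;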

ggml/src/ggml-cpu/ops.cpp

Lines changed: 157 additions & 0 deletions
@@ -6058,6 +6058,163 @@ void ggml_compute_forward_im2col_back_f32(
     }
 }

+// ggml_compute_forward_conv_2d
+
+static void ggml_compute_forward_conv_2d_f32(
+        const ggml_compute_params * params,
+        const ggml_tensor * kernel, // [KW, KH, IC, OC]
+        const ggml_tensor * src,    // [W, H, C, N]
+        ggml_tensor * dst) {        // [OW, OH, OC, N]
+
+    const int32_t s0 = ggml_get_op_params_i32(dst, 0);
+    const int32_t s1 = ggml_get_op_params_i32(dst, 1);
+    const int32_t p0 = ggml_get_op_params_i32(dst, 2);
+    const int32_t p1 = ggml_get_op_params_i32(dst, 3);
+    const int32_t d0 = ggml_get_op_params_i32(dst, 4);
+    const int32_t d1 = ggml_get_op_params_i32(dst, 5);
+
+    const int64_t OW = dst->ne[0];
+    const int64_t OH = dst->ne[1];
+    const int64_t OC = dst->ne[2];
+    const int64_t N  = dst->ne[3];
+
+    const int64_t IW = src->ne[0];
+    const int64_t IH = src->ne[1];
+    const int64_t IC = src->ne[2];
+
+    const int64_t KW = kernel->ne[0];
+    const int64_t KH = kernel->ne[1];
+
+    const float * kernel_data = (const float *) kernel->data;
+    const float * src_data    = (const float *) src->data;
+    float       * dst_data    = (float *) dst->data;
+
+    const int64_t rows_total      = OH * N;
+    const int64_t rows_per_thread = (rows_total + params->nth - 1) / params->nth;
+    const int64_t row_start       = params->ith * rows_per_thread;
+    const int64_t row_end         = MIN(row_start + rows_per_thread, rows_total);
+
+    for (int64_t row = row_start; row < row_end; ++row) {
+        const int64_t oh = row % OH;
+        const int64_t n  = row / OH;
+        const float * src_batch = src_data + n * IW * IH * IC;
+
+        for (int64_t ow = 0; ow < OW; ++ow) {
+            for (int64_t oc = 0; oc < OC; ++oc) {
+                float sum = 0.0f;
+                const float * kernel_channel = kernel_data + oc * KW * KH * IC;
+
+                for (int64_t kh = 0; kh < KH; ++kh) {
+                    const int64_t ih = oh * s1 - p1 + kh * d1;
+                    if (ih < 0 || ih >= IH) continue;
+
+                    for (int64_t kw = 0; kw < KW; ++kw) {
+                        const int64_t iw = ow * s0 - p0 + kw * d0;
+                        if (iw < 0 || iw >= IW) continue;
+
+                        #pragma omp simd
+                        for (int64_t ic = 0; ic < IC; ++ic) {
+                            const float * kernel_ptr = kernel_channel + (kh * KW + kw) + ic * KW * KH;
+                            const float * src_ptr    = src_batch + (ih * IW + iw) + ic * IW * IH;
+                            sum += (*kernel_ptr) * (*src_ptr);
+                        }
+                    }
+                }
+
+                dst_data[((n * OC + oc) * OH + oh) * OW + ow] = sum;
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_conv_2d_f16(
+        const ggml_compute_params * params,
+        const ggml_tensor * kernel, // [KW, KH, IC, OC]
+        const ggml_tensor * src,    // [W, H, C, N]
+        ggml_tensor * dst) {        // [OW, OH, OC, N]
+
+    const int32_t s0 = ggml_get_op_params_i32(dst, 0);
+    const int32_t s1 = ggml_get_op_params_i32(dst, 1);
+    const int32_t p0 = ggml_get_op_params_i32(dst, 2);
+    const int32_t p1 = ggml_get_op_params_i32(dst, 3);
+    const int32_t d0 = ggml_get_op_params_i32(dst, 4);
+    const int32_t d1 = ggml_get_op_params_i32(dst, 5);
+
+    const int64_t OW = dst->ne[0];
+    const int64_t OH = dst->ne[1];
+    const int64_t OC = dst->ne[2];
+    const int64_t N  = dst->ne[3];
+
+    const int64_t IW = src->ne[0];
+    const int64_t IH = src->ne[1];
+    const int64_t IC = src->ne[2];
+
+    const int64_t KW = kernel->ne[0];
+    const int64_t KH = kernel->ne[1];
+
+    const ggml_fp16_t * kernel_data = (const ggml_fp16_t *) kernel->data;
+    const ggml_fp16_t * src_data    = (const ggml_fp16_t *) src->data;
+    ggml_fp16_t       * dst_data    = (ggml_fp16_t *) dst->data;
+
+    const int64_t rows_total      = OH * N;
+    const int64_t rows_per_thread = (rows_total + params->nth - 1) / params->nth;
+    const int64_t row_start       = params->ith * rows_per_thread;
+    const int64_t row_end         = MIN(row_start + rows_per_thread, rows_total);
+
+    for (int64_t row = row_start; row < row_end; ++row) {
+        const int64_t oh = row % OH;
+        const int64_t n  = row / OH;
+        const ggml_fp16_t * src_batch = src_data + n * IW * IH * IC;
+
+        for (int64_t ow = 0; ow < OW; ++ow) {
+            for (int64_t oc = 0; oc < OC; ++oc) {
+                float sum = 0.0f;
+                const ggml_fp16_t * kernel_channel = kernel_data + oc * KW * KH * IC;
+                for (int64_t kh = 0; kh < KH; ++kh) {
+                    const int64_t ih = oh * s1 - p1 + kh * d1;
+                    if (ih < 0 || ih >= IH) continue;
+
+                    for (int64_t kw = 0; kw < KW; ++kw) {
+                        const int64_t iw = ow * s0 - p0 + kw * d0;
+                        if (iw < 0 || iw >= IW) continue;
+
+                        for (int64_t ic = 0; ic < IC; ++ic) {
+                            const ggml_fp16_t * kernel_ptr = kernel_channel + (kh * KW + kw) + ic * KW * KH;
+                            const ggml_fp16_t * src_ptr    = src_batch + (ih * IW + iw) + ic * IW * IH;
+                            sum += GGML_FP16_TO_FP32(*kernel_ptr) * GGML_FP16_TO_FP32(*src_ptr);
+                        }
+                    }
+                }
+
+                dst_data[((n * OC + oc) * OH + oh) * OW + ow] = GGML_FP32_TO_FP16(sum);
+            }
+        }
+    }
+}
+
+void ggml_compute_forward_conv_2d(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    switch (src0->type) {
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_conv_2d_f16(params, src0, src1, dst);
+            } break;
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_conv_transpose_2d

 void ggml_compute_forward_conv_transpose_2d(
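
Two implementation notes, not part of the diff. First, both kernels parallelize over rows_total = OH * N output rows with a ceiling-division split, so the last thread may get a short slice. A self-contained worked example of that split, with illustrative values:

    #include <stdio.h>

    // Mirrors the rows_per_thread / row_start / row_end math in the kernels:
    // 10 output rows (e.g. OH = 5, N = 2) over 4 threads.
    int main(void) {
        const long rows_total = 10;
        const int  nth        = 4;  // params->nth in the kernels
        const long rows_per_thread = (rows_total + nth - 1) / nth; // = 3

        for (int ith = 0; ith < nth; ++ith) {
            const long row_start = ith * rows_per_thread;
            long row_end = row_start + rows_per_thread;
            if (row_end > rows_total) row_end = rows_total; // the MIN(...) in the kernels
            printf("thread %d: rows [%ld, %ld)\n", ith, row_start, row_end);
        }
        return 0; // prints [0, 3) [3, 6) [6, 9) [9, 10)
    }

Second, the f16 path accumulates each output element in a float (sum) and converts once at the end with GGML_FP32_TO_FP16, which avoids compounding half-precision rounding error across the reduction.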

ggml/src/ggml-cpu/ops.h

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ void ggml_compute_forward_clamp(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_transpose_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col_back_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_conv_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_transpose_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_2d_dw(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_pool_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);

ggml/src/ggml.c

Lines changed: 41 additions & 2 deletions
@@ -986,7 +986,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };

-static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
+static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");

 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",

@@ -1043,6 +1043,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "conv_transpose_1d(x)",
     "im2col(x)",
     "im2col_back(x)",
+    "conv_2d(x)",
     "conv_2d_dw(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",

@@ -1082,7 +1083,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "adamw(x)",
 };

-static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
+static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");

 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

@@ -4131,6 +4132,44 @@ struct ggml_tensor * ggml_conv_2d_dw_direct(
     return result;
 }

+// ggml_conv_2d_direct
+
+struct ggml_tensor * ggml_conv_2d_direct(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,  // convolution kernel [KW, KH, IC, OC]
+        struct ggml_tensor  * b,  // input data [W, H, C, N]
+        int                   s0, // stride dimension 0
+        int                   s1, // stride dimension 1
+        int                   p0, // padding dimension 0
+        int                   p1, // padding dimension 1
+        int                   d0, // dilation dimension 0
+        int                   d1) { // dilation dimension 1
+
+    GGML_ASSERT(a->ne[2] == b->ne[2]);
+    GGML_ASSERT(a->type == b->type);
+
+    int64_t ne[4];
+    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
+    ne[2] = a->ne[3];
+    ne[3] = b->ne[3];
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
+
+    ggml_set_op_params_i32(result, 0, s0);
+    ggml_set_op_params_i32(result, 1, s1);
+    ggml_set_op_params_i32(result, 2, p0);
+    ggml_set_op_params_i32(result, 3, p1);
+    ggml_set_op_params_i32(result, 4, d0);
+    ggml_set_op_params_i32(result, 5, d1);
+
+    result->op     = GGML_OP_CONV_2D;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 // ggml_conv_transpose_2d_p0

 static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
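
For reference, ggml_calc_conv_output_size (already present in ggml.c) computes the standard convolution output size that the builder above uses for ne[0] and ne[1]. A short check of the shapes it produces, with illustrative inputs:

    #include <stdint.h>
    #include <stdio.h>

    // Standard conv output size: out = (in + 2*p - d*(k - 1) - 1) / s + 1,
    // matching ggml_calc_conv_output_size's behavior for the conv ops.
    static int64_t conv_out_size(int64_t ins, int64_t ks, int s, int p, int d) {
        return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
    }

    int main(void) {
        // 3x3 kernel, stride 1, pad 1, dilation 1: "same"-size output
        printf("%lld\n", (long long) conv_out_size(224, 3, 1, 1, 1)); // 224
        // stride 2 halves the spatial size
        printf("%lld\n", (long long) conv_out_size(224, 3, 2, 1, 1)); // 112
        return 0;
    }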
