Commit 392301f

Conv2D: Add CPU version
1 parent 716301d commit 392301f

5 files changed, +216 -2 lines changed

ggml/include/ggml.h

Lines changed: 12 additions & 0 deletions
@@ -481,6 +481,7 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
@@ -1723,6 +1724,17 @@ extern "C" {
             struct ggml_tensor  * b,
             int                   stride);
 
+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor  * b,   // input data [W, H, C, N]
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
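For context, a minimal usage sketch of the new API (not part of this commit; the shapes and the pre-existing `ctx` are hypothetical):

// Hypothetical usage sketch, assuming a ggml_context `ctx` already exists.
// Convolve a 64x64 3-channel input with sixteen 3x3 kernels.
struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3, 3, 16); // [KW, KH, IC, OC]
struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 3,  1); // [W, H, C, N]

// stride 1, padding 1, dilation 1 -> "same" spatial size
struct ggml_tensor * out = ggml_conv_2d_direct(ctx, kernel, input, 1, 1, 1, 1, 1, 1);
// out->ne == {64, 64, 16, 1}, i.e. [OW, OH, OC, N]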

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 5 additions & 0 deletions
@@ -1861,6 +1861,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -2206,6 +2210,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:

ggml/src/ggml-cpu/ops.cpp

Lines changed: 157 additions & 0 deletions
@@ -6058,6 +6058,163 @@ void ggml_compute_forward_im2col_back_f32(
     }
 }
 
+// ggml_compute_forward_conv_2d
+
+static void ggml_compute_forward_conv_2d_f32(
+        const ggml_compute_params * params,
+        const ggml_tensor *         kernel,  // [KW, KH, IC, OC]
+        const ggml_tensor *         src,     // [W, H, C, N]
+        ggml_tensor *               dst) {   // [OW, OH, OC, N]
+
+    const int32_t s0 = ggml_get_op_params_i32(dst, 0);
+    const int32_t s1 = ggml_get_op_params_i32(dst, 1);
+    const int32_t p0 = ggml_get_op_params_i32(dst, 2);
+    const int32_t p1 = ggml_get_op_params_i32(dst, 3);
+    const int32_t d0 = ggml_get_op_params_i32(dst, 4);
+    const int32_t d1 = ggml_get_op_params_i32(dst, 5);
+
+    const int64_t OW = dst->ne[0];
+    const int64_t OH = dst->ne[1];
+    const int64_t OC = dst->ne[2];
+    const int64_t N  = dst->ne[3];
+
+    const int64_t IW = src->ne[0];
+    const int64_t IH = src->ne[1];
+    const int64_t IC = src->ne[2];
+
+    const int64_t KW = kernel->ne[0];
+    const int64_t KH = kernel->ne[1];
+
+    const float * kernel_data = (const float *)kernel->data;
+    const float * src_data    = (const float *)src->data;
+    float *       dst_data    = (float *)dst->data;
+
+    const int64_t rows_total      = OH * N;
+    const int64_t rows_per_thread = (rows_total + params->nth - 1) / params->nth;
+    const int64_t row_start       = params->ith * rows_per_thread;
+    const int64_t row_end         = MIN(row_start + rows_per_thread, rows_total);
+
+    for (int64_t row = row_start; row < row_end; ++row) {
+        const int64_t oh = row % OH;
+        const int64_t n  = row / OH;
+        const float * src_batch = src_data + n * IW * IH * IC;
+
+        for (int64_t ow = 0; ow < OW; ++ow) {
+            for (int64_t oc = 0; oc < OC; ++oc) {
+                float sum = 0.0f;
+                const float * kernel_channel = kernel_data + oc * KW * KH * IC;
+
+                for (int64_t kh = 0; kh < KH; ++kh) {
+                    const int64_t ih = oh * s1 - p1 + kh * d1;
+                    if (ih < 0 || ih >= IH) continue;
+
+                    for (int64_t kw = 0; kw < KW; ++kw) {
+                        const int64_t iw = ow * s0 - p0 + kw * d0;
+                        if (iw < 0 || iw >= IW) continue;
+
+                        #pragma omp simd
+                        for (int64_t ic = 0; ic < IC; ++ic) {
+                            const float * kernel_ptr = kernel_channel + (kh * KW + kw) + ic * KW * KH;
+                            const float * src_ptr    = src_batch + (ih * IW + iw) + ic * IW * IH;
+                            sum += (*kernel_ptr) * (*src_ptr);
+                        }
+                    }
+                }
+
+                dst_data[((n * OC + oc) * OH + oh) * OW + ow] = sum;
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_conv_2d_f16(
+        const ggml_compute_params * params,
+        const ggml_tensor *         kernel,  // [KW, KH, IC, OC]
+        const ggml_tensor *         src,     // [W, H, C, N]
+        ggml_tensor *               dst) {   // [OW, OH, OC, N]
+
+    const int32_t s0 = ggml_get_op_params_i32(dst, 0);
+    const int32_t s1 = ggml_get_op_params_i32(dst, 1);
+    const int32_t p0 = ggml_get_op_params_i32(dst, 2);
+    const int32_t p1 = ggml_get_op_params_i32(dst, 3);
+    const int32_t d0 = ggml_get_op_params_i32(dst, 4);
+    const int32_t d1 = ggml_get_op_params_i32(dst, 5);
+
+    const int64_t OW = dst->ne[0];
+    const int64_t OH = dst->ne[1];
+    const int64_t OC = dst->ne[2];
+    const int64_t N  = dst->ne[3];
+
+    const int64_t IW = src->ne[0];
+    const int64_t IH = src->ne[1];
+    const int64_t IC = src->ne[2];
+
+    const int64_t KW = kernel->ne[0];
+    const int64_t KH = kernel->ne[1];
+
+    const ggml_fp16_t * kernel_data = (const ggml_fp16_t *)kernel->data;
+    const ggml_fp16_t * src_data    = (const ggml_fp16_t *)src->data;
+    ggml_fp16_t *       dst_data    = (ggml_fp16_t *)dst->data;
+
+    const int64_t rows_total      = OH * N;
+    const int64_t rows_per_thread = (rows_total + params->nth - 1) / params->nth;
+    const int64_t row_start       = params->ith * rows_per_thread;
+    const int64_t row_end         = MIN(row_start + rows_per_thread, rows_total);
+
+    for (int64_t row = row_start; row < row_end; ++row) {
+        const int64_t oh = row % OH;
+        const int64_t n  = row / OH;
+        const ggml_fp16_t * src_batch = src_data + n * IW * IH * IC;
+
+        for (int64_t ow = 0; ow < OW; ++ow) {
+            for (int64_t oc = 0; oc < OC; ++oc) {
+                float sum = 0.0f;
+                const ggml_fp16_t * kernel_channel = kernel_data + oc * KW * KH * IC;
+                for (int64_t kh = 0; kh < KH; ++kh) {
+                    const int64_t ih = oh * s1 - p1 + kh * d1;
+                    if (ih < 0 || ih >= IH) continue;
+
+                    for (int64_t kw = 0; kw < KW; ++kw) {
+                        const int64_t iw = ow * s0 - p0 + kw * d0;
+                        if (iw < 0 || iw >= IW) continue;
+
+                        for (int64_t ic = 0; ic < IC; ++ic) {
+                            const ggml_fp16_t * kernel_ptr = kernel_channel + (kh * KW + kw) + ic * KW * KH;
+                            const ggml_fp16_t * src_ptr    = src_batch + (ih * IW + iw) + ic * IW * IH;
+                            sum += GGML_FP16_TO_FP32(*kernel_ptr) * GGML_FP16_TO_FP32(*src_ptr);
+                        }
+                    }
+                }
+
+                dst_data[((n * OC + oc) * OH + oh) * OW + ow] = GGML_FP32_TO_FP16(sum);
+            }
+        }
+    }
+}
+
+void ggml_compute_forward_conv_2d(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    switch (src0->type) {
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_conv_2d_f16(params, src0, src1, dst);
+            } break;
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_conv_transpose_2d
 
 void ggml_compute_forward_conv_transpose_2d(
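Work in both kernels is split by flattening the (batch, output row) pairs into rows_total = OH * N rows and giving each thread a contiguous chunk, so threads write disjoint output rows. The flat offsets used by the inner loops follow ggml's convention that ne[0] varies fastest; a reference sketch (assuming the contiguous, non-strided tensors these kernels expect):

// Flat element offsets, assuming contiguous tensors with ne[0] fastest:
//   src    (iw, ih, ic, n)  -> ((n  * IC + ic) * IH + ih) * IW + iw
//   kernel (kw, kh, ic, oc) -> ((oc * IC + ic) * KH + kh) * KW + kw
//   dst    (ow, oh, oc, n)  -> ((n  * OC + oc) * OH + oh) * OW + ow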

ggml/src/ggml-cpu/ops.h

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ void ggml_compute_forward_clamp(const struct ggml_compute_params * params, struc
 void ggml_compute_forward_conv_transpose_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col_back_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_conv_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_transpose_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_2d_dw(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_pool_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);

ggml/src/ggml.c

Lines changed: 41 additions & 2 deletions
@@ -983,7 +983,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };
 
-static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
+static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1040,6 +1040,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "conv_transpose_1d(x)",
     "im2col(x)",
     "im2col_back(x)",
+    "conv_2d(x)",
     "conv_2d_dw(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
@@ -1079,7 +1080,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
+static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -4120,6 +4121,44 @@ struct ggml_tensor * ggml_conv_2d_dw_direct(
     return result;
 }
 
+// ggml_conv_2d_direct
+
+struct ggml_tensor * ggml_conv_2d_direct(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,    // convolution kernel [KW, KH, IC, OC]
+        struct ggml_tensor  * b,    // input data [W, H, C, N]
+        int                   s0,   // stride dimension 0
+        int                   s1,   // stride dimension 1
+        int                   p0,   // padding dimension 0
+        int                   p1,   // padding dimension 1
+        int                   d0,   // dilation dimension 0
+        int                   d1) { // dilation dimension 1
+
+    GGML_ASSERT(a->ne[2] == b->ne[2]);
+    GGML_ASSERT(a->type == b->type);
+
+    int64_t ne[4];
+    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
+    ne[2] = a->ne[3];
+    ne[3] = b->ne[3];
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
+
+    ggml_set_op_params_i32(result, 0, s0);
+    ggml_set_op_params_i32(result, 1, s1);
+    ggml_set_op_params_i32(result, 2, p0);
+    ggml_set_op_params_i32(result, 3, p1);
+    ggml_set_op_params_i32(result, 4, d0);
+    ggml_set_op_params_i32(result, 5, d1);
+
+    result->op     = GGML_OP_CONV_2D;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 // ggml_conv_transpose_2d_p0
 
 static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
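The output shape comes from the standard dilated-convolution size formula; a sketch of what ggml_calc_conv_output_size computes (the helper name `conv2d_out_size` below is hypothetical, for illustration only):

// Standard dilated-convolution output size (assumed to match
// ggml_calc_conv_output_size in ggml.c):
//   out = (in + 2*p - d*(k - 1) - 1) / s + 1
// Example: in = 64, k = 3, s = 1, p = 1, d = 1
//   out = (64 + 2 - 2 - 1) / 1 + 1 = 64   ("same" padding)
static int64_t conv2d_out_size(int64_t in, int64_t k, int s, int p, int d) {
    return (in + 2*p - d*(k - 1) - 1) / s + 1;
}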
