
Commit 28361c4

Merge remote-tracking branch 'origin/master' into GraniteFour
* origin/master:
  Add Vulkan images to docker.md (ggml-org#14472)
  CANN: update aclnnGroupedMatmulV2 to aclnnGroupedMatmulV3 (ggml-org#14411)
  vulkan: Split large mul_mat_id to fit in shared memory (ggml-org#14451)
  add GELU_ERF (ggml-org#14455)
  ggml : remove trailing whitespace (#0)
  sync : ggml
  ggml-cpu : "align corners" for bilinear upscale/downscale (ggml/1285)
  ggml-quants : rename best_mad to best_error (ggml/1283)
  opencl : add GEGLU, REGLU, SWIGLU (ggml-org#14456)
  Add Conv2d for CPU (ggml-org#14388)
2 parents 6cac586 + 1b2aaf2

File tree: 16 files changed (+805, -48 lines)


docs/docker.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -25,6 +25,9 @@ Additionally, there are the following images, similar to the above:
 - `ghcr.io/ggml-org/llama.cpp:full-intel`: Same as `full` but compiled with SYCL support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:light-intel`: Same as `light` but compiled with SYCL support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:server-intel`: Same as `server` but compiled with SYCL support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:full-vulkan`: Same as `full` but compiled with Vulkan support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:light-vulkan`: Same as `light` but compiled with Vulkan support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:server-vulkan`: Same as `server` but compiled with Vulkan support. (platforms: `linux/amd64`)
 
 The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library), you'll need to build the images locally for now.
```

ggml/include/ggml.h

Lines changed: 32 additions & 2 deletions
```diff
@@ -482,6 +482,7 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
@@ -1813,6 +1814,17 @@ extern "C" {
             struct ggml_tensor  * b,
             int                   stride);
 
+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor  * b,   // input data [W, H, C, N]
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
@@ -1855,6 +1867,12 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST  = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
     };
 
     // interpolate
@@ -1867,14 +1885,26 @@ extern "C" {
 
     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   ne0,
             int                   ne1,
             int                   ne2,
             int                   ne3,
-            enum ggml_scale_mode  mode);
+            enum ggml_scale_mode  mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0,
+            int64_t               ne1,
+            int64_t               ne2,
+            int64_t               ne3,
+            uint32_t              mode); // ggml_scale_mode [ | ggml_scale_flag...]
 
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
```
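
The two new entry points above slot into the usual ggml graph flow. A minimal sketch, assuming a CPU-only build where `ggml_graph_compute_with_ctx()` comes from `ggml-cpu.h`; the tensor shapes and thread count are illustrative, not part of this commit:

```cpp
#include "ggml.h"
#include "ggml-cpu.h"

int main() {
    // small scratch context; every tensor and the graph below live inside it
    struct ggml_init_params ip = { /*mem_size*/ 128u*1024*1024, /*mem_buffer*/ nullptr, /*no_alloc*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    // kernel [KW=3, KH=3, IC=8, OC=16], input [W=64, H=64, C=8, N=1]
    struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3, 8, 16);
    struct ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 8,  1);

    // direct conv, stride 1 / padding 1 / dilation 1 -> [64, 64, 16, 1]
    struct ggml_tensor * y = ggml_conv_2d_direct(ctx, k, x, 1, 1, 1, 1, 1, 1);

    // upsample to 128x128; a ggml_scale_flag is OR'ed into the mode word
    struct ggml_tensor * z = ggml_interpolate(ctx, y, 128, 128, 16, 1,
                                              GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, z);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads*/ 4);

    ggml_free(ctx);
}
```

Note the deprecation path: existing callers of `ggml_upscale_ext()` keep compiling (with a warning) and should migrate to `ggml_interpolate()`, which takes the same dimensions as `int64_t` plus the widened `mode` word that carries both the scale mode and optional flags.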

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 65 additions & 4 deletions
```diff
@@ -65,7 +65,7 @@
 #include <aclnnop/aclnn_eq_tensor.h>
 #include <aclnnop/aclnn_gt_scalar.h>
 #include <aclnnop/aclnn_pow.h>
-#include <aclnnop/aclnn_grouped_matmul_v2.h>
+#include <aclnnop/aclnn_grouped_matmul_v3.h>
 #include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
 #include <float.h>
 
@@ -2654,6 +2654,67 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
         memcpy(ori_src0_nb, cast_nb, sizeof(ori_src0_nb));
     }
 
+#ifdef ASCEND_310P
+    ggml_tensor src0_row = *src0;
+    ggml_tensor src1_row = *src1;
+    ggml_tensor dst_row  = *dst;
+
+    if (src0->type == GGML_TYPE_F16) {
+        src0_row.type = GGML_TYPE_F32;
+    }
+
+    // src0_row [D, M, 1, 1] weight without permute
+    src0_row.ne[2] = 1;
+    src0_row.ne[3] = 1;
+    src0_row.nb[0] = ori_src0_nb[0];
+    src0_row.nb[1] = ori_src0_nb[1];
+    src0_row.nb[2] = ori_src0_nb[1];
+    src0_row.nb[3] = ori_src0_nb[1];
+
+    // src1_row [D, 1, 1, 1] -> input
+    src1_row.ne[1] = 1;
+    src1_row.ne[2] = 1;
+    src1_row.ne[3] = 1;
+    src1_row.nb[2] = nb11;
+    src1_row.nb[3] = nb11;
+
+    // dst_row [M, 1, 1, 1] -> out
+    dst_row.ne[1] = 1;
+    dst_row.ne[2] = 1;
+    dst_row.ne[3] = 1;
+    dst_row.nb[2] = nb1;
+    dst_row.nb[3] = nb1;
+
+    // create weight for one row
+    for (int64_t iid1 = 0; iid1 < ids->ne[1]; iid1++) {
+        for (int64_t id = 0; id < n_ids; id++) {
+            // expert index
+            int32_t i02 = *(int32_t *) (ids_host.data() + iid1*ids->nb[1] + id*ids->nb[0]);
+            GGML_ASSERT(i02 >= 0 && i02 < n_as);
+
+            // If B = 1 (broadcast), always use 0; otherwise, use id.
+            int64_t i11 = (ne11 == 1 ? 0 : id);
+            int64_t i12 = iid1;
+
+            int64_t i1 = id;
+            int64_t i2 = i12;
+
+            void* src0_tmp_ptr = src0_original + i02*ori_src0_nb[2];
+            void* src1_tmp_ptr = src1_original + i11*nb11 + i12*nb12;
+            void* dst_tmp_ptr  = dst_original  + i1*nb1   + i2*nb2;
+
+            src0_row.data = src0_tmp_ptr;
+            src1_row.data = src1_tmp_ptr;
+            dst_row.data  = dst_tmp_ptr;
+            dst_row.src[0] = &src0_row;
+            dst_row.src[1] = &src1_row;
+
+            ggml_cann_mul_mat(ctx, &dst_row);
+        }
+    }
+    return;
+#endif
+
     std::vector<aclTensor*> src0_tensor_vec;
     std::vector<aclTensor*> src1_tensor_vec;
     std::vector<aclTensor*> dst_tensor_vec;
@@ -2701,9 +2762,9 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
     }
 
     size_t GROUP_SIZE = 128;
-    // GroupedMatmulV2 requires tensor_list.size < 128
+    // GroupedMatmulV3 requires tensor_list.size < 128
     for (size_t i = 0; i < src0_tensor_vec.size(); i += GROUP_SIZE) {
-        // split and call GroupedMatmulV2
+        // split and call GroupedMatmulV3
         size_t end = std::min(i + GROUP_SIZE, src0_tensor_vec.size());
         std::vector<aclTensor*> src0_tensor_vec_split(src0_tensor_vec.begin() + i, src0_tensor_vec.begin() + end);
         std::vector<aclTensor*> src1_tensor_vec_split(src1_tensor_vec.begin() + i, src1_tensor_vec.begin() + end);
@@ -2713,7 +2774,7 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
         aclTensorList* src1_tensor_list = aclCreateTensorList(src1_tensor_vec_split.data(), src1_tensor_vec_split.size());
         aclTensorList* dst_tensor_list  = aclCreateTensorList(dst_tensor_vec_split.data(), dst_tensor_vec_split.size());
 
-        GGML_CANN_CALL_ACLNN_OP(ctx, GroupedMatmulV2, src1_tensor_list, src0_tensor_list,
+        GGML_CANN_CALL_ACLNN_OP(ctx, GroupedMatmulV3, src1_tensor_list, src0_tensor_list,
             nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, -1, dst_tensor_list);
 
         ggml_cann_release_resources(ctx, src0_tensor_list, src1_tensor_list, dst_tensor_list);
```
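
The `GROUP_SIZE` loop above is a plain chunked-dispatch pattern: the grouped matmul accepts fewer than 128 tensors per `aclTensorList`, so the vectors are sliced into windows and the op is issued once per window. A standalone sketch of the same pattern; the helper name and callback shape are ours for illustration, not CANN API:

```cpp
#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

// Slice `items` into windows of at most `group_size` and hand each window to
// `call` -- the same shape as the GroupedMatmulV3 loop above, where
// group_size = 128 matches the aclnn tensor-list limit.
template <typename T>
void dispatch_in_chunks(const std::vector<T> & items, size_t group_size,
                        const std::function<void(const std::vector<T> &)> & call) {
    for (size_t i = 0; i < items.size(); i += group_size) {
        const size_t end = std::min(i + group_size, items.size());
        call(std::vector<T>(items.begin() + i, items.begin() + end));
    }
}
```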

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 10 additions & 1 deletion
```diff
@@ -1193,7 +1193,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     }
 }
 
-static void ggml_compute_forward_mul_mat(
+void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
               struct ggml_tensor * dst) {
 
@@ -1866,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -2228,6 +2232,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
@@ -2746,6 +2751,10 @@ struct ggml_cplan ggml_graph_plan(
                         GGML_ABORT("fatal error");
                     }
                 } break;
+            case GGML_OP_CONV_2D:
+                {
+                    cur = GGML_IM2COL_WORK_SIZE;
+                } break;
             case GGML_OP_CONV_TRANSPOSE_2D:
                 {
                     const int64_t ne00 = node->src[0]->ne[0]; // W
```
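
With `GGML_OP_CONV_2D` now reserving `GGML_IM2COL_WORK_SIZE` bytes in `ggml_graph_plan()`, callers that drive the CPU backend manually must route that scratch buffer back through the `ggml_cplan`. A minimal sketch of the explicit plan/compute path, assuming a graph `gf` that contains a conv-2d node (e.g. one built with `ggml_conv_2d_direct` as in the earlier sketch); the exact `ggml_graph_plan()` arguments may differ by version:

```cpp
#include <cstdint>
#include <vector>
#include "ggml.h"
#include "ggml-cpu.h"

// gf: a graph ending in a GGML_OP_CONV_2D node
void run_with_explicit_plan(struct ggml_cgraph * gf) {
    // the plan's work_size now accounts for GGML_IM2COL_WORK_SIZE per the hunk above
    struct ggml_cplan plan = ggml_graph_plan(gf, /*n_threads*/ 4, /*threadpool*/ nullptr);
    std::vector<uint8_t> work(plan.work_size);
    plan.work_data = work.data();
    ggml_graph_compute(gf, &plan);
}
```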
