@@ -432,6 +432,7 @@ struct vk_device_struct {
432
432
vk_pipeline pipeline_cos_f32;
433
433
vk_pipeline pipeline_clamp_f32;
434
434
vk_pipeline pipeline_pad_f32;
435
+ vk_pipeline pipeline_roll_f32;
435
436
vk_pipeline pipeline_repeat_f32, pipeline_repeat_back_f32;
436
437
vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16, pipeline_cpy_f16_f32, pipeline_cpy_f32_bf16;
437
438
vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16, pipeline_contig_cpy_f16_f32, pipeline_contig_cpy_f32_bf16;
@@ -694,6 +695,37 @@ struct vk_op_unary_push_constants {
694
695
};
695
696
static_assert(sizeof(vk_op_unary_push_constants) <= 128, "sizeof(vk_op_unary_push_constants) must be <= 128");
696
697
698
+ static vk_op_unary_push_constants vk_op_unary_push_constants_init(const ggml_tensor * src0, const ggml_tensor * dst, int64_t ne = 0) {
699
+ GGML_ASSERT(ne != 0 || (ggml_nelements(src0) == ggml_nelements(dst)));
700
+ ne = ne != 0 ? ne : ggml_nelements(dst);
701
+ GGML_ASSERT(ne <= (int64_t)std::numeric_limits<uint32_t>::max());
702
+
703
+ vk_op_unary_push_constants p{};
704
+ p.ne = (uint32_t)ne;
705
+
706
+ size_t src0_tsize = ggml_type_size(src0->type);
707
+ p.ne00 = (uint32_t)src0->ne[0];
708
+ p.ne01 = (uint32_t)src0->ne[1];
709
+ p.ne02 = (uint32_t)src0->ne[2];
710
+ p.ne03 = (uint32_t)src0->ne[3];
711
+ p.nb00 = (uint32_t)(src0->nb[0] / src0_tsize);
712
+ p.nb01 = (uint32_t)(src0->nb[1] / src0_tsize);
713
+ p.nb02 = (uint32_t)(src0->nb[2] / src0_tsize);
714
+ p.nb03 = (uint32_t)(src0->nb[3] / src0_tsize);
715
+
716
+ size_t dst_tsize = ggml_type_size(dst->type);
717
+ p.ne10 = (uint32_t)dst->ne[0];
718
+ p.ne11 = (uint32_t)dst->ne[1];
719
+ p.ne12 = (uint32_t)dst->ne[2];
720
+ p.ne13 = (uint32_t)dst->ne[3];
721
+ p.nb10 = (uint32_t)(dst->nb[0] / dst_tsize);
722
+ p.nb11 = (uint32_t)(dst->nb[1] / dst_tsize);
723
+ p.nb12 = (uint32_t)(dst->nb[2] / dst_tsize);
724
+ p.nb13 = (uint32_t)(dst->nb[3] / dst_tsize);
725
+
726
+ return p; // fastdiv values and offsets are initialized later in ggml_vk_op
727
+ }
728
+
697
729
// See https://gmplib.org/~tege/divcnst-pldi94.pdf figure 4.1.
698
730
// Precompute mp (m' in the paper) and L such that division
699
731
// can be computed using a multiply (high 32b of 64b result)
@@ -2836,6 +2868,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
2836
2868
2837
2869
ggml_vk_create_pipeline(device, device->pipeline_pad_f32, "pad_f32", pad_f32_len, pad_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2838
2870
2871
+ ggml_vk_create_pipeline(device, device->pipeline_roll_f32, "roll_f32", roll_f32_len, roll_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2872
+
2839
2873
ggml_vk_create_pipeline(device, device->pipeline_repeat_f32, "repeat_f32", repeat_f32_len, repeat_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2840
2874
ggml_vk_create_pipeline(device, device->pipeline_repeat_back_f32, "repeat_back_f32", repeat_back_f32_len, repeat_back_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2841
2875
@@ -6536,6 +6570,11 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
6536
6570
return ctx->device->pipeline_pad_f32;
6537
6571
}
6538
6572
return nullptr;
6573
+ case GGML_OP_ROLL:
6574
+ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
6575
+ return ctx->device->pipeline_roll_f32;
6576
+ }
6577
+ return nullptr;
6539
6578
case GGML_OP_REPEAT:
6540
6579
if (ggml_type_size(src0->type) == sizeof(float) && ggml_type_size(dst->type) == sizeof(float)) {
6541
6580
return ctx->device->pipeline_repeat_f32;
@@ -7085,6 +7124,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
7085
7124
case GGML_OP_COS:
7086
7125
case GGML_OP_CLAMP:
7087
7126
case GGML_OP_PAD:
7127
+ case GGML_OP_ROLL:
7088
7128
case GGML_OP_REPEAT:
7089
7129
case GGML_OP_REPEAT_BACK:
7090
7130
case GGML_OP_CPY:
@@ -7561,117 +7601,61 @@ static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, c
7561
7601
}
7562
7602
7563
7603
// Dispatch GGML_OP_SCALE: unary op whose two float op params are forwarded
// to the shader via param1/param2.
static void ggml_vk_scale(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    pc.param1 = ggml_get_op_params_f32(dst, 0);
    pc.param2 = ggml_get_op_params_f32(dst, 1);

    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SCALE, std::move(pc), dryrun);
}
7577
7610
7578
7611
// Dispatch GGML_OP_SQR as a plain unary op; no extra parameters are needed.
static void ggml_vk_sqr(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SQR, std::move(pc), dryrun);
}
7591
7614
7592
7615
// Dispatch GGML_OP_SIN as a plain unary op; no extra parameters are needed.
static void ggml_vk_sin(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SIN, std::move(pc), dryrun);
}
7605
7618
7606
7619
// Dispatch GGML_OP_COS as a plain unary op; no extra parameters are needed.
static void ggml_vk_cos(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_COS, std::move(pc), dryrun);
}
7619
7622
7620
7623
// Dispatch GGML_OP_CLAMP: the two float op params (bounds) travel to the
// shader in param1/param2.
static void ggml_vk_clamp(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    pc.param1 = ggml_get_op_params_f32(dst, 0);
    pc.param2 = ggml_get_op_params_f32(dst, 1);

    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CLAMP, std::move(pc), dryrun);
}
7634
7630
7635
7631
// Dispatch GGML_OP_PAD. Every element of dst is written (padding included),
// so the dispatch count is dst's element count, which may exceed src0's.
static void ggml_vk_pad(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst));
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_PAD, std::move(pc), dryrun);
}
7636
+ static void ggml_vk_roll(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
7637
+ const int32_t s0 = ggml_get_op_params_i32(dst, 0);
7638
+ const int32_t s1 = ggml_get_op_params_i32(dst, 1);
7639
+ const int32_t s2 = ggml_get_op_params_i32(dst, 2);
7640
+ const int32_t s3 = ggml_get_op_params_i32(dst, 3);
7641
+ const uint32_t s01_packed = ((s0 + 0x8000) << 16) | (s1 + 0x8000);
7642
+ const uint32_t s23_packed = ((s2 + 0x8000) << 16) | (s3 + 0x8000);
7643
+
7644
+ vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst);
7645
+ memcpy(&p.param1, &s01_packed, sizeof(float));
7646
+ memcpy(&p.param2, &s23_packed, sizeof(float));
7647
+
7648
+ ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_ROLL, std::move(p), dryrun);
7647
7649
}
7648
7650
7649
7651
// Dispatch GGML_OP_REPEAT. dst holds the tiled result, so the dispatch count
// is dst's element count rather than src0's.
static void ggml_vk_repeat(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst));
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT, std::move(pc), dryrun);
}
7662
7655
7663
7656
// Dispatch GGML_OP_REPEAT_BACK, sized over dst's element count.
static void ggml_vk_repeat_back(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst));
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT_BACK, std::move(pc), dryrun);
}
7676
7660
7677
7661
static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
@@ -7689,14 +7673,8 @@ static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const
7689
7673
}
7690
7674
}
7691
7675
7692
- ggml_vk_op_f32<vk_op_unary_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, {
7693
- ne,
7694
- (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size,
7695
- (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
7696
- 0,
7697
- 0.0f, 0.0f,
7698
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7699
- }, dryrun);
7676
+ vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst, ne);
7677
+ ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, std::move(p), dryrun);
7700
7678
}
7701
7679
7702
7680
static void ggml_vk_set_rows(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -9033,6 +9011,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
9033
9011
case GGML_OP_COS:
9034
9012
case GGML_OP_CLAMP:
9035
9013
case GGML_OP_PAD:
9014
+ case GGML_OP_ROLL:
9036
9015
case GGML_OP_CPY:
9037
9016
case GGML_OP_SET_ROWS:
9038
9017
case GGML_OP_CONT:
@@ -9204,6 +9183,10 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
9204
9183
case GGML_OP_PAD:
9205
9184
ggml_vk_pad(ctx, compute_ctx, src0, node, dryrun);
9206
9185
9186
+ break;
9187
+ case GGML_OP_ROLL:
9188
+ ggml_vk_roll(ctx, compute_ctx, src0, node, dryrun);
9189
+
9207
9190
break;
9208
9191
case GGML_OP_CPY:
9209
9192
case GGML_OP_CONT:
@@ -9428,6 +9411,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
9428
9411
case GGML_OP_COS:
9429
9412
case GGML_OP_CLAMP:
9430
9413
case GGML_OP_PAD:
9414
+ case GGML_OP_ROLL:
9431
9415
case GGML_OP_CPY:
9432
9416
case GGML_OP_SET_ROWS:
9433
9417
case GGML_OP_CONT:
0 commit comments