|
6 | 6 | using vDSP_fn_t = void (*)(const float *, vDSP_Stride, const float *, vDSP_Stride, float *, vDSP_Stride, vDSP_Length);
|
7 | 7 | #endif
|
8 | 8 |
|
9 |
| -#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions |
10 |
| -#include "ggml_v3b-opencl.h" |
11 |
| -#endif |
12 | 9 |
|
13 | 10 | static inline float op_add(float a, float b) {
|
14 | 11 | return a + b;
|
@@ -57,29 +54,6 @@ static void apply_binary_op(const ggml_compute_params * params, ggml_tensor * ds
|
57 | 54 |
|
58 | 55 | GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
59 | 56 |
|
60 |
| - // #if defined(GGML_USE_CLBLAST) |
61 |
| - // //do we even need this? it seems like its actually slower than just CPU |
62 |
| - // const int ith = params->ith; |
63 |
| - // if (op == op_add && src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && src1->clblast_offload_gpu) { |
64 |
| - // // TODO: OpenCL kernel support full broadcast |
65 |
| - // static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
66 |
| - // GGML_ASSERT((src1->ne[0] == src0->ne[0]) && ggml_can_repeat(src1, src0)); |
67 |
| - // if (ith == 0) { |
68 |
| - // ggml_cl_add(src0, src1, dst); |
69 |
| - // } |
70 |
| - // return; |
71 |
| - // } |
72 |
| - // if (op == op_mul && src0->type == GGML_TYPE_F32 && src1->clblast_offload_gpu) { |
73 |
| - // // TODO: OpenCL kernel support full broadcast |
74 |
| - // static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
75 |
| - // GGML_ASSERT((src1->ne[0] == src0->ne[0]) && ggml_can_repeat(src1, src0)); |
76 |
| - // if (ith == 0) { |
77 |
| - // ggml_cl_mul(src0, src1, dst); |
78 |
| - // } |
79 |
| - // return; |
80 |
| - // } |
81 |
| - // #endif |
82 |
| - |
83 | 57 | GGML_TENSOR_BINARY_OP_LOCALS
|
84 | 58 |
|
85 | 59 | GGML_ASSERT( nb0 == sizeof(dst_t));
|
|
0 commit comments