@@ -1458,7 +1458,6 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
 inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); }
 inline static void ggml_vec_sqr_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i];   }
 inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
-inline static void ggml_vec_exp_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = expf(x[i]); }
 inline static void ggml_vec_log_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
 inline static void ggml_vec_abs_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
 inline static void ggml_vec_sgn_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
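Note: the deleted ggml_vec_exp_f32 was, like the neighbouring vec helpers, a plain scalar loop over the libm function. A self-contained equivalent is sketched below for reference; it is illustrative only and not part of ggml's API.

#include <math.h>
#include <stdio.h>

// Element-wise exp over a float buffer, mirroring the deleted ggml_vec_exp_f32.
static void vec_exp_f32(const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = expf(x[i]);
    }
}

int main(void) {
    const float x[4] = { -1.0f, 0.0f, 1.0f, 2.0f };
    float y[4];
    vec_exp_f32(4, y, x);
    for (int i = 0; i < 4; ++i) {
        printf("exp(%4.1f) = %f\n", x[i], y[i]);
    }
    return 0;
}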
@@ -1654,7 +1653,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "DIV",
     "SQR",
     "SQRT",
-    "EXP",
     "LOG",
     "SUM",
     "SUM_ROWS",
@@ -1688,7 +1686,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "DIAG_MASK_ZERO",
     "SOFT_MAX",
     "SOFT_MAX_BACK",
-    "SOFT_PLUS",
     "ROPE",
     "ROPE_BACK",
     "ALIBI",
@@ -1729,7 +1726,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 75, "GGML_OP_COUNT != 75");
+static_assert(GGML_OP_COUNT == 73, "GGML_OP_COUNT != 73");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1743,7 +1740,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "x/y",
     "x^2",
     "√x",
-    "e^x", // or should this be "exp(x)"?
     "log(x)",
     "Σx",
     "Σx_k",
@@ -1777,7 +1773,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "diag_mask_zero(x)",
     "soft_max(x)",
     "soft_max_back(x)",
-    "soft_plus(x)",
     "rope(x)",
     "rope_back(x)",
     "alibi(x)",
@@ -1818,7 +1813,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 75, "GGML_OP_COUNT != 75");
+static_assert(GGML_OP_COUNT == 73, "GGML_OP_COUNT != 73");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -3630,39 +3625,6 @@ struct ggml_tensor * ggml_sqrt_inplace(
     return ggml_sqrt_impl(ctx, a, true);
 }
 
-// ggml_exp
-
-static struct ggml_tensor * ggml_exp_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_EXP;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
-    return result;
-}
-
-struct ggml_tensor * ggml_exp(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a) {
-    return ggml_exp_impl(ctx, a, false);
-}
-
-struct ggml_tensor * ggml_exp_inplace(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a) {
-    return ggml_exp_impl(ctx, a, true);
-}
-
 // ggml_log
 
 static struct ggml_tensor * ggml_log_impl(
@@ -5143,40 +5105,6 @@ struct ggml_tensor * ggml_soft_max_back_inplace(
     return ggml_soft_max_back_impl(ctx, a, b, true);
 }
 
-// ggml_soft_plus
-
-static struct ggml_tensor * ggml_soft_plus_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true; // TODO : implement backward pass
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_SOFT_PLUS;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
-    return result;
-}
-
-struct ggml_tensor * ggml_soft_plus(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a) {
-    return ggml_soft_plus_impl(ctx, a, false);
-}
-
-struct ggml_tensor * ggml_soft_plus_inplace(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a) {
-    return ggml_soft_plus_impl(ctx, a, true);
-}
-
 // ggml_rope
 
 static struct ggml_tensor * ggml_rope_impl(
@@ -8458,57 +8386,6 @@ static void ggml_compute_forward_sqrt(
     }
 }
 
-// ggml_compute_forward_exp
-
-static void ggml_compute_forward_exp_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nc = src0->ne[0];
-    const int nr = ggml_nrows(src0);
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    for (int i1 = ir0; i1 < ir1; i1++) {
-        ggml_vec_exp_f32(nc,
-                (float *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (float *) ((char *) src0->data + i1*(src0->nb[1])));
-    };
-}
-
-static void ggml_compute_forward_exp(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
-    switch (src0->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_exp_f32(params, src0, dst);
-            } break;
-        case GGML_TYPE_F16: // TODO: use ggml_table_exp_f16
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
 // ggml_compute_forward_log
 
 static void ggml_compute_forward_log_f32(
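Note: the removed ggml_compute_forward_exp_f32 used the same row-partitioning scheme as the other multi-threaded forward kernels: nr rows are split into blocks of ceil(nr/nth) rows, one contiguous block per thread, and a thread whose block starts past the last row simply does nothing. A self-contained sketch of that split (names here are illustrative, not ggml API) is:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

// Half-open row range [ir0, ir1) handled by thread ith of nth,
// matching the dr/ir0/ir1 arithmetic in the removed kernel.
static void row_range(const int nr, const int ith, const int nth, int * ir0, int * ir1) {
    const int dr = (nr + nth - 1)/nth; // rows per thread, rounded up
    *ir0 = dr*ith;
    *ir1 = MIN(*ir0 + dr, nr);
}

int main(void) {
    const int nr = 10, nth = 4;
    for (int ith = 0; ith < nth; ++ith) {
        int ir0, ir1;
        row_range(nr, ith, nth, &ir0, &ir1);
        printf("thread %d: rows [%d, %d)\n", ith, ir0, ir1);
    }
    return 0;
}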
@@ -11771,48 +11648,6 @@ static void ggml_compute_forward_soft_max_back(
     }
 }
 
-static void ggml_compute_forward_soft_plus_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(params->ith == 0);
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    const int nc = src0->ne[0];
-    const int nr = ggml_nrows(src0);
-
-    GGML_ASSERT( dst->nb[0] == sizeof(float));
-    GGML_ASSERT(src0->nb[0] == sizeof(float));
-
-    for (int i = 0; i < nr; ++i) {
-        float * x = (float *) ((char *) dst->data  + i*( dst->nb[1]));
-        float * y = (float *) ((char *) src0->data + i*(src0->nb[1]));
-        for (int j = 0; j < nc; ++j) {
-            x[j] = logf(1.0f + expf(y[j]));
-        }
-    }
-}
-
-static void ggml_compute_forward_soft_plus(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
-    switch (src0->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_soft_plus_f32(params, src0, dst);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
 // ggml_compute_forward_alibi
 
 static void ggml_compute_forward_alibi_f32(
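Note: the removed kernel evaluates softplus as logf(1.0f + expf(y[j])), which overflows to +inf once expf saturates (roughly x > 88 for float). A numerically stable standalone variant would special-case large inputs; the sketch below is illustrative only and not part of ggml's API.

#include <math.h>
#include <stdio.h>

// Stable softplus: for large x, log(1 + e^x) is essentially x, so skip the exp;
// otherwise use log1pf for accuracy when e^x is small.
static float softplus_stable(const float x) {
    return x > 20.0f ? x : log1pf(expf(x));
}

int main(void) {
    const float xs[4] = { -10.0f, 0.0f, 10.0f, 100.0f };
    for (int i = 0; i < 4; ++i) {
        printf("softplus(%6.1f) = %f\n", xs[i], softplus_stable(xs[i]));
    }
    return 0;
}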
@@ -15220,10 +15055,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_sqrt(params, tensor->src[0], tensor);
             } break;
-        case GGML_OP_EXP:
-            {
-                ggml_compute_forward_exp(params, tensor->src[0], tensor);
-            } break;
         case GGML_OP_LOG:
             {
                 ggml_compute_forward_log(params, tensor->src[0], tensor);
@@ -15348,10 +15179,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_soft_max_back(params, tensor->src[0], tensor->src[1], tensor);
             } break;
-        case GGML_OP_SOFT_PLUS:
-            {
-                ggml_compute_forward_soft_plus(params, tensor->src[0], tensor);
-            } break;
         case GGML_OP_ROPE:
             {
                 ggml_compute_forward_rope(params, tensor->src[0], tensor->src[1], tensor);
@@ -15908,10 +15735,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                                 zero_table);
                 }
             } break;
-        case GGML_OP_EXP:
-            {
-                GGML_ASSERT(false); // TODO: implement
-            } break;
        case GGML_OP_LOG:
             {
                 if (src0->grad) {
@@ -16290,10 +16113,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
-        case GGML_OP_SOFT_PLUS:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
         case GGML_OP_ROPE:
             {
                 // necessary for llama
@@ -17015,7 +16834,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
         case GGML_OP_ADD:
         case GGML_OP_ADD1:
         case GGML_OP_ACC:
-        case GGML_OP_EXP:
             {
                 n_tasks = n_threads;
             } break;
@@ -17122,10 +16940,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             {
                 n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
             } break;
-        case GGML_OP_SOFT_PLUS:
-            {
-                n_tasks = 1; //TODO
-            } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
                 n_tasks = n_threads;
@@ -17499,7 +17313,6 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
                 }
             } break;
             case GGML_OP_SOFT_MAX:
-            case GGML_OP_SOFT_PLUS:
             case GGML_OP_ROPE:
                 {
                     cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
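Note: the last hunk removes GGML_OP_SOFT_PLUS from the work-buffer sizing in ggml_graph_plan; the ops remaining on that case reserve one F32 row per task, i.e. ggml_type_size(GGML_TYPE_F32) * ne[0] * n_tasks bytes of scratch. A minimal sketch of that sizing rule (illustrative only) is:

#include <stdint.h>
#include <stdio.h>

// One scratch row of ne0 elements per task, as in the SOFT_MAX/ROPE case above.
static size_t rowwise_work_size(const size_t type_size, const int64_t ne0, const int n_tasks) {
    return type_size*(size_t) ne0*(size_t) n_tasks;
}

int main(void) {
    // e.g. a 4096-wide F32 row with 8 tasks
    printf("work bytes: %zu\n", rowwise_work_size(sizeof(float), 4096, 8));
    return 0;
}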