@@ -40,7 +40,8 @@ __device__ float gelu_activate_kernel(float x){return (0.5*x*(1 + tanhf(0.797885
 __device__ float softplus_kernel(float x, float threshold = 20) {
     if (x > threshold) return x;                // too large
     else if (x < -threshold) return expf(x);    // too small
-    return logf(expf(x) + 1);
+    return log1pf(expf(x));
+    // return logf(expf(x) + 1);
 }
 __device__ float plse_activate_kernel(float x)
 {
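
The switch to log1pf matters when expf(x) is much smaller than 1: forming expf(x) + 1 first rounds away the low-order bits of expf(x), while log1pf evaluates log(1 + y) accurately for small y. A minimal host-side sketch (illustrative only, not part of this commit) that makes the difference visible:

#include <cstdio>
#include <cmath>

int main() {
    float x = -10.0f;                      // expf(x) ~= 4.54e-5, tiny next to 1
    float naive  = logf(expf(x) + 1.0f);   // the addition discards low bits of expf(x)
    float stable = log1pf(expf(x));        // log(1 + y) without forming 1 + y
    printf("logf(expf(x)+1) = %.9g\nlog1pf(expf(x)) = %.9g\n", naive, stable);
    return 0;
}
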
@@ -257,8 +258,8 @@ __global__ void activate_array_mish_kernel(float *x, int n, float *activation_in
         // Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L17-L20
         // TF: https://github.com/tensorflow/addons/blob/093cdfa85d334cbe19a37624c33198f3140109ed/tensorflow_addons/custom_ops/activations/cc/kernels/mish_op.h#L40-L49
         // log1p(x) == log(x + 1)
-        output_gpu[i] = x_val * tanh_activate_kernel( softplus_kernel(x_val, MISH_THRESHOLD) );
-        // output_gpu[i] = mish_yashas(x_val);
+        // output_gpu[i] = x_val * tanh_activate_kernel( softplus_kernel(x_val, MISH_THRESHOLD) );
+        output_gpu[i] = mish_yashas(x_val);
         // output_gpu[i] = mish_njuffa(x_val);
     }
 }
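
The forward pass now calls mish_yashas, whose body is outside this hunk. The usual trick behind such fused variants: with sp = softplus(x) = log(1 + e^x), e^(2*sp) = (1 + e^x)^2, so tanh(sp) = n / (n + 2) where n = e^(2x) + 2*e^x, and mish(x) = x * n / (n + 2) needs a single exponential and no tanhf call. A hedged sketch of a fused device function in that spirit (the name and the cutoff are illustrative assumptions, not necessarily what mish_yashas does):

__device__ float mish_fused_sketch(float x)
{
    // For very negative x, e^x underflows and mish(x) ~= x * e^x.
    if (x <= -20.0f) return x * expf(x);
    float e = expf(x);
    float n = e * e + 2.0f * e;            // (1 + e^x)^2 - 1
    return x * n / (n + 2.0f);             // x * tanh(softplus(x))
}
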
@@ -355,7 +356,8 @@ __global__ void gradient_array_mish_kernel(int n, float *activation_input_gpu, f
         // log1p(x) == log(x + 1)
         const float inp = activation_input_gpu[i];
         const float sp = softplus_kernel(inp, MISH_THRESHOLD);
-        const float grad_sp = 1 - expf(-sp);
+        const float grad_sp = -expm1f(-sp);
+        // const float grad_sp = 1 - expf(-sp);
         const float tsp = tanh(sp);
         const float grad_tsp = (1 - tsp*tsp) * grad_sp;
         const float grad = inp * grad_tsp + tsp;
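
Here grad_sp is the derivative of softplus, sigmoid(inp) = 1 - e^(-sp). When sp is tiny (inp very negative), expf(-sp) is almost exactly 1 and the subtraction cancels down to a few significant bits, whereas expm1f(-sp) computes e^(-sp) - 1 directly, so -expm1f(-sp) keeps full float precision. A minimal host-side check (illustrative only, not part of this commit):

#include <cstdio>
#include <cmath>

int main() {
    float sp = 1e-6f;                      // softplus output for a very negative input
    float cancel = 1.0f - expf(-sp);       // catastrophic cancellation near zero
    float stable = -expm1f(-sp);           // e^(-sp) - 1 computed without cancellation
    printf("1 - expf(-sp) = %.9g\n-expm1f(-sp)  = %.9g\n", cancel, stable);
    return 0;
}
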