[WIP] Improving activation function

Leguark · Leguark · commit 51c5fb5f9195 · 2023-11-10T12:50:14.000+01:00
diff --git a/gempy_engine/modules/activator/activator_interface.py b/gempy_engine/modules/activator/activator_interface.py
@@ -11,7 +11,10 @@ def activate_formation_block(exported_fields: ExportedFields, ids: np.ndarray, s
     Z_x: np.ndarray = exported_fields.scalar_field_everywhere
     scalar_value_at_sp: np.ndarray = exported_fields.scalar_field_at_surface_points
 
-    sigm = activate_formation_block_from_args(Z_x, ids, scalar_value_at_sp, sigmoid_slope)
+    if LEGACY :=False:
+        sigm = activate_formation_block_from_args(Z_x, ids, scalar_value_at_sp, sigmoid_slope)
+    else:
+        sigm = activate_formation_block_from_args_hard_sigmoid(Z_x, ids, scalar_value_at_sp, sigmoid_slope)
 
     return sigm
 
@@ -33,13 +36,40 @@ def activate_formation_block_from_args(Z_x, ids, scalar_value_at_sp, sigmoid_slo
     sigm = bt.t.zeros((1, Z_x.shape[0]), dtype=BackendTensor.dtype_obj)
 
     for i in range(len(ids)):
-        if LEGACY:=True:
-            sigm += _compute_sigmoid(Z_x, scalar_0_v[i], scalar_1_v[i], drift_0_v[i], drift_1_v[i], ids[i], sigmoid_slope)
-        else:
-            sigm += HardSigmoid.apply(Z_x, scalar_0_v[i], scalar_1_v[i])
+        sigm += _compute_sigmoid(Z_x, scalar_0_v[i], scalar_1_v[i], drift_0_v[i], drift_1_v[i], ids[i], sigmoid_slope)
     return sigm
 
 
+def activate_formation_block_from_args_hard_sigmoid(Z_x, ids, scalar_value_at_sp, sigmoid_slope):
+    element_0 = bt.t.array([0], dtype=BackendTensor.dtype_obj)
+
+    min_Z_x = BackendTensor.t.min(Z_x, axis=0).reshape(-1)  # ? Is this as good as it gets?
+    max_Z_x = BackendTensor.t.max(Z_x, axis=0)[0].reshape(-1)  # ? Is this as good as it gets?
+    
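+    # Add a 5% margin below and above the scalar field range (NOTE: the upper margin uses the already-lowered min, so it is 5.25% of the original range)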
+    min_Z_x = min_Z_x - 0.05 * (max_Z_x - min_Z_x)
+    max_Z_x = max_Z_x + 0.05 * (max_Z_x - min_Z_x)
+    
+    
+    drift_0_v = bt.tfnp.concatenate([min_Z_x, scalar_value_at_sp])
+    drift_1_v = bt.tfnp.concatenate([scalar_value_at_sp, max_Z_x])
+
+    ids = bt.t.array(ids, dtype="int32")
+    scalar_0_v = bt.t.copy(ids)
+    scalar_0_v[0] = 0
+    # 
+    # scalar_1_v = bt.t.copy(ids)
+    # scalar_1_v[-1] = 0
+
+    # * Iterate over surface
+    sigm = bt.t.zeros((1, Z_x.shape[0]), dtype=BackendTensor.dtype_obj)
+
+    for i in range(len(ids)):
+        # if (i == 3):
+            sigm += ids[i] * HardSigmoidModified.apply(Z_x, drift_0_v[i], drift_1_v[i])
+    return sigm.view(1, -1)
+
+
 def _compute_sigmoid(Z_x, scale_0, scale_1, drift_0, drift_1, drift_id, sigmoid_slope):
     # TODO: Test to remove reshape once multiple values are implemented
 
@@ -49,9 +79,9 @@ def _compute_sigmoid(Z_x, scale_0, scale_1, drift_0, drift_1, drift_id, sigmoid_
 
         sigmoid_slope_tensor = BackendTensor.t.array(sigmoid_slope, dtype=BackendTensor.dtype_obj)
 
-        active_denominator   = (1 + bt.tfnp.exp(-sigmoid_slope_tensor * (Z_x - drift_0)))
+        active_denominator = (1 + bt.tfnp.exp(-sigmoid_slope_tensor * (Z_x - drift_0)))
         deactive_denominator = (1 + bt.tfnp.exp(sigmoid_slope_tensor * (Z_x - drift_1)))
-        
+
         active_sig = -scale_0.reshape((-1, 1)) / active_denominator
         deactive_sig = -scale_1.reshape((-1, 1)) / deactive_denominator
         activation_sig = active_sig + deactive_sig
@@ -71,25 +101,61 @@ def _add_relu():
 
 # * This gets the scalar gradient
 import torch
-class HardSigmoid(torch.autograd.Function):
+
+
+class HardSigmoidModified(torch.autograd.Function):
     @staticmethod
     def forward(ctx, input, a, b):
         ctx.save_for_backward(input)
         ctx.bounds = (a, b)
-        slope = 1 / (b - a)
-        return torch.clamp(slope * (input - a) + 0.5, min=0, max=1)
+        output = torch.zeros_like(input)
+        slope_up = 100 / (b - a)
+
+        # For x in the range [a, b]
+        output[(input >= a) & (input <= b)] += torch.clamp(slope_up * (input[(input >= a) & (input <= b)] - a), min=0, max=1)
+
+        output[(input >= a) & (input <= b)] += torch.clamp(-slope_up * (input[(input >= a) & (input <= b)] - b), min=0, max=1)
+        
+        # Clamping the values outside the range [a, b) to zero
+        output[input < a] = 0
+        output[input >= b] = 0
+
+        return output
+
 
     @staticmethod
     def backward(ctx, grad_output):
         input, = ctx.saved_tensors
         a, b = ctx.bounds
+        midpoint = (a + b) / 2
         grad_input = grad_output.clone()
+
+        # Sets the gradient to 1/(b-a) for x in [a, midpoint), -1/(b-a) for x in (midpoint, b], and 0 outside [a, b]; x == midpoint keeps grad_output
         grad_input[input < a] = 0
         grad_input[input > b] = 0
-        grad_input[(input >= a) & (input <= b)] = 1 / (b - a)
+        grad_input[(input >= a) & (input < midpoint)] = 1 / (b - a)
+        grad_input[(input > midpoint) & (input <= b)] = -1 / (b - a)
+
         return grad_input, None, None
 
 
+class HardSigmoid(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, input, a, b, c):
+        ctx.save_for_backward(input)
+        ctx.bounds = (a, b)
+        slope = 1000 / (b - a)
+        return torch.clamp(slope * (input - a) + 0.5, min=0, max=1)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, = ctx.saved_tensors
+        a, b = ctx.bounds
+        grad_input = grad_output.clone()
+        grad_input[input < a] = 0
+        grad_input[input > b] = 0
+        grad_input[(input >= a) & (input <= b)] = 1 / (b - a)
+        return grad_input, None, None
 
 
 class CustomSigmoidFunction(torch.autograd.Function):
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -16,7 +16,7 @@
 from tests.fixtures.heavy_models import *
 
 pykeops_enabled = False
-backend = AvailableBackends.numpy
+backend = AvailableBackends.PYTORCH
 use_gpu = False
 plot_pyvista = False  # ! Set here if you want to plot the results
 
diff --git a/tests/test_common/test_modules/test_activator.py b/tests/test_common/test_modules/test_activator.py
@@ -1,15 +1,15 @@
 import dataclasses
 import os
-import pytest
+
 import matplotlib.pyplot as plt
 import numpy as np
 
 from gempy_engine.API.interp_single._interp_scalar_field import _solve_interpolation, _evaluate_sys_eq
 from gempy_engine.API.interp_single._interp_single_feature import input_preprocess
+from gempy_engine.config import AvailableBackends
 from gempy_engine.core.data.internal_structs import SolverInput
-from gempy_engine.core.data.interp_output import InterpOutput
 from gempy_engine.modules.activator.activator_interface import activate_formation_block
-from gempy_engine.API.interp_single.interp_features import interpolate_single_field
+from gempy_engine.core.backend_tensor import BackendTensor
 
 dir_name = os.path.dirname(__file__)
 
@@ -27,13 +27,22 @@ def test_activator(simple_model_values_block_output):
     ids_block = activate_formation_block(simple_model_values_block_output.exported_fields, ids, 50000)[:, :-7]
     print(ids_block)
 
+    if BackendTensor.engine_backend == AvailableBackends.PYTORCH:
+        ids_block = ids_block.detach().numpy()
+        Z_x = Z_x.detach().numpy()
+        
     if plot:
         plt.contourf(Z_x.reshape(50, 5, 50)[:, 2, :].T, N=40, cmap="autumn")
         plt.colorbar()
 
         plt.show()
 
-        plt.contourf(ids_block[0].reshape(50, 5, 50)[:, 2, :].T, N=40, cmap="viridis")
+        plt.contourf(
+            ids_block[0].reshape(50, 5, 50)[:, 2, :].T,
+            N=40,
+            cmap="viridis",
+            # levels=[-1, 0.5, 1, 1.5, 2.5]
+        )
         plt.colorbar()
 
         plt.show()
@@ -71,8 +80,13 @@ def test_activator_3_layers(simple_model_3_layers, simple_grid_3d_more_points_gr
         exported_fields=exported_fields,
         ids= ids,
         sigmoid_slope=50000
-    )[:, :-7]
+    )[0, :-7]
 
+    if BackendTensor.engine_backend == AvailableBackends.PYTORCH:
+        ids_block = ids_block.detach().numpy()
+        Z_x = Z_x.detach().numpy()
+        interpolation_input.surface_points.sp_coords = interpolation_input.surface_points.sp_coords.detach().numpy()
+    
     if plot:
         plt.contourf(Z_x.reshape(50, 5, 50)[:, 0, :].T, N=40, cmap="autumn",
                      extent=(.25, .75, .25, .75))
@@ -83,7 +97,11 @@ def test_activator_3_layers(simple_model_3_layers, simple_grid_3d_more_points_gr
         
         plt.show()
 
-        plt.contourf(ids_block[0, :-4].reshape(50, 5, 50)[:, 2, :].T, N=40, cmap="viridis")
+        plt.contourf(
+            ids_block[:-4].reshape(50, 5, 50)[:, 2, :].T, 
+            N=250,
+            cmap="viridis"
+        )
         plt.colorbar()
 
         plt.show()