diff --git a/docs/dev_logs/2025_May.md b/docs/dev_logs/2025_May.md new file mode 100644 index 0000000..460024f --- /dev/null +++ b/docs/dev_logs/2025_May.md @@ -0,0 +1,27 @@ +# TODO: + +--- General +- [ ] Create a proper suit of tests + - [ ] Keep extracting code into modules and functions + - [ ] Posterior analysis for gempy +- [ ] Investigate speed +- [ ] Investigate segmentation function +- [ ] Create a robust api to make sure we do not mess up assigning priors to gempy parameters +- [ ] do we need to use index_put or just normal indexing? +--- Vector examples +- [ ] Bring the Vector example here... +- [ ] Using categories as likelihood function (maybe I need to check out with Sam) +--- MinEye +- [ ] Revive magnetics +- [ ] Add Simon's example + +### Doing: +- [ ] Compressing code +- [ ] GemPy posterior visualization + +### Possible Design: +- Geological Model +- Priors +- Likelihood +---- +- Having a runner or something like that? \ No newline at end of file diff --git a/tests/test_prob_modeling/__init__.py b/gempy_probability/modules/model_definition/__init__.py similarity index 100% rename from tests/test_prob_modeling/__init__.py rename to gempy_probability/modules/model_definition/__init__.py diff --git a/gempy_probability/modules/model_definition/model_examples.py b/gempy_probability/modules/model_definition/model_examples.py new file mode 100644 index 0000000..5765d86 --- /dev/null +++ b/gempy_probability/modules/model_definition/model_examples.py @@ -0,0 +1,80 @@ +import gempy as gp +import gempy.core.data +import gempy_engine +from gempy.modules.data_manipulation.engine_factory import interpolation_input_from_structural_frame + +import pyro +import pyro.distributions as dist +import torch + + +def model(geo_model: gempy.core.data.GeoModel, normal, y_obs_list): + """ + This Pyro model represents the probabilistic aspects of the geological model. + It defines a prior distribution for the top layer's location and + computes the thickness of the geological layer as an observed variable. + """ + # Define prior for the top layer's location: + # region Prior definition + + mu_top = pyro.sample( + name=r'$\mu_{top}$', + fn=normal + ) + # endregion + + # region Prior injection into the gempy model + + # * Update the model with the new top layer's location + interpolation_input = interpolation_input_from_structural_frame(geo_model) + + if True: # ?? I need to figure out if we need the index_put or not + indices__ = (torch.tensor([0]), torch.tensor([2])) # * This has to be Tensors + new_tensor: torch.Tensor = torch.index_put( + input=interpolation_input.surface_points.sp_coords, + indices=indices__, + values=mu_top + ) + interpolation_input.surface_points.sp_coords = new_tensor + else: + interpolation_input.surface_points.sp_coords[0, 2] = mu_top + + # endregion + + # region Forward model computation + + # * Compute the geological model + geo_model.solutions = gempy_engine.compute_model( + interpolation_input=interpolation_input, + options=geo_model.interpolation_options, + data_descriptor=geo_model.input_data_descriptor, + geophysics_input=geo_model.geophysics_input, + ) + + # Compute and observe the thickness of the geological layer + model_solutions: gp.data.Solutions = geo_model.solutions + thickness = _likelihood(model_solutions) + + # endregion + + posterior_dist_normal = dist.Normal(thickness, 25) + + # * This is used automagically by pyro to compute the log-probability + y_thickness = pyro.sample( + name=r'$y_{thickness}$', + fn=posterior_dist_normal, + obs=y_obs_list + ) + + +def _likelihood(model_solutions: gp.data.Solutions) -> torch.Tensor: + """ + This function computes the thickness of the geological layer. + """ + simulated_well = model_solutions.octrees_output[0].last_output_center.custom_grid_values + thickness = simulated_well.sum() + pyro.deterministic( + name=r'$\mu_{thickness}$', + value=thickness.detach() # * This is only for az to track progress + ) + return thickness diff --git a/tests/test_prob_model/__init__.py b/tests/test_prob_model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_prob_model/test_prob_I.py b/tests/test_prob_model/test_prob_I.py new file mode 100644 index 0000000..ccccce8 --- /dev/null +++ b/tests/test_prob_model/test_prob_I.py @@ -0,0 +1,144 @@ +import numpy as np +import os +import pyro.distributions as dist +import torch + +import gempy as gp +from gempy_engine.core.backend_tensor import BackendTensor + + +def test_basic_gempy_I() -> None: + current_dir = os.path.dirname(os.path.abspath(__file__)) + data_path = os.path.abspath(os.path.join(current_dir, '..', '..', 'examples', 'tutorials', 'data')) + geo_model = gp.create_geomodel( + project_name='Wells', + extent=[0, 12000, -500, 500, 0, 4000], + refinement=3, + importer_helper=gp.data.ImporterHelper( + path_to_orientations=os.path.join(data_path, "2-layers", "2-layers_orientations.csv"), + path_to_surface_points=os.path.join(data_path, "2-layers", "2-layers_surface_points.csv") + ) + ) + + # TODO: Convert this into an options preset + geo_model.interpolation_options.uni_degree = 0 + geo_model.interpolation_options.mesh_extraction = False + geo_model.interpolation_options.sigmoid_slope = 1100. + + # region Minimal grid for the specific likelihood function + x_loc = 6000 + y_loc = 0 + z_loc = np.linspace(0, 4000, 100) + xyz_coord = np.array([[x_loc, y_loc, z] for z in z_loc]) + gp.set_custom_grid(geo_model.grid, xyz_coord=xyz_coord) + # endregion + + geo_model.grid.active_grids = gp.data.Grid.GridTypes.CUSTOM + assert geo_model.grid.values.shape[0] == 100, "Custom grid should have 100 cells" + gp.compute_model(gempy_model=geo_model) + + # TODO: Make this a more elegant way + BackendTensor.change_backend_gempy(engine_backend=gp.data.AvailableBackends.PYTORCH) + + normal = dist.Normal( + loc=(geo_model.surface_points_copy_transformed.xyz[0, 2]), + scale=torch.tensor(0.1, dtype=torch.float64) + ) + + from gempy_probability.modules.model_definition.model_examples import model + _prob_run( + geo_model=geo_model, + prob_model=model, + normal=normal, + y_obs_list=torch.tensor([200, 210, 190]) + ) + + +def _prob_run(geo_model: gp.data.GeoModel, prob_model: callable, + normal: torch.distributions.Distribution, y_obs_list: torch.Tensor) -> None: + # Run prior sampling and visualization + from pyro.infer import Predictive + import pyro + import arviz as az + import matplotlib.pyplot as plt + + from pyro.infer import NUTS + from pyro.infer import MCMC + from pyro.infer.autoguide import init_to_mean + + # region prior sampling + predictive = Predictive( + model=prob_model, + num_samples=50 + ) + prior = predictive(geo_model, normal, y_obs_list) + + data = az.from_pyro(prior=prior) + az.plot_trace(data.prior) + plt.show() + + # endregion + + # region inference + pyro.primitives.enable_validation(is_validate=True) + nuts_kernel = NUTS( + prob_model, + step_size=0.0085, + adapt_step_size=True, + target_accept_prob=0.9, + max_tree_depth=10, + init_strategy=init_to_mean + ) + mcmc = MCMC( + kernel=nuts_kernel, + num_samples=200, + warmup_steps=50, + disable_validation=False + ) + mcmc.run(geo_model, normal, y_obs_list) + posterior_samples = mcmc.get_samples() + posterior_predictive_fn = Predictive( + model=prob_model, + posterior_samples=posterior_samples + ) + posterior_predictive = posterior_predictive_fn(geo_model, normal, y_obs_list) + data = az.from_pyro(posterior=mcmc, prior=prior, posterior_predictive=posterior_predictive) + # endregion + + az.plot_trace(data) + plt.show() + from gempy_probability.modules.plot.plot_posterior import default_red, default_blue + az.plot_density( + data=[data.posterior_predictive, data.prior_predictive], + shade=.9, + var_names=[r'$\mu_{thickness}$'], + data_labels=["Posterior Predictive", "Prior Predictive"], + colors=[default_red, default_blue], + ) + plt.show() + az.plot_density( + data=[data, data.prior], + shade=.9, + hdi_prob=.99, + data_labels=["Posterior", "Prior"], + colors=[default_red, default_blue], + ) + plt.show() + + # Test posterior mean values + posterior_top_mean = float(data.posterior[r'$\mu_{top}$'].mean()) + target_top = 0.00875 + target_thickness = 223 + assert abs(posterior_top_mean - target_top) < 0.0200, f"Top layer mean {posterior_top_mean} outside expected range" + posterior_thickness_mean = float(data.posterior_predictive[r'$\mu_{thickness}$'].mean()) + assert abs(posterior_thickness_mean - target_thickness) < 5, f"Thickness mean {posterior_thickness_mean} outside expected range" + # Test convergence diagnostics + assert float(data.sample_stats.diverging.sum()) == 0, "MCMC sampling has divergences" + + print("Posterior mean values:") + print(f"Top layer mean: {posterior_top_mean}") + print(f"Thickness mean: {posterior_thickness_mean}") + print("MCMC convergence diagnostics:") + print(f"Divergences: {float(data.sample_stats.diverging.sum())}") + print(f"Acceptance rate: {float(data.sample_stats.acceptance_rate.mean())}") + print(f"Mean tree depth: {float(data.sample_stats.mean_tree_depth.mean())}")