diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 72b0e00d..13d211ed 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,7 +24,7 @@ jobs:
       - name: Setup environment
         run: pip install -e .[test]
       - name: Run doctests
-        run: pytest --doctest-modules --ignore=causalpy/tests/ causalpy/
+        run: pytest --doctest-modules --ignore=causalpy/tests/ causalpy/ --config-file=causalpy/tests/conftest.py
       - name: Run extra tests
         run: pytest docs/source/.codespell/test_notebook_to_markdown.py
       - name: Run tests
diff --git a/Makefile b/Makefile
index 9f2b1971..d109ae39 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: init lint check_lint test uml html cleandocs
+.PHONY: init lint check_lint test uml html cleandocs doctest
 
 init:
 	python -m pip install -e . --no-deps
@@ -13,7 +13,7 @@ check_lint:
 	interrogate .
 
 doctest:
-	pytest --doctest-modules --ignore=causalpy/tests/ causalpy/
+	pytest --doctest-modules --ignore=causalpy/tests/ causalpy/ --config-file=causalpy/tests/conftest.py
 
 test:
 	pytest
diff --git a/causalpy/experiments/prepostnegd.py b/causalpy/experiments/prepostnegd.py
index beec847e..7a5620fb 100644
--- a/causalpy/experiments/prepostnegd.py
+++ b/causalpy/experiments/prepostnegd.py
@@ -72,7 +72,7 @@ class PrePostNEGD(BaseExperiment):
     ...         }
     ...     ),
     ... )
-    >>> result.summary(round_to=1)  # doctest: +NUMBER
+    >>> result.summary(round_to=1)  # doctest: +SKIP
     ==================Pretest/posttest Nonequivalent Group Design===================
     Formula: post ~ 1 + C(group) + pre
     <BLANKLINE>
diff --git a/causalpy/tests/conftest.py b/causalpy/tests/conftest.py
index f01dc693..f966a785 100644
--- a/causalpy/tests/conftest.py
+++ b/causalpy/tests/conftest.py
@@ -20,6 +20,7 @@
 
 import numpy as np
 import pytest
+from pymc.testing import mock_sample, mock_sample_setup_and_teardown
 
 
 @pytest.fixture(scope="session")
@@ -27,3 +28,16 @@ def rng() -> np.random.Generator:
     """Random number generator that can persist through a pytest session"""
     seed: int = sum(map(ord, "causalpy"))
     return np.random.default_rng(seed=seed)
+
+
+# Opt-in, session-scoped fixture wrapping pymc's mock-sampling setup/teardown.
+mock_pymc_sample = pytest.fixture(mock_sample_setup_and_teardown, scope="session")
+
+
+@pytest.fixture(autouse=True)
+def mock_sample_for_doctest(request, monkeypatch):
+    """Swap ``pymc.sample`` for ``mock_sample`` in doctest runs; undone at teardown."""
+    if not request.config.getoption("--doctest-modules", default=False):
+        return
+
+    monkeypatch.setattr("pymc.sample", mock_sample)
diff --git a/causalpy/tests/test_integration_pymc_examples.py b/causalpy/tests/test_integration_pymc_examples.py
index 224e22fc..76f454ef 100644
--- a/causalpy/tests/test_integration_pymc_examples.py
+++ b/causalpy/tests/test_integration_pymc_examples.py
@@ -23,7 +23,7 @@
 
 
 @pytest.mark.integration
-def test_did():
+def test_did(mock_pymc_sample):
     """
     Test Difference in Differences (DID) PyMC experiment.
 
@@ -57,7 +57,7 @@ def test_did():
 
 
 @pytest.mark.integration
-def test_did_banks_simple():
+def test_did_banks_simple(mock_pymc_sample):
     """
     Test simple Differences In Differences Experiment on the 'banks' data set.
 
@@ -113,7 +113,7 @@ def test_did_banks_simple():
 
 
 @pytest.mark.integration
-def test_did_banks_multi():
+def test_did_banks_multi(mock_pymc_sample):
     """
     Test multiple regression Differences In Differences Experiment on the 'banks'
     data set.
@@ -168,7 +168,7 @@ def test_did_banks_multi():
 
 
 @pytest.mark.integration
-def test_rd():
+def test_rd(mock_pymc_sample):
     """
     Test Regression Discontinuity experiment.
 
@@ -199,7 +199,7 @@ def test_rd():
 
 
 @pytest.mark.integration
-def test_rd_bandwidth():
+def test_rd_bandwidth(mock_pymc_sample):
     """
     Test Regression Discontinuity experiment with bandwidth parameter.
 
@@ -229,7 +229,7 @@ def test_rd_bandwidth():
 
 
 @pytest.mark.integration
-def test_rd_drinking():
+def test_rd_drinking(mock_pymc_sample):
     """
     Test Regression Discontinuity experiment on drinking age data.
 
@@ -289,7 +289,7 @@ def reg_kink_function(x, beta, kink):
 
 
 @pytest.mark.integration
-def test_rkink():
+def test_rkink(mock_pymc_sample):
     """
     Test Regression Kink design.
 
@@ -320,7 +320,7 @@ def test_rkink():
 
 
 @pytest.mark.integration
-def test_rkink_bandwidth():
+def test_rkink_bandwidth(mock_pymc_sample):
     """
     Test Regression Kink experiment with bandwidth parameter.
 
@@ -350,7 +350,7 @@ def test_rkink_bandwidth():
 
 
 @pytest.mark.integration
-def test_its():
+def test_its(mock_pymc_sample):
     """
     Test Interrupted Time-Series experiment.
 
@@ -403,7 +403,7 @@ def test_its():
 
 
 @pytest.mark.integration
-def test_its_covid():
+def test_its_covid(mock_pymc_sample):
     """
     Test Interrupted Time-Series experiment on COVID data.
 
@@ -457,7 +457,7 @@ def test_its_covid():
 
 
 @pytest.mark.integration
-def test_sc():
+def test_sc(mock_pymc_sample):
     """
     Test Synthetic Control experiment.
 
@@ -516,7 +516,7 @@ def test_sc():
 
 
 @pytest.mark.integration
-def test_sc_brexit():
+def test_sc_brexit(mock_pymc_sample):
     """
     Test Synthetic Control experiment on Brexit data.
 
@@ -579,7 +579,7 @@ def test_sc_brexit():
 
 
 @pytest.mark.integration
-def test_ancova():
+def test_ancova(mock_pymc_sample):
     """
     Test Pre-PostNEGD experiment on anova1 data.
 
@@ -611,7 +611,7 @@ def test_ancova():
 
 
 @pytest.mark.integration
-def test_geolift1():
+def test_geolift1(mock_pymc_sample):
     """
     Test Synthetic Control experiment on geo lift data.
 
@@ -648,7 +648,7 @@ def test_geolift1():
 
 
 @pytest.mark.integration
-def test_iv_reg():
+def test_iv_reg(mock_pymc_sample):
     df = cp.load_data("risk")
     instruments_formula = "risk  ~ 1 + logmort0"
     formula = "loggdp ~  1 + risk"
@@ -676,7 +676,7 @@ def test_iv_reg():
 
 
 @pytest.mark.integration
-def test_inverse_prop():
+def test_inverse_prop(mock_pymc_sample):
     """Test the InversePropensityWeighting class."""
     df = cp.load_data("nhefs")
     sample_kwargs = {
diff --git a/causalpy/tests/test_pymc_models.py b/causalpy/tests/test_pymc_models.py
index 29664d18..bd6f799c 100644
--- a/causalpy/tests/test_pymc_models.py
+++ b/causalpy/tests/test_pymc_models.py
@@ -93,7 +93,7 @@ def test_fit_build_not_implemented(self):
         argvalues=[None, {"a": 1}],
         ids=["None-coords", "dict-coords"],
     )
-    def test_fit_predict(self, coords, rng) -> None:
+    def test_fit_predict(self, coords, rng, mock_pymc_sample) -> None:
         """
         Test fit and predict methods on MyToyModel.
 
@@ -122,7 +122,7 @@ def test_fit_predict(self, coords, rng) -> None:
         assert isinstance(predictions, az.InferenceData)
 
 
-def test_idata_property():
+def test_idata_property(mock_pymc_sample):
     """Test that we can access the idata property of the model"""
     df = cp.load_data("did")
     result = cp.DifferenceInDifferences(
@@ -140,7 +140,7 @@ def test_idata_property():
 
 
 @pytest.mark.parametrize("seed", seeds)
-def test_result_reproducibility(seed):
+def test_result_reproducibility(seed, mock_pymc_sample):
     """Test that we can reproduce the results from the model. We could in theory test
     this with all the model and experiment types, but what is being targeted is
     the PyMCModel.fit method, so we should be safe testing with just one model. Here
diff --git a/pyproject.toml b/pyproject.toml
index f26c8de3..4148a62e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,7 +84,6 @@ addopts = [
     "--strict-config",
     "--cov=causalpy",
     "--cov-report=term-missing",
-    "--doctest-modules",
 ]
 testpaths = "causalpy/tests"
 markers = [