
Commit 35f8159

ogrisel and glemaitre authored

DOC Improve the plot_gpr_noisy.py example (scikit-learn#29788)
Co-authored-by: Guillaume Lemaitre <guillaume@probabl.ai>
1 parent 390ce98 commit 35f8159

File tree

1 file changed: +59 -23 lines changed


examples/gaussian_process/plot_gpr_noisy.py

Lines changed: 59 additions & 23 deletions
@@ -33,7 +33,7 @@ def target_generator(X, add_noise=False):
 # %%
 # Let's have a look to the target generator where we will not add any noise to
 # observe the signal that we would like to predict.
-X = np.linspace(0, 5, num=30).reshape(-1, 1)
+X = np.linspace(0, 5, num=80).reshape(-1, 1)
 y = target_generator(X, add_noise=False)
 
 # %%
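Note: the hunks in this diff rely on the `target_generator` helper defined earlier in the example, which is not touched by this commit and therefore not shown here. As a rough, hypothetical stand-in for running the snippets on this page locally (not necessarily the exact function from the example), something like the following would do:

import numpy as np


def target_generator(X, add_noise=False):
    # Hypothetical stand-in: a smooth sinusoidal signal with optional
    # homoscedastic Gaussian noise; the real example may differ in details.
    target = 0.5 + np.sin(3 * X)
    if add_noise:
        rng = np.random.RandomState(1)
        target = target + rng.normal(loc=0.0, scale=0.3, size=target.shape)
    return target.squeeze()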
@@ -88,7 +88,7 @@ def target_generator(X, add_noise=False):
 from sklearn.gaussian_process.kernels import RBF, WhiteKernel
 
 kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
-    noise_level=1, noise_level_bounds=(1e-5, 1e1)
+    noise_level=1, noise_level_bounds=(1e-10, 1e1)
 )
 gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0)
 gpr.fit(X_train, y_train)
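With these starting values the optimizer settles in a poor solution, as the prose added further down explains. A minimal sketch to inspect what was actually found, assuming the `gpr`, `X_train` and `y_train` objects from the hunk above (illustrative only, not part of the committed example):

import numpy as np

# Optimized kernel and the log-marginal-likelihood it reaches.
print("Optimized kernel:", gpr.kernel_)
print("Log-marginal-likelihood:", gpr.log_marginal_likelihood(gpr.kernel_.theta))

# Check which hyperparameters ended up on one of their bounds; this is what
# triggers the convergence warning mentioned in the updated prose below.
for hyperparameter, value in zip(gpr.kernel_.hyperparameters, np.exp(gpr.kernel_.theta)):
    low, high = hyperparameter.bounds[0]
    print(f"{hyperparameter.name}: {value:.3g} (bounds: [{low:.3g}, {high:.3g}])")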
@@ -97,7 +97,7 @@ def target_generator(X, add_noise=False):
 # %%
 plt.plot(X, y, label="Expected signal")
 plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
-plt.errorbar(X, y_mean, y_std)
+plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
 plt.legend()
 plt.xlabel("X")
 plt.ylabel("y")
@@ -109,15 +109,18 @@ def target_generator(X, add_noise=False):
     fontsize=8,
 )
 # %%
-# We see that the optimum kernel found still have a high noise level and
-# an even larger length scale. Furthermore, we observe that the
-# model does not provide faithful predictions.
+# We see that the optimum kernel found still has a high noise level and an even
+# larger length scale. The length scale reaches the maximum bound that we
+# allowed for this parameter and we got a warning as a result.
 #
-# Now, we will initialize the
-# :class:`~sklearn.gaussian_process.kernels.RBF` with a
-# larger `length_scale` and the
-# :class:`~sklearn.gaussian_process.kernels.WhiteKernel`
-# with a smaller noise level lower bound.
+# More importantly, we observe that the model does not provide useful
+# predictions: the mean prediction seems to be constant; it does not follow the
+# expected noise-free signal.
+#
+# Now, we will initialize the :class:`~sklearn.gaussian_process.kernels.RBF`
+# with a larger `length_scale` initial value and the
+# :class:`~sklearn.gaussian_process.kernels.WhiteKernel` with a smaller initial
+# noise level while keeping the parameter bounds unchanged.
 kernel = 1.0 * RBF(length_scale=1e-1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
     noise_level=1e-2, noise_level_bounds=(1e-10, 1e1)
 )
@@ -128,7 +131,7 @@ def target_generator(X, add_noise=False):
 # %%
 plt.plot(X, y, label="Expected signal")
 plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
-plt.errorbar(X, y_mean, y_std)
+plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
 plt.legend()
 plt.xlabel("X")
 plt.ylabel("y")
@@ -153,21 +156,19 @@ def target_generator(X, add_noise=False):
 # for different hyperparameters to get a sense of the local minima.
 from matplotlib.colors import LogNorm
 
-length_scale = np.logspace(-2, 4, num=50)
-noise_level = np.logspace(-2, 1, num=50)
+length_scale = np.logspace(-2, 4, num=80)
+noise_level = np.logspace(-2, 1, num=80)
 length_scale_grid, noise_level_grid = np.meshgrid(length_scale, noise_level)
 
 log_marginal_likelihood = [
     gpr.log_marginal_likelihood(theta=np.log([0.36, scale, noise]))
     for scale, noise in zip(length_scale_grid.ravel(), noise_level_grid.ravel())
 ]
-log_marginal_likelihood = np.reshape(
-    log_marginal_likelihood, newshape=noise_level_grid.shape
-)
+log_marginal_likelihood = np.reshape(log_marginal_likelihood, noise_level_grid.shape)
 
 # %%
 vmin, vmax = (-log_marginal_likelihood).min(), 50
-level = np.around(np.logspace(np.log10(vmin), np.log10(vmax), num=50), decimals=1)
+level = np.around(np.logspace(np.log10(vmin), np.log10(vmax), num=20), decimals=1)
 plt.contour(
     length_scale_grid,
     noise_level_grid,
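As a side note, the two local minima that the prose below refers to can also be located numerically on this grid. A small sketch, assuming the `length_scale_grid`, `noise_level_grid` and `log_marginal_likelihood` arrays built above and that SciPy is installed (illustrative only, not part of the committed example):

import numpy as np
from scipy.ndimage import minimum_filter

neg_lml = -np.asarray(log_marginal_likelihood)
# A grid point is a (coarse) local minimum if it equals the minimum of its
# 3x3 neighborhood; boundary points are handled with nearest-edge padding.
is_local_min = neg_lml == minimum_filter(neg_lml, size=3, mode="nearest")
for i, j in zip(*np.nonzero(is_local_min)):
    print(
        f"length_scale={length_scale_grid[i, j]:.3g}, "
        f"noise_level={noise_level_grid[i, j]:.3g}, "
        f"negative LML={neg_lml[i, j]:.2f}"
    )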
@@ -184,8 +185,43 @@ def target_generator(X, add_noise=False):
 plt.show()
 
 # %%
-# We see that there are two local minima that correspond to the combination
-# of hyperparameters previously found. Depending on the initial values for the
-# hyperparameters, the gradient-based optimization might converge whether or
-# not to the best model. It is thus important to repeat the optimization
-# several times for different initializations.
+#
+# We see that there are two local minima that correspond to the combination of
+# hyperparameters previously found. Depending on the initial values for the
+# hyperparameters, the gradient-based optimization might or might not
+# converge to the best model. It is thus important to repeat the optimization
+# several times for different initializations. This can be done by setting the
+# `n_restarts_optimizer` parameter of the
+# :class:`~sklearn.gaussian_process.GaussianProcessRegressor` class.
+#
+# Let's try again to fit our model with the bad initial values but this time
+# with 10 random restarts.
+
+kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
+    noise_level=1, noise_level_bounds=(1e-10, 1e1)
+)
+gpr = GaussianProcessRegressor(
+    kernel=kernel, alpha=0.0, n_restarts_optimizer=10, random_state=0
+)
+gpr.fit(X_train, y_train)
+y_mean, y_std = gpr.predict(X, return_std=True)
+
+# %%
+plt.plot(X, y, label="Expected signal")
+plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
+plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
+plt.legend()
+plt.xlabel("X")
+plt.ylabel("y")
+_ = plt.title(
+    (
+        f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: "
+        f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}"
+    ),
+    fontsize=8,
+)
+
+# %%
+#
+# As we hoped, random restarts allow the optimization to find the best set
+# of hyperparameters despite the bad initial values.
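To make the benefit of the restarts concrete, here is a quick sketch (again assuming the imports, `X_train` and `y_train` from the example; not part of the committed file) that fits the model from the same bad initial values with and without restarts and compares the log-marginal-likelihood that is reached:

for n_restarts in (0, 10):
    kernel = 1.0 * RBF(
        length_scale=1e1, length_scale_bounds=(1e-2, 1e3)
    ) + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e1))
    gpr = GaussianProcessRegressor(
        kernel=kernel, alpha=0.0, n_restarts_optimizer=n_restarts, random_state=0
    )
    gpr.fit(X_train, y_train)
    lml = gpr.log_marginal_likelihood(gpr.kernel_.theta)
    print(f"n_restarts_optimizer={n_restarts}: LML={lml:.2f}, kernel={gpr.kernel_}")

The run with restarts should reach a noticeably higher log-marginal-likelihood, matching the conclusion in the updated prose above.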
