@@ -95,11 +95,15 @@
 # %%
 # Accuracy of the Model
 # ---------------------
-# Prior to inspecting the feature importances, it is important to check that
-# the model predictive performance is high enough. Indeed there would be little
-# interest of inspecting the important features of a non-predictive model.
-#
-# Here one can observe that the train accuracy is very high (the forest model
+# Before inspecting the feature importances, it is important to check that
+# the model predictive performance is high enough. Indeed, there would be little
+# interest in inspecting the important features of a non-predictive model.
+
+print(f"RF train accuracy: {rf.score(X_train, y_train):.3f}")
+print(f"RF test accuracy: {rf.score(X_test, y_test):.3f}")
+
+# %%
+# Here, one can observe that the train accuracy is very high (the forest model
 # has enough capacity to completely memorize the training set) but it can still
 # generalize well enough to the test set thanks to the built-in bagging of
 # random forests.
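The two `print` calls added in this hunk assume a fitted classifier `rf` and a train/test split (`X_train`, `X_test`, `y_train`, `y_test`) that are defined earlier in the example, outside this diff. As a rough, self-contained stand-in on synthetic data (not the example's actual dataset or preprocessing pipeline), the sanity check might look like:

    # Hypothetical stand-in for the example's setup: synthetic data instead of
    # the real dataset, and a plain RandomForestClassifier instead of the
    # example's full preprocessing pipeline.
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=1_000, n_features=10, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

    # A large gap between these two numbers would signal overfitting.
    print(f"RF train accuracy: {rf.score(X_train, y_train):.3f}")
    print(f"RF test accuracy: {rf.score(X_test, y_test):.3f}")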
@@ -110,12 +114,9 @@
 # ``min_samples_leaf=10``) so as to limit overfitting while not introducing too
 # much underfitting.
 #
-# However let's keep our high capacity random forest model for now so as to
-# illustrate some pitfalls with feature importance on variables with many
+# However, let us keep our high capacity random forest model for now so that we can
+# illustrate some pitfalls about feature importance on variables with many
 # unique values.
-print(f"RF train accuracy: {rf.score(X_train, y_train):.3f}")
-print(f"RF test accuracy: {rf.score(X_test, y_test):.3f}")
-
 
 # %%
 # Tree's Feature Importance from Mean Decrease in Impurity (MDI)
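The capacity-limited alternative mentioned in this hunk (``min_samples_leaf=10``) is not shown in the diff itself. A minimal sketch on the same kind of synthetic stand-in data (the names and values here are illustrative, not taken from the example) could be:

    # Illustrative only: a forest whose leaves must contain at least 10 samples
    # overfits less, usually at the cost of a slightly lower train accuracy.
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=1_000, n_features=10, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    limited_rf = RandomForestClassifier(min_samples_leaf=10, random_state=42)
    limited_rf.fit(X_train, y_train)
    print(f"Limited RF train accuracy: {limited_rf.score(X_train, y_train):.3f}")
    print(f"Limited RF test accuracy: {limited_rf.score(X_test, y_test):.3f}")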
@@ -135,7 +136,7 @@
 #
 # The bias towards high cardinality features explains why the `random_num` has
 # a really large importance in comparison with `random_cat` while we would
-# expect both random features to have a null importance.
+# expect that both random features have a null importance.
 #
 # The fact that we use training set statistics explains why both the
 # `random_num` and `random_cat` features have a non-null importance.
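MDI importances are read directly from the fitted forest through scikit-learn's `feature_importances_` attribute. A small self-contained illustration, using synthetic data and generic feature names rather than the example's `random_num`/`random_cat` setup:

    # MDI importances come from the fitted trees themselves (impurity decrease
    # accumulated during training), so they reflect training-set statistics only.
    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=500, n_features=5, random_state=0)
    forest = RandomForestClassifier(random_state=0).fit(X, y)

    mdi_importances = pd.Series(
        forest.feature_importances_,
        index=[f"feature_{i}" for i in range(X.shape[1])],
    ).sort_values(ascending=False)
    print(mdi_importances)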
@@ -155,11 +156,11 @@
 # %%
 # As an alternative, the permutation importances of ``rf`` are computed on a
 # held out test set. This shows that the low cardinality categorical feature,
-# `sex` and `pclass` are the most important feature. Indeed, permuting the
-# values of these features will lead to most decrease in accuracy score of the
+# `sex` and `pclass` are the most important features. Indeed, permuting the
+# values of these features will lead to the most decrease in accuracy score of the
 # model on the test set.
 #
-# Also note that both random features have very low importances (close to 0) as
+# Also, note that both random features have very low importances (close to 0) as
 # expected.
 from sklearn.inspection import permutation_importance
 
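This hunk ends just before the `permutation_importance` call itself. As a hedged, self-contained sketch of how that computation typically looks on a held-out test set (the synthetic data, `n_repeats`, and `random_state` values are illustrative, not the example's actual ones):

    # Permutation importance: shuffle one feature at a time on the test set and
    # measure how much the model's score drops.
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.inspection import permutation_importance
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=1_000, n_features=8, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    model = RandomForestClassifier(random_state=0).fit(X_train, y_train)

    result = permutation_importance(
        model, X_test, y_test, n_repeats=10, random_state=0, n_jobs=2
    )
    for i in result.importances_mean.argsort()[::-1]:
        print(
            f"feature_{i}: "
            f"{result.importances_mean[i]:.3f} +/- {result.importances_std[i]:.3f}"
        )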