FAI-896: Doc standardization and cleanup (#126)

RobGeada · web-flow · commit fde2f468c3b6 · 2022-12-05T11:43:40.000Z
* default parameter doc standardization

* Standardized kwargs labelings, fixed typos

* linting + black

* Update counterfactuals.py
diff --git a/docs/api.rst b/docs/api.rst
@@ -46,22 +46,37 @@ Model Classes
 
 trustyai.explainers
 -------------------
+LIME
+####
 .. currentmodule:: trustyai.explainers
 .. explainers_api:
 .. autosummary::
 	:toctree: generated/
 
-	CounterfactualExplainer
-	CounterfactualResult
 	LimeExplainer
 	LimeResults
+
+SHAP
+####
+.. autosummary::
+	:toctree: generated/
+
 	SHAPExplainer
+	BackgroundGenerator
 	SHAPResults
 
+Counterfactuals
+###############
+.. autosummary::
+	:toctree: generated/
+
+	CounterfactualExplainer
+	CounterfactualResult
+
 trustyai.utils
--------------------
+--------------
 .. currentmodule:: trustyai.utils.tyrus
-.. explainers_api:
+.. utils_api:
 .. autosummary::
 	:toctree: generated/
 
diff --git a/src/trustyai/explainers/__init__.py b/src/trustyai/explainers/__init__.py
@@ -2,4 +2,4 @@
 # pylint: disable=duplicate-code
 from .counterfactuals import CounterfactualResult, CounterfactualExplainer
 from .lime import LimeExplainer, LimeResults
-from .shap import SHAPExplainer, SHAPResults
+from .shap import SHAPExplainer, SHAPResults, BackgroundGenerator
diff --git a/src/trustyai/explainers/counterfactuals.py b/src/trustyai/explainers/counterfactuals.py
@@ -196,14 +196,14 @@ def explain(
             The desired model outputs to be searched for in the counterfactual explanation.
             These can take the form of a: {}
         model : :obj:`~trustyai.model.PredictionProvider`
-            The TrustyAI PredictionProvider, as generated by :class:`~trustyai.model.Model` or
-             :class:`~trustyai.model.ArrowModel`.
+            The TrustyAI model as generated by :class:`~trustyai.model.Model` or a Java :class:`PredictionProvider`
         data_distribution : Optional[:class:`DataDistribution`]
             The :class:`DataDistribution` to use when sampling the inputs.
         uuid : Optional[:class:`_uuid.UUID`]
             The UUID to use during search.
         timeout : Optional[float]
                 The timeout time in seconds of the counterfactual explanation.
+
         Returns
         -------
         :class:`~CounterfactualResult`
diff --git a/src/trustyai/explainers/explanation_results.py b/src/trustyai/explainers/explanation_results.py
@@ -51,12 +51,12 @@ def plot(self, output_name=None, render_bokeh=False, block=True) -> None:
         Parameters
         ----------
         output_name : str
-            (default=None) The name of the output to be explainer. If `None`, all outputs will
+            (default= `None`) The name of the output to be explained. If `None`, all outputs will
             be displayed
         render_bokeh : bool
-            (default: false) Whether to render as bokeh (true) or matplotlib (false)
+            (default= `False`) If true, render plot in bokeh, otherwise use matplotlib.
         block: bool
-            (default: true) Whether displaying the plot blocks subsequent code execution
+            (default= `True`) Whether displaying the plot blocks subsequent code execution
         """
         if output_name is None:
             for output_name_iterator in self.saliency_map().keys():
diff --git a/src/trustyai/explainers/lime.py b/src/trustyai/explainers/lime.py
@@ -212,7 +212,7 @@ class LimeExplainer:
     """
 
     def __init__(self, samples=10, **kwargs):
-        """Initialize the :class:`LimeExplainer`.
+        r"""Initialize the :class:`LimeExplainer`.
 
         Parameters
         ----------
@@ -221,23 +221,23 @@ def __init__(self, samples=10, **kwargs):
 
         Keyword Arguments:
             * penalise_sparse_balance : bool
-                (default=True) Whether to penalise features that are likely to produce linearly
+                (default= ``True``) Whether to penalise features that are likely to produce linearly
                 inseparable outputs. This can improve the efficacy and interpretability of the
                 outputted saliencies.
             * normalise_weights : bool
-                (default=False) Whether to normalise the saliencies generated by LIME. If selected,
+                (default= ``False``) Whether to normalise the saliencies generated by LIME. If selected,
                 saliencies will be normalized between 0 and 1.
             * use_wlr_model : bool
-                (default=True) Whether to use a weighted linear regression as the LIME explanatory
+                (default= ``True``) Whether to use a weighted linear regression as the LIME explanatory
                 model. If `false`, a multilayer perceptron is used, which generally has a slower
                 runtime,
             * seed: int
-                (default=0) The random seed to be used.
+                (default= ``0``) The random seed to be used.
             * perturbations: int
-                (default=1) The starting number of feature perturbations within the explanation
+                (default= ``1``) The starting number of feature perturbations within the explanation
                 process.
             * trackCounterfactuals : bool
-                (default=False) Keep track of produced byproduct counterfactuals during LIME run.
+                (default= ``False``) Keep track of produced byproduct counterfactuals during LIME run.
 
         """
         self._jrandom = Random()
diff --git a/src/trustyai/explainers/shap.py b/src/trustyai/explainers/shap.py
@@ -420,7 +420,9 @@ def _get_bokeh_plot_dict(self):
 
 
 class BackgroundGenerator:
-    r"""Generate a background for the SHAP explainer via one of three algorithms:
+    r"""Generate a background for the SHAP explainer.
+
+    Generate a background for the SHAP explainer via one of three algorithms:
 
     * `sample`: Randomly sample a set of provided points
     * `kmeans`: Summarize a set of provided points into k centroids
@@ -502,33 +504,35 @@ def counterfactual(
             The number of background datapoints to generate per goal.
         Keyword Arguments:
             * k_seeds: int
-                (default=5) For each goal, a number of starting seeds from `datapoints` are used
+                (default= ``5``) For each goal, a number of starting seeds from `datapoints` are used
                 to start the search from. These are the `k_seeds` points within `datapoint`
                 whose corresponding outputs are closet to the goal output. Choose a larger
                 number to get a more diverse background dataset, but the search might require
                 larger `max_attempt_count`, `step_count`, and `timeout_seconds` to get good results.
             * goal_threshold: float
-                (default=.01) The distance (percentage) threshold defining whether
+                (default= ``.01``) The distance (percentage) threshold defining whether
                 a particular output satisfies the goal. Set to 0 to require an exact match, but
-                this will likey require larger `max_attempt_count`, `step_count`,
+                this will likely require larger `max_attempt_count`, `step_count`,
                 and `timeout_seconds` to get good results.
             * chain: boolean
-                (default=False) If chaining is set to `true`, found counterfactual datapoints
+                (default= ``False``) If chaining is set to `true`, found counterfactual datapoints
                 will be added to the search seeds for subsequent searches. This is useful when a
                 range of counterfactual outputs is desired; for example, if the desired goals are
-                [0, 1, 2, 3], whichever goal is closest to the closest point within `datapoints` will
-                be searched for first. The found counterfactuals from that search are then included
-                in the search for the second-closest goal, and so on. This is especially helpful
-                if the extremes of the goal range are far outside the range produced by the
+                [0, 1, 2, 3], whichever goal is closest to the closest point within `datapoints`
+                will be searched for first. The found counterfactuals from that search are then
+                included in the search for the second-closest goal, and so on. This is especially
+                helpful if the extremes of the goal range are far outside the range produced by the
                 `datapoints`. If only
             * max_attempt_count: int
-                If no valid counterfactual can be found for a starting seed in the search, the point
-                is slightly perturbed and search is retried. This parameter sets the maximum
-                number of perturbation-retry cycles are allowed during generation.
+                (default= ``5``) If no valid counterfactual can be found for a starting seed in the
+                search, the point is slightly perturbed and search is retried. This parameter sets
+                the maximum number of perturbation-retry cycles allowed during generation.
             * step_count: int
-                (default=10,000) The number of datapoints to evaluate during the search
+                (default= ``5_000``) The number of datapoints to evaluate during the search
             * timeout_seconds: int
-                (default=30) The maximum number of seconds allowed for each counterfactual search
+                (default= ``3``) The maximum number of seconds allowed for each counterfactual search.
+                This will set the maximum runtime of the search to roughly
+                `timeout_seconds` * `max_attempt_count` * `k_per_goal` * `len(goals)`
 
         Returns
         -------
@@ -610,19 +614,19 @@ def __init__(
             Otherwise, choose ``IDENTITY``.
         Keyword Arguments:
             * samples: int
-                (default=None) The number of samples to use when computing SHAP values. Higher
+                (default= ``None``) The number of samples to use when computing SHAP values. Higher
                 values will increase explanation accuracy, at the  cost of runtime. If none,
                 samples will equal 2048 + 2*n_features
             * seed: int
-                (default=0) The random seed to be used when generating explanations.
+                (default= ``0``) The random seed to be used when generating explanations.
             * batchSize: int
-                (default=20) The number of batches passed to the PredictionProvider at once.
-                When uusing :class:`~Model` with `arrow=False` this parameter has no effect.
+                (default= ``20``) The number of batches passed to the PredictionProvider at once.
+                When using :class:`~Model` with `disable_arrow=True` this parameter has no effect.
                 If `arrow=True`, `batch_sizes` of around
                 :math:`\frac{{2000}}{{\mathtt{{len(background)}}}}` can produce significant
                 performance gains.
             * trackCounterfactuals : bool
-                (default=False) Keep track of produced byproduct counterfactuals during SHAP run.
+                (default= ``False``) Keep track of produced byproduct counterfactuals during SHAP run.
 
         Returns
         -------
diff --git a/src/trustyai/model/__init__.py b/src/trustyai/model/__init__.py
@@ -728,7 +728,7 @@ def output(name, dtype, value=None, score=1.0) -> _Output:
         * ``categorical`` for categorical outputs.
 
         If `dtype` is unspecified or takes a different value than listed above, the
-         feature type will be set as `UNDEFINED`.
+        feature type will be set as `UNDEFINED`.
     value : Any
         The value of this output.
     score : float
diff --git a/src/trustyai/model/domain.py b/src/trustyai/model/domain.py
@@ -24,17 +24,17 @@ def feature_domain(values: Optional[Union[Tuple, List]]) -> Optional[FeatureDoma
     Parameters
     ----------
     values : Optional[Union[Tuple, List]]
-        The valid values of the feature. If `values` takes the form of:
+        The valid values of the feature. If ``values`` takes the form of:
 
-        * **A tuple of floats or integers:** The feature domain will be a continuous range from
+        * **A tuple of floats or integers**: The feature domain will be a continuous range from
           ``values[0]`` to ``values[1]``.
-        * **A list of floats or integers:**: The feature domain will be a *numeric* categorical,
-        where `values` contains all possible valid feature values.
-        * **A list of strings:** The feature domain will be a *string* categorical, where `values`
-         contains all possible valid feature values.
-        * **A list of objects:** The feature domain will be an *object* categorical, where `values`
-         contains all possible valid feature values. These may present an issue if the objects
-         are not natively Java serializable.
+        * **A list of floats or integers**: The feature domain will be a *numeric* categorical,
+          where ``values`` contains all possible valid feature values.
+        * **A list of strings**: The feature domain will be a *string* categorical, where ``values``
+          contains all possible valid feature values.
+        * **A list of objects**: The feature domain will be an *object* categorical, where
+          ``values`` contains all possible valid feature values. These may present an issue if the
+          objects are not natively Java serializable.
 
         Otherwise, the feature domain will be taken as `Empty`, which will mean it will be held
         fixed during the counterfactual explanation.
diff --git a/src/trustyai/utils/tyrus.py b/src/trustyai/utils/tyrus.py
@@ -116,7 +116,7 @@ def __init__(
                 (default=`0.1`) The fraction of found byproduct counterfactuals to display in the
                 dashboard, as a float between 0 and 1. Choose a larger number to see more,
                 but this will make plot rendering more expensive.
-            * notebook : `bool
+            * notebook : bool
                 (default=`False`) If true, Tyrus will launch the visualizations inline in a
                 Jupyter notebook. If false, the visualizations will be saved as HTML and opened
                 automatically in your default browser.