
Commit 19d846a

Add global max_errors setting (#8319)
* feat(settings): expose max_errors globally
* Defer max_errors default to invocation
* correction in MIPRO
1 parent: 4f154a7 · commit: 19d846a

File tree: 11 files changed, +146 −76 lines changed

docs/docs/cheatsheet.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -271,7 +271,7 @@ your_dspy_program_compiled = labeled_fewshot_optimizer.compile(student = your_ds
 ```python
 from dspy.teleprompt import BootstrapFewShot
 
-fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=5)
+fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=10)
 
 your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset)
 ```
@@ -281,7 +281,7 @@ your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_progr
 ```python
 from dspy.teleprompt import BootstrapFewShot
 
-fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=5, teacher_settings=dict(lm=gpt4))
+fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=10, teacher_settings=dict(lm=gpt4))
 
 your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset)
 ```
````

dspy/dsp/utils/settings.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -26,6 +26,7 @@
     stream_listeners=[],
     provide_traceback=False,  # Whether to include traceback information in error logs.
     num_threads=8,  # Number of threads to use for parallel processing.
+    max_errors=10,  # Maximum errors before halting operations.
 )
 
 # Global base configuration and owner tracking
```
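With `max_errors` in the default config, the error budget can now be set once per process instead of being passed to every optimizer and evaluator. A minimal sketch of using the new knob (the value 25 is arbitrary, not a recommendation):

```python
import dspy

# Set the process-wide error budget once. Components constructed with
# max_errors=None will defer to this value when they run.
dspy.settings.configure(max_errors=25)
```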

dspy/evaluate/evaluate.py

Lines changed: 8 additions & 3 deletions
```diff
@@ -56,7 +56,7 @@ def __init__(
         num_threads: Optional[int] = None,
         display_progress: bool = False,
         display_table: Union[bool, int] = False,
-        max_errors: int = 5,
+        max_errors: Optional[int] = None,
         return_all_scores: bool = False,
         return_outputs: bool = False,
         provide_traceback: Optional[bool] = None,
@@ -71,7 +71,8 @@ def __init__(
             display_progress (bool): Whether to display progress during evaluation.
             display_table (Union[bool, int]): Whether to display the evaluation results in a table.
                 If a number is passed, the evaluation results will be truncated to that number before displayed.
-            max_errors (int): The maximum number of errors to allow before stopping evaluation.
+            max_errors (Optional[int]): The maximum number of errors to allow before
+                stopping evaluation. If ``None``, inherits from ``dspy.settings.max_errors``.
             return_all_scores (bool): Whether to return scores for every data record in `devset`.
             return_outputs (bool): Whether to return the dspy program's outputs for every data in `devset`.
             provide_traceback (Optional[bool]): Whether to provide traceback information during evaluation.
@@ -151,7 +152,11 @@ def __call__(
         executor = ParallelExecutor(
             num_threads=num_threads,
             disable_progress_bar=not display_progress,
-            max_errors=self.max_errors,
+            max_errors=(
+                self.max_errors
+                if self.max_errors is not None
+                else dspy.settings.max_errors
+            ),
             provide_traceback=self.provide_traceback,
             compare_results=True,
         )
```
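Because the fallback is resolved inside `__call__` rather than `__init__`, an `Evaluate` instance picks up whatever global value is current at call time. A hedged usage sketch; the metric and devset below are toy placeholders, and an LM is assumed to be configured already:

```python
import dspy
from dspy.evaluate import Evaluate

def exact_match(example, prediction, trace=None):  # toy metric, illustration only
    return example.answer == prediction.answer

devset = [dspy.Example(question="What is 2 + 2?", answer="4").with_inputs("question")]

dspy.settings.configure(max_errors=25)

inherits = Evaluate(devset=devset, metric=exact_match)                 # None -> inherits 25
overrides = Evaluate(devset=devset, metric=exact_match, max_errors=1)  # explicit value wins
```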

dspy/predict/parallel.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -10,15 +10,15 @@ class Parallel:
     def __init__(
         self,
         num_threads: Optional[int] = None,
-        max_errors: int = 10,
+        max_errors: Optional[int] = None,
         access_examples: bool = True,
         return_failed_examples: bool = False,
         provide_traceback: Optional[bool] = None,
         disable_progress_bar: bool = False,
     ):
         super().__init__()
         self.num_threads = num_threads or settings.num_threads
-        self.max_errors = max_errors
+        self.max_errors = settings.max_errors if max_errors is None else max_errors
         self.access_examples = access_examples
         self.return_failed_examples = return_failed_examples
         self.provide_traceback = provide_traceback
```
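Note the `is None` check here, in contrast to the truthy `num_threads or settings.num_threads` fallback on the line above it: with `or`, an explicit `max_errors=0` (fail on the first error) would be silently replaced by the global default. A small sketch of the difference:

```python
import dspy

max_errors = 0  # caller explicitly tolerates zero errors

# Truthiness-based fallback discards the 0:
print(max_errors or dspy.settings.max_errors)  # -> 10, the global default

# The `is None` sentinel preserves it:
print(dspy.settings.max_errors if max_errors is None else max_errors)  # -> 0
```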

dspy/primitives/program.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -115,7 +115,7 @@ def batch(
         self,
         examples,
         num_threads: Optional[int] = None,
-        max_errors: int = 10,
+        max_errors: Optional[int] = None,
         return_failed_examples: bool = False,
         provide_traceback: Optional[bool] = None,
         disable_progress_bar: bool = False,
@@ -127,10 +127,11 @@ def batch(
             examples: List of dspy.Example instances to process.
             num_threads: Number of threads to use for parallel processing.
             max_errors: Maximum number of errors allowed before stopping execution.
+                If ``None``, inherits from ``dspy.settings.max_errors``.
             return_failed_examples: Whether to return failed examples and exceptions.
             provide_traceback: Whether to include traceback information in error logs.
             disable_progress_bar: Whether to display the progress bar.
-
+
         Returns:
             List of results, and optionally failed examples and exceptions.
         """
```

dspy/teleprompt/bootstrap.py

Lines changed: 9 additions & 3 deletions
```diff
@@ -43,7 +43,7 @@ def __init__(
         max_bootstrapped_demos=4,
         max_labeled_demos=16,
         max_rounds=1,
-        max_errors=5,
+        max_errors=None,
     ):
         """A Teleprompter class that composes a set of demos/examples to go into a predictor's prompt.
         These demos come from a combination of labeled examples in the training set, and bootstrapped demos.
@@ -62,7 +62,8 @@ def __init__(
                 Defaults to 16.
             max_rounds (int): Number of iterations to attempt generating the required bootstrap
                 examples. If unsuccessful after `max_rounds`, the program ends. Defaults to 1.
-            max_errors (int): Maximum number of errors until program ends. Defaults to 5.
+            max_errors (Optional[int]): Maximum number of errors until program ends.
+                If ``None``, inherits from ``dspy.settings.max_errors``.
         """
         self.metric = metric
         self.metric_threshold = metric_threshold
@@ -210,7 +211,12 @@ def _bootstrap_one_example(self, example, round_idx=0):
             with self.error_lock:
                 self.error_count += 1
                 current_error_count = self.error_count
-            if current_error_count >= self.max_errors:
+            effective_max_errors = (
+                self.max_errors
+                if self.max_errors is not None
+                else dspy.settings.max_errors
+            )
+            if current_error_count >= effective_max_errors:
                 raise e
             logger.error(f"Failed to run or to evaluate example {example} with {self.metric} due to {e}.")
 
```
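The budget is resolved at failure time rather than in `__init__`, so a compile that is already running respects later changes to the surrounding settings. A simplified, self-contained sketch of the accounting above (the class and names are illustrative, not DSPy API):

```python
import threading

class ErrorBudget:
    """Illustrative reduction of the bootstrap error handling above."""

    def __init__(self, max_errors=None):
        self.max_errors = max_errors  # None means: defer to the global setting
        self.error_count = 0
        self.error_lock = threading.Lock()

    def record_failure(self, exc, global_max_errors=10):
        # Count under the lock so concurrent bootstrap threads don't race.
        with self.error_lock:
            self.error_count += 1
            current = self.error_count
        effective = self.max_errors if self.max_errors is not None else global_max_errors
        if current >= effective:
            raise exc  # budget exhausted: re-raise instead of logging and continuing
```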

dspy/teleprompt/infer_rules.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -19,7 +19,7 @@ def __init__(self, num_candidates=10, num_rules=10, num_threads=None, teacher_se
         self.num_threads = num_threads
         self.rules_induction_program = RulesInductionProgram(num_rules, teacher_settings=teacher_settings)
         self.metric = kwargs.get("metric")
-        self.max_errors = kwargs.get("max_errors", 10)
+        self.max_errors = kwargs.get("max_errors")
 
     def compile(self, student, *, teacher=None, trainset, valset=None):
         if valset is None:
@@ -109,11 +109,14 @@ def get_predictor_demos(self, trainset, predictor):
         ]
 
     def evaluate_program(self, program, dataset):
+        effective_max_errors = (
+            self.max_errors if self.max_errors is not None else dspy.settings.max_errors
+        )
         evaluate = Evaluate(
             devset=dataset,
             metric=self.metric,
             num_threads=self.num_threads,
-            max_errors=self.max_errors,
+            max_errors=effective_max_errors,
             display_table=False,
             display_progress=True,
             return_all_scores=True,
```
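Dropping the second argument to `kwargs.get` works because `dict.get` already returns `None` for a missing key, which is exactly the "inherit from settings" sentinel the rest of the codebase now expects:

```python
kwargs = {}
assert kwargs.get("max_errors") is None    # absent -> None -> inherit the global value
assert kwargs.get("max_errors", 10) == 10  # the old default would mask the sentinel
```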

dspy/teleprompt/mipro_optimizer_v2.py

Lines changed: 38 additions & 20 deletions
```diff
@@ -5,24 +5,21 @@
 import textwrap
 import time
 from collections import defaultdict
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Tuple
+from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Literal,
+                    Optional, Tuple)
 
 import numpy as np
 
 import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.propose import GroundedProposer
 from dspy.teleprompt.teleprompt import Teleprompter
-from dspy.teleprompt.utils import (
-    create_minibatch,
-    create_n_fewshot_demo_sets,
-    eval_candidate_program,
-    get_program_with_highest_avg_score,
-    get_signature,
-    print_full_program,
-    save_candidate_program,
-    set_signature,
-)
+from dspy.teleprompt.utils import (create_minibatch,
+                                   create_n_fewshot_demo_sets,
+                                   eval_candidate_program,
+                                   get_program_with_highest_avg_score,
+                                   get_signature, print_full_program,
+                                   save_candidate_program, set_signature)
 
 if TYPE_CHECKING:
     import optuna
@@ -60,7 +57,7 @@ def __init__(
         auto: Optional[Literal["light", "medium", "heavy"]] = "light",
         num_candidates: Optional[int] = None,
         num_threads: Optional[int] = None,
-        max_errors: int = 10,
+        max_errors: Optional[int] = None,
         seed: int = 9,
         init_temperature: float = 0.5,
         verbose: bool = False,
@@ -116,20 +113,28 @@ def compile(
         requires_permission_to_run: bool = True,
         provide_traceback: Optional[bool] = None,
     ) -> Any:
-
+        effective_max_errors = (
+            self.max_errors
+            if self.max_errors is not None
+            else dspy.settings.max_errors
+        )
         zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0)
 
         # If auto is None, and num_trials is not provided (but num_candidates is), raise an error that suggests a good num_trials value
         if self.auto is None and (self.num_candidates is not None and num_trials is None):
-            raise ValueError(f"If auto is None, num_trials must also be provided. Given num_candidates={self.num_candidates}, we'd recommend setting num_trials to ~{self._set_num_trials_from_num_candidates(student, zeroshot_opt, self.num_candidates)}.")
+            raise ValueError(
+                f"If auto is None, num_trials must also be provided. Given num_candidates={self.num_candidates}, we'd recommend setting num_trials to ~{self._set_num_trials_from_num_candidates(student, zeroshot_opt, self.num_candidates)}."
+            )
 
         # If auto is None, and num_candidates or num_trials is None, raise an error
         if self.auto is None and (self.num_candidates is None or num_trials is None):
             raise ValueError("If auto is None, num_candidates must also be provided.")
 
         # If auto is provided, and either num_candidates or num_trials is not None, raise an error
         if self.auto is not None and (self.num_candidates is not None or num_trials is not None):
-            raise ValueError("If auto is not None, num_candidates and num_trials cannot be set, since they would be overrided by the auto settings. Please either set auto to None, or do not specify num_candidates and num_trials.")
+            raise ValueError(
+                "If auto is not None, num_candidates and num_trials cannot be set, since they would be overrided by the auto settings. Please either set auto to None, or do not specify num_candidates and num_trials."
+            )
 
         # Set random seeds
         seed = seed or self.seed
@@ -175,7 +180,7 @@ def compile(
             devset=valset,
             metric=self.metric,
             num_threads=self.num_threads,
-            max_errors=self.max_errors,
+            max_errors=effective_max_errors,
             display_table=False,
             display_progress=True,
             provide_traceback=provide_traceback,
@@ -382,7 +387,7 @@ def _get_user_confirmation(
             """
         )
 
-        print(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ", end='', flush=True)
+        print(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ", end="", flush=True)
 
         # Wait for input with timeout
         start_time = time.time()
@@ -409,6 +414,10 @@ def _bootstrap_fewshot_examples(self, program: Any, trainset: List, seed: int, t
         zeroshot = self.max_bootstrapped_demos == 0 and self.max_labeled_demos == 0
 
         try:
+            effective_max_errors = (
+                self.max_errors if self.max_errors is not None else dspy.settings.max_errors
+            )
+
             demo_candidates = create_n_fewshot_demo_sets(
                 student=program,
                 num_candidate_sets=self.num_fewshot_candidates,
@@ -418,7 +427,7 @@
                     BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else self.max_bootstrapped_demos
                 ),
                 metric=self.metric,
-                max_errors=self.max_errors,
+                max_errors=effective_max_errors,
                 teacher=teacher,
                 teacher_settings=self.teacher_settings,
                 seed=seed,
@@ -498,6 +507,7 @@ def _optimize_prompt_parameters(
         seed: int,
     ) -> Optional[Any]:
         import optuna
+
         # Run optimization
         optuna.logging.set_verbosity(optuna.logging.WARNING)
         logger.info("==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==")
@@ -507,7 +517,11 @@
 
         # Compute the adjusted total trials that we will run (including full evals)
         run_additional_full_eval_at_end = 1 if num_trials % minibatch_full_eval_steps != 0 else 0
-        adjusted_num_trials = int((num_trials + num_trials // minibatch_full_eval_steps + 1 + run_additional_full_eval_at_end) if minibatch else num_trials)
+        adjusted_num_trials = int(
+            (num_trials + num_trials // minibatch_full_eval_steps + 1 + run_additional_full_eval_at_end)
+            if minibatch
+            else num_trials
+        )
         logger.info(f"== Trial {1} / {adjusted_num_trials} - Full Evaluation of Default Program ==")
 
         default_score, _ = eval_candidate_program(
@@ -610,7 +624,9 @@ def objective(trial):
                 )
 
                 # If minibatch, perform full evaluation at intervals (and at the very end)
-                if minibatch and ((trial_num % (minibatch_full_eval_steps+1) == 0) or (trial_num == (adjusted_num_trials-1))):
+                if minibatch and (
+                    (trial_num % (minibatch_full_eval_steps + 1) == 0) or (trial_num == (adjusted_num_trials - 1))
+                ):
                     best_score, best_program, total_eval_calls = self._perform_full_evaluation(
                         trial_num,
                         adjusted_num_trials,
@@ -759,6 +775,7 @@ def _select_and_insert_instructions_and_demos(
 
     def _get_param_distributions(self, program, instruction_candidates, demo_candidates):
         from optuna.distributions import CategoricalDistribution
+
         param_distributions = {}
 
         for i in range(len(instruction_candidates)):
@@ -788,6 +805,7 @@ def _perform_full_evaluation(
         demo_candidates: List,
     ):
         import optuna
+
         logger.info(f"===== Trial {trial_num + 1} / {adjusted_num_trials} - Full Evaluation =====")
 
         # Identify best program to evaluate fully
```
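Since MIPROv2 now reads `dspy.settings.max_errors` at compile time when `max_errors=None`, a scoped override should work as well. A hedged sketch; the metric, student, and trainset are placeholders, and it assumes `dspy.context` accepts the same keys as `dspy.settings.configure`:

```python
import dspy
from dspy.teleprompt import MIPROv2

def my_metric(example, prediction, trace=None):  # placeholder metric
    return float(example.answer == prediction.answer)

student = dspy.Predict("question -> answer")
trainset = [dspy.Example(question="What is 2 + 2?", answer="4").with_inputs("question")]

optimizer = MIPROv2(metric=my_metric, auto="light")  # max_errors=None -> inherit

# Tighten the budget for this one compile without touching the global default.
with dspy.context(max_errors=3):
    compiled = optimizer.compile(student, trainset=trainset)
```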

dspy/teleprompt/random_search.py

Lines changed: 14 additions & 5 deletions
```diff
@@ -1,5 +1,6 @@
 import random
 
+import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.teleprompt.teleprompt import Teleprompter
 
@@ -33,7 +34,7 @@ def __init__(
         max_rounds=1,
         num_candidate_programs=16,
         num_threads=None,
-        max_errors=10,
+        max_errors=None,
         stop_at_score=None,
         metric_threshold=None,
     ):
@@ -57,6 +58,12 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
         self.trainset = trainset
         self.valset = valset or trainset  # TODO: FIXME: Note this choice.
 
+        effective_max_errors = (
+            self.max_errors
+            if self.max_errors is not None
+            else dspy.settings.max_errors
+        )
+
         scores = []
         all_subscores = []
         score_data = []
@@ -85,7 +92,7 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
                     max_labeled_demos=self.max_labeled_demos,
                     teacher_settings=self.teacher_settings,
                     max_rounds=self.max_rounds,
-                    max_errors=self.max_errors,
+                    max_errors=effective_max_errors,
                 )
                 program = optimizer.compile(student, teacher=teacher, trainset=trainset_copy)
 
@@ -102,7 +109,7 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
                     max_labeled_demos=self.max_labeled_demos,
                     teacher_settings=self.teacher_settings,
                     max_rounds=self.max_rounds,
-                    max_errors=self.max_errors,
+                    max_errors=effective_max_errors,
                 )
 
                 program = optimizer.compile(student, teacher=teacher, trainset=trainset_copy)
@@ -111,7 +118,7 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
                 devset=self.valset,
                 metric=self.metric,
                 num_threads=self.num_threads,
-                max_errors=self.max_errors,
+                max_errors=effective_max_errors,
                 display_table=False,
                 display_progress=True,
             )
@@ -143,7 +150,9 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
 
         # To best program, attach all program candidates in decreasing average score
         best_program.candidate_programs = score_data
-        best_program.candidate_programs = sorted(best_program.candidate_programs, key=lambda x: x["score"], reverse=True)
+        best_program.candidate_programs = sorted(
+            best_program.candidate_programs, key=lambda x: x["score"], reverse=True
+        )
 
         print(f"{len(best_program.candidate_programs)} candidate programs found.")
 
```