
Commit 19d846a

Add global max_errors setting (#8319)
* feat(settings): expose max_errors globally
* Defer max_errors default to invocation
* correction in MIPRO
1 parent: 4f154a7 · commit: 19d846a

File tree: 11 files changed, +146 −76 lines changed

docs/docs/cheatsheet.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -271,7 +271,7 @@ your_dspy_program_compiled = labeled_fewshot_optimizer.compile(student = your_ds
 ```python
 from dspy.teleprompt import BootstrapFewShot
 
-fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=5)
+fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=10)
 
 your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset)
 ```
@@ -281,7 +281,7 @@ your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_progr
 ```python
 from dspy.teleprompt import BootstrapFewShot
 
-fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=5, teacher_settings=dict(lm=gpt4))
+fewshot_optimizer = BootstrapFewShot(metric=your_defined_metric, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=10, teacher_settings=dict(lm=gpt4))
 
 your_dspy_program_compiled = fewshot_optimizer.compile(student = your_dspy_program, trainset=trainset)
 ```
````

dspy/dsp/utils/settings.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -26,6 +26,7 @@
     stream_listeners=[],
     provide_traceback=False,  # Whether to include traceback information in error logs.
     num_threads=8,  # Number of threads to use for parallel processing.
+    max_errors=10,  # Maximum errors before halting operations.
 )
 
 # Global base configuration and owner tracking
```
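With `max_errors` in the default config, the error budget can now be set once per process instead of being passed to every optimizer and evaluator. A minimal sketch of using the new knob (the value 25 is arbitrary, not a recommendation):

```python
import dspy

# Set the process-wide error budget once. Components constructed with
# max_errors=None will defer to this value when they run.
dspy.settings.configure(max_errors=25)
```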

dspy/evaluate/evaluate.py

Lines changed: 8 additions & 3 deletions
```diff
@@ -56,7 +56,7 @@ def __init__(
         num_threads: Optional[int] = None,
         display_progress: bool = False,
         display_table: Union[bool, int] = False,
-        max_errors: int = 5,
+        max_errors: Optional[int] = None,
         return_all_scores: bool = False,
         return_outputs: bool = False,
         provide_traceback: Optional[bool] = None,
@@ -71,7 +71,8 @@ def __init__(
             display_progress (bool): Whether to display progress during evaluation.
             display_table (Union[bool, int]): Whether to display the evaluation results in a table.
                 If a number is passed, the evaluation results will be truncated to that number before displayed.
-            max_errors (int): The maximum number of errors to allow before stopping evaluation.
+            max_errors (Optional[int]): The maximum number of errors to allow before
+                stopping evaluation. If ``None``, inherits from ``dspy.settings.max_errors``.
             return_all_scores (bool): Whether to return scores for every data record in `devset`.
             return_outputs (bool): Whether to return the dspy program's outputs for every data in `devset`.
             provide_traceback (Optional[bool]): Whether to provide traceback information during evaluation.
@@ -151,7 +152,11 @@ def __call__(
         executor = ParallelExecutor(
             num_threads=num_threads,
             disable_progress_bar=not display_progress,
-            max_errors=self.max_errors,
+            max_errors=(
+                self.max_errors
+                if self.max_errors is not None
+                else dspy.settings.max_errors
+            ),
             provide_traceback=self.provide_traceback,
             compare_results=True,
         )
```
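Because the fallback is resolved inside `__call__` rather than `__init__`, an `Evaluate` instance picks up whatever global value is current at call time. A hedged usage sketch; the metric and devset below are toy placeholders, and an LM is assumed to be configured already:

```python
import dspy
from dspy.evaluate import Evaluate

def exact_match(example, prediction, trace=None):  # toy metric, illustration only
    return example.answer == prediction.answer

devset = [dspy.Example(question="What is 2 + 2?", answer="4").with_inputs("question")]

dspy.settings.configure(max_errors=25)

inherits = Evaluate(devset=devset, metric=exact_match)                 # None -> inherits 25
overrides = Evaluate(devset=devset, metric=exact_match, max_errors=1)  # explicit value wins
```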

dspy/predict/parallel.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -10,15 +10,15 @@ class Parallel:
     def __init__(
         self,
         num_threads: Optional[int] = None,
-        max_errors: int = 10,
+        max_errors: Optional[int] = None,
         access_examples: bool = True,
         return_failed_examples: bool = False,
         provide_traceback: Optional[bool] = None,
         disable_progress_bar: bool = False,
     ):
         super().__init__()
         self.num_threads = num_threads or settings.num_threads
-        self.max_errors = max_errors
+        self.max_errors = settings.max_errors if max_errors is None else max_errors
         self.access_examples = access_examples
         self.return_failed_examples = return_failed_examples
         self.provide_traceback = provide_traceback
```
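Note the `is None` check here, in contrast to the truthy `num_threads or settings.num_threads` fallback on the line above it: with `or`, an explicit `max_errors=0` (fail on the first error) would be silently replaced by the global default. A small sketch of the difference:

```python
import dspy

max_errors = 0  # caller explicitly tolerates zero errors

# Truthiness-based fallback discards the 0:
print(max_errors or dspy.settings.max_errors)  # -> 10, the global default

# The `is None` sentinel preserves it:
print(dspy.settings.max_errors if max_errors is None else max_errors)  # -> 0
```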

dspy/primitives/program.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -115,7 +115,7 @@ def batch(
         self,
         examples,
         num_threads: Optional[int] = None,
-        max_errors: int = 10,
+        max_errors: Optional[int] = None,
         return_failed_examples: bool = False,
         provide_traceback: Optional[bool] = None,
         disable_progress_bar: bool = False,
@@ -127,10 +127,11 @@ def batch(
             examples: List of dspy.Example instances to process.
             num_threads: Number of threads to use for parallel processing.
             max_errors: Maximum number of errors allowed before stopping execution.
+                If ``None``, inherits from ``dspy.settings.max_errors``.
             return_failed_examples: Whether to return failed examples and exceptions.
             provide_traceback: Whether to include traceback information in error logs.
             disable_progress_bar: Whether to display the progress bar.
-
+
         Returns:
             List of results, and optionally failed examples and exceptions.
         """
```

dspy/teleprompt/bootstrap.py

Lines changed: 9 additions & 3 deletions
```diff
@@ -43,7 +43,7 @@ def __init__(
         max_bootstrapped_demos=4,
         max_labeled_demos=16,
         max_rounds=1,
-        max_errors=5,
+        max_errors=None,
     ):
         """A Teleprompter class that composes a set of demos/examples to go into a predictor's prompt.
         These demos come from a combination of labeled examples in the training set, and bootstrapped demos.
@@ -62,7 +62,8 @@ def __init__(
                 Defaults to 16.
             max_rounds (int): Number of iterations to attempt generating the required bootstrap
                 examples. If unsuccessful after `max_rounds`, the program ends. Defaults to 1.
-            max_errors (int): Maximum number of errors until program ends. Defaults to 5.
+            max_errors (Optional[int]): Maximum number of errors until program ends.
+                If ``None``, inherits from ``dspy.settings.max_errors``.
         """
         self.metric = metric
         self.metric_threshold = metric_threshold
@@ -210,7 +211,12 @@ def _bootstrap_one_example(self, example, round_idx=0):
             with self.error_lock:
                 self.error_count += 1
                 current_error_count = self.error_count
-            if current_error_count >= self.max_errors:
+            effective_max_errors = (
+                self.max_errors
+                if self.max_errors is not None
+                else dspy.settings.max_errors
+            )
+            if current_error_count >= effective_max_errors:
                 raise e
             logger.error(f"Failed to run or to evaluate example {example} with {self.metric} due to {e}.")
 
```
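The budget is resolved at failure time rather than in `__init__`, so a compile that is already running respects later changes to the surrounding settings. A simplified, self-contained sketch of the accounting above (the class and names are illustrative, not DSPy API):

```python
import threading

class ErrorBudget:
    """Illustrative reduction of the bootstrap error handling above."""

    def __init__(self, max_errors=None):
        self.max_errors = max_errors  # None means: defer to the global setting
        self.error_count = 0
        self.error_lock = threading.Lock()

    def record_failure(self, exc, global_max_errors=10):
        # Count under the lock so concurrent bootstrap threads don't race.
        with self.error_lock:
            self.error_count += 1
            current = self.error_count
        effective = self.max_errors if self.max_errors is not None else global_max_errors
        if current >= effective:
            raise exc  # budget exhausted: re-raise instead of logging and continuing
```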

dspy/teleprompt/infer_rules.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -19,7 +19,7 @@ def __init__(self, num_candidates=10, num_rules=10, num_threads=None, teacher_se
         self.num_threads = num_threads
         self.rules_induction_program = RulesInductionProgram(num_rules, teacher_settings=teacher_settings)
         self.metric = kwargs.get("metric")
-        self.max_errors = kwargs.get("max_errors", 10)
+        self.max_errors = kwargs.get("max_errors")
 
     def compile(self, student, *, teacher=None, trainset, valset=None):
         if valset is None:
@@ -109,11 +109,14 @@ def get_predictor_demos(self, trainset, predictor):
         ]
 
     def evaluate_program(self, program, dataset):
+        effective_max_errors = (
+            self.max_errors if self.max_errors is not None else dspy.settings.max_errors
+        )
         evaluate = Evaluate(
             devset=dataset,
             metric=self.metric,
             num_threads=self.num_threads,
-            max_errors=self.max_errors,
+            max_errors=effective_max_errors,
             display_table=False,
             display_progress=True,
             return_all_scores=True,
```
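Dropping the second argument to `kwargs.get` works because `dict.get` already returns `None` for a missing key, which is exactly the "inherit from settings" sentinel the rest of the codebase now expects:

```python
kwargs = {}
assert kwargs.get("max_errors") is None    # absent -> None -> inherit the global value
assert kwargs.get("max_errors", 10) == 10  # the old default would mask the sentinel
```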

dspy/teleprompt/mipro_optimizer_v2.py

Lines changed: 38 additions & 20 deletions
```diff
@@ -5,24 +5,21 @@
 import textwrap
 import time
 from collections import defaultdict
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Tuple
+from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Literal,
+                    Optional, Tuple)
 
 import numpy as np
 
 import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.propose import GroundedProposer
 from dspy.teleprompt.teleprompt import Teleprompter
-from dspy.teleprompt.utils import (
-    create_minibatch,
-    create_n_fewshot_demo_sets,
-    eval_candidate_program,
-    get_program_with_highest_avg_score,
-    get_signature,
-    print_full_program,
-    save_candidate_program,
-    set_signature,
-)
+from dspy.teleprompt.utils import (create_minibatch,
+                                   create_n_fewshot_demo_sets,
+                                   eval_candidate_program,
+                                   get_program_with_highest_avg_score,
+                                   get_signature, print_full_program,
+                                   save_candidate_program, set_signature)
 
 if TYPE_CHECKING:
     import optuna
@@ -60,7 +57,7 @@ def __init__(
         auto: Optional[Literal["light", "medium", "heavy"]] = "light",
         num_candidates: Optional[int] = None,
         num_threads: Optional[int] = None,
-        max_errors: int = 10,
+        max_errors: Optional[int] = None,
         seed: int = 9,
         init_temperature: float = 0.5,
         verbose: bool = False,
@@ -116,20 +113,28 @@ def compile(
         requires_permission_to_run: bool = True,
         provide_traceback: Optional[bool] = None,
     ) -> Any:
-
+        effective_max_errors = (
+            self.max_errors
+            if self.max_errors is not None
+            else dspy.settings.max_errors
+        )
         zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0)
 
         # If auto is None, and num_trials is not provided (but num_candidates is), raise an error that suggests a good num_trials value
         if self.auto is None and (self.num_candidates is not None and num_trials is None):
-            raise ValueError(f"If auto is None, num_trials must also be provided. Given num_candidates={self.num_candidates}, we'd recommend setting num_trials to ~{self._set_num_trials_from_num_candidates(student, zeroshot_opt, self.num_candidates)}.")
+            raise ValueError(
+                f"If auto is None, num_trials must also be provided. Given num_candidates={self.num_candidates}, we'd recommend setting num_trials to ~{self._set_num_trials_from_num_candidates(student, zeroshot_opt, self.num_candidates)}."
+            )
 
         # If auto is None, and num_candidates or num_trials is None, raise an error
         if self.auto is None and (self.num_candidates is None or num_trials is None):
             raise ValueError("If auto is None, num_candidates must also be provided.")
 
         # If auto is provided, and either num_candidates or num_trials is not None, raise an error
         if self.auto is not None and (self.num_candidates is not None or num_trials is not None):
-            raise ValueError("If auto is not None, num_candidates and num_trials cannot be set, since they would be overrided by the auto settings. Please either set auto to None, or do not specify num_candidates and num_trials.")
+            raise ValueError(
+                "If auto is not None, num_candidates and num_trials cannot be set, since they would be overrided by the auto settings. Please either set auto to None, or do not specify num_candidates and num_trials."
+            )
 
         # Set random seeds
         seed = seed or self.seed
@@ -175,7 +180,7 @@ def compile(
             devset=valset,
             metric=self.metric,
             num_threads=self.num_threads,
-            max_errors=self.max_errors,
+            max_errors=effective_max_errors,
             display_table=False,
             display_progress=True,
             provide_traceback=provide_traceback,
@@ -382,7 +387,7 @@ def _get_user_confirmation(
             """
         )
 
-        print(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ", end='', flush=True)
+        print(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ", end="", flush=True)
 
         # Wait for input with timeout
         start_time = time.time()
@@ -409,6 +414,10 @@ def _bootstrap_fewshot_examples(self, program: Any, trainset: List, seed: int, t
         zeroshot = self.max_bootstrapped_demos == 0 and self.max_labeled_demos == 0
 
         try:
+            effective_max_errors = (
+                self.max_errors if self.max_errors is not None else dspy.settings.max_errors
+            )
+
             demo_candidates = create_n_fewshot_demo_sets(
                 student=program,
                 num_candidate_sets=self.num_fewshot_candidates,
@@ -418,7 +427,7 @@
                     BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else self.max_bootstrapped_demos
                 ),
                 metric=self.metric,
-                max_errors=self.max_errors,
+                max_errors=effective_max_errors,
                 teacher=teacher,
                 teacher_settings=self.teacher_settings,
                 seed=seed,
@@ -498,6 +507,7 @@ def _optimize_prompt_parameters(
         seed: int,
     ) -> Optional[Any]:
         import optuna
+
         # Run optimization
         optuna.logging.set_verbosity(optuna.logging.WARNING)
         logger.info("==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==")
@@ -507,7 +517,11 @@
 
         # Compute the adjusted total trials that we will run (including full evals)
         run_additional_full_eval_at_end = 1 if num_trials % minibatch_full_eval_steps != 0 else 0
-        adjusted_num_trials = int((num_trials + num_trials // minibatch_full_eval_steps + 1 + run_additional_full_eval_at_end) if minibatch else num_trials)
+        adjusted_num_trials = int(
+            (num_trials + num_trials // minibatch_full_eval_steps + 1 + run_additional_full_eval_at_end)
+            if minibatch
+            else num_trials
+        )
         logger.info(f"== Trial {1} / {adjusted_num_trials} - Full Evaluation of Default Program ==")
 
         default_score, _ = eval_candidate_program(
@@ -610,7 +624,9 @@ def objective(trial):
                 )
 
                 # If minibatch, perform full evaluation at intervals (and at the very end)
-                if minibatch and ((trial_num % (minibatch_full_eval_steps+1) == 0) or (trial_num == (adjusted_num_trials-1))):
+                if minibatch and (
+                    (trial_num % (minibatch_full_eval_steps + 1) == 0) or (trial_num == (adjusted_num_trials - 1))
+                ):
                     best_score, best_program, total_eval_calls = self._perform_full_evaluation(
                         trial_num,
                         adjusted_num_trials,
@@ -759,6 +775,7 @@ def _select_and_insert_instructions_and_demos(
 
     def _get_param_distributions(self, program, instruction_candidates, demo_candidates):
         from optuna.distributions import CategoricalDistribution
+
         param_distributions = {}
 
         for i in range(len(instruction_candidates)):
@@ -788,6 +805,7 @@ def _perform_full_evaluation(
         demo_candidates: List,
     ):
         import optuna
+
         logger.info(f"===== Trial {trial_num + 1} / {adjusted_num_trials} - Full Evaluation =====")
 
         # Identify best program to evaluate fully
```
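Since MIPROv2 now reads `dspy.settings.max_errors` at compile time when `max_errors=None`, a scoped override should work as well. A hedged sketch; the metric, student, and trainset are placeholders, and it assumes `dspy.context` accepts the same keys as `dspy.settings.configure`:

```python
import dspy
from dspy.teleprompt import MIPROv2

def my_metric(example, prediction, trace=None):  # placeholder metric
    return float(example.answer == prediction.answer)

student = dspy.Predict("question -> answer")
trainset = [dspy.Example(question="What is 2 + 2?", answer="4").with_inputs("question")]

optimizer = MIPROv2(metric=my_metric, auto="light")  # max_errors=None -> inherit

# Tighten the budget for this one compile without touching the global default.
with dspy.context(max_errors=3):
    compiled = optimizer.compile(student, trainset=trainset)
```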

dspy/teleprompt/random_search.py

Lines changed: 14 additions & 5 deletions
```diff
@@ -1,5 +1,6 @@
 import random
 
+import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.teleprompt.teleprompt import Teleprompter
 
@@ -33,7 +34,7 @@ def __init__(
         max_rounds=1,
         num_candidate_programs=16,
         num_threads=None,
-        max_errors=10,
+        max_errors=None,
         stop_at_score=None,
         metric_threshold=None,
     ):
@@ -57,6 +58,12 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
         self.trainset = trainset
         self.valset = valset or trainset  # TODO: FIXME: Note this choice.
 
+        effective_max_errors = (
+            self.max_errors
+            if self.max_errors is not None
+            else dspy.settings.max_errors
+        )
+
         scores = []
         all_subscores = []
         score_data = []
@@ -85,7 +92,7 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
                     max_labeled_demos=self.max_labeled_demos,
                     teacher_settings=self.teacher_settings,
                     max_rounds=self.max_rounds,
-                    max_errors=self.max_errors,
+                    max_errors=effective_max_errors,
                 )
                 program = optimizer.compile(student, teacher=teacher, trainset=trainset_copy)
 
@@ -102,7 +109,7 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
                     max_labeled_demos=self.max_labeled_demos,
                     teacher_settings=self.teacher_settings,
                     max_rounds=self.max_rounds,
-                    max_errors=self.max_errors,
+                    max_errors=effective_max_errors,
                 )
 
                 program = optimizer.compile(student, teacher=teacher, trainset=trainset_copy)
@@ -111,7 +118,7 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
                 devset=self.valset,
                 metric=self.metric,
                 num_threads=self.num_threads,
-                max_errors=self.max_errors,
+                max_errors=effective_max_errors,
                 display_table=False,
                 display_progress=True,
             )
@@ -143,7 +150,9 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
 
         # To best program, attach all program candidates in decreasing average score
         best_program.candidate_programs = score_data
-        best_program.candidate_programs = sorted(best_program.candidate_programs, key=lambda x: x["score"], reverse=True)
+        best_program.candidate_programs = sorted(
+            best_program.candidate_programs, key=lambda x: x["score"], reverse=True
+        )
 
         print(f"{len(best_program.candidate_programs)} candidate programs found.")
 
```