Remove user confirmation from MiproV2 #8552

Open · wants to merge 1 commit into base: main

1 change: 0 additions & 1 deletion docs/docs/api/optimizers/MIPROv2.md
@@ -47,7 +47,6 @@ gsm8k = GSM8K()
optimized_program = teleprompter.compile(
dspy.ChainOfThought("question -> answer"),
trainset=gsm8k.train,
requires_permission_to_run=False,
)

# Save optimize program for future use
2 changes: 0 additions & 2 deletions docs/docs/cheatsheet.md
@@ -307,7 +307,6 @@ optimized_program = teleprompter.compile(
trainset=trainset,
max_bootstrapped_demos=3,
max_labeled_demos=4,
requires_permission_to_run=False,
)

# Save optimize program for future use
@@ -337,7 +336,6 @@ optimized_program = teleprompter.compile(
trainset=trainset,
max_bootstrapped_demos=0,
max_labeled_demos=0,
requires_permission_to_run=False,
)

# Save optimize program for future use
1 change: 0 additions & 1 deletion docs/docs/tutorials/entity_extraction/index.ipynb
@@ -567,7 +567,6 @@
" people_extractor,\n",
" trainset=train_set,\n",
" max_bootstrapped_demos=4,\n",
" requires_permission_to_run=False,\n",
" minibatch=False\n",
")"
]
2 changes: 1 addition & 1 deletion docs/docs/tutorials/games/index.ipynb
@@ -572,7 +572,7 @@
"optimizer = dspy.MIPROv2(metric=metric, auto=\"light\", num_threads=16, prompt_model=gpt4o)\n",
"\n",
"config = dict(max_bootstrapped_demos=1, max_labeled_demos=0, minibatch_size=40)\n",
"optimized_4o = optimizer.compile(agent_4o, trainset=trainset, **config, requires_permission_to_run=False)"
"optimized_4o = optimizer.compile(agent_4o, trainset=trainset, **config)"
]
},
{
2 changes: 1 addition & 1 deletion docs/docs/tutorials/math/index.ipynb
@@ -414,7 +414,7 @@
"kwargs = dict(num_threads=THREADS, teacher_settings=dict(lm=gpt4o), prompt_model=gpt4o_mini)\n",
"optimizer = dspy.MIPROv2(metric=dataset.metric, auto=\"medium\", **kwargs)\n",
"\n",
"kwargs = dict(requires_permission_to_run=False, max_bootstrapped_demos=4, max_labeled_demos=4)\n",
"kwargs = dict(max_bootstrapped_demos=4, max_labeled_demos=4)\n",
"optimized_module = optimizer.compile(module, trainset=dataset.train, **kwargs)"
]
},
2 changes: 1 addition & 1 deletion docs/docs/tutorials/multihop_search/index.ipynb
@@ -574,7 +574,7 @@
"models = dict(prompt_model=gpt4o, teacher_settings=dict(lm=gpt4o))\n",
"tp = dspy.MIPROv2(metric=top5_recall, auto=\"medium\", num_threads=16, **models)\n",
"\n",
"kwargs = dict(minibatch_size=40, minibatch_full_eval_steps=4, requires_permission_to_run=False)\n",
"kwargs = dict(minibatch_size=40, minibatch_full_eval_steps=4)\n",
"optimized = tp.compile(Hop(), trainset=trainset, max_bootstrapped_demos=4, max_labeled_demos=4, **kwargs)"
]
},
3 changes: 1 addition & 2 deletions docs/docs/tutorials/rag/index.ipynb
@@ -1143,8 +1143,7 @@
"tp = dspy.MIPROv2(metric=metric, auto=\"medium\", num_threads=24) # use fewer threads if your rate limit is small\n",
"\n",
"optimized_rag = tp.compile(RAG(), trainset=trainset,\n",
" max_bootstrapped_demos=2, max_labeled_demos=2,\n",
" requires_permission_to_run=False)"
" max_bootstrapped_demos=2, max_labeled_demos=2)"
]
},
{
93 changes: 1 addition & 92 deletions dspy/teleprompt/mipro_optimizer_v2.py
@@ -1,8 +1,5 @@
import logging
import random
import sys
import textwrap
import time
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Callable, Literal

@@ -112,7 +109,7 @@ def compile(
view_data_batch_size: int = 10,
tip_aware_proposer: bool = True,
fewshot_aware_proposer: bool = True,
requires_permission_to_run: bool = True,
requires_permission_to_run: bool = True, # deprecated
TomeHirata (Collaborator, Author) commented on Jul 20, 2025:

Unfortunately, removing this argument will break the code of users who pass requires_permission_to_run=False. Do we think this breaking change is acceptable in DSPy 3? We could add **kwargs instead, but that would mean losing detection of undefined arguments.

Collaborator commented:

We can introduce a wildcard **kwargs, move the deprecated argument there, and print a clear warning that it is deprecated.

TomeHirata (Collaborator, Author) commented on Jul 22, 2025:

Yeah, that's feasible. But I would be cautious about introducing **kwargs, since users won't be able to tell when they mistakenly pass an undefined argument (or make a typo). What do you think?

Collaborator commented:

That's a solid concern! IMO it's a tradeoff between safety and unexpected user misbehavior. We already have wildcard kwargs for a few methods, e.g. def __call__(self, *args, **kwargs), so I wouldn't worry much about it.

TomeHirata (Collaborator, Author) commented on Jul 25, 2025:

Yeah, we have **kwargs in many modules. Shall we remove the requires_permission_to_run parameter and add **kwargs then? cc: @klopsahlong @omkar-sh
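For illustration, a minimal sketch of the deprecation path discussed in this thread: fold the removed flag into **kwargs, emit a deprecation warning, and still reject any other unknown keyword so typos are caught. The simplified signature, warning text, and error handling below are assumptions for illustration, not code from this PR.

```python
import warnings


def compile(self, student, *, trainset, valset=None, **kwargs):
    # Accept the removed flag for backward compatibility, but warn and ignore it.
    if "requires_permission_to_run" in kwargs:
        kwargs.pop("requires_permission_to_run")
        warnings.warn(
            "requires_permission_to_run is deprecated and has no effect; "
            "MIPROv2 no longer asks for confirmation before running.",
            DeprecationWarning,
            stacklevel=2,
        )

    # Keep undefined-argument detection: anything else left in **kwargs is an error.
    if kwargs:
        raise TypeError(f"compile() got unexpected keyword arguments: {sorted(kwargs)}")

    ...  # the rest of compile() proceeds as before
```

Under this sketch, existing calls that pass requires_permission_to_run=False keep working through DSPy 3 while genuine typos still surface, which is the trade-off raised above.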

provide_traceback: bool | None = None,
) -> Any:
effective_max_errors = (
@@ -162,20 +159,6 @@ def compile(
if minibatch and minibatch_size > len(valset):
raise ValueError(f"Minibatch size cannot exceed the size of the valset. Valset size: {len(valset)}.")

# Estimate LM calls and get user confirmation
if requires_permission_to_run:
if not self._get_user_confirmation(
student,
num_trials,
minibatch,
minibatch_size,
minibatch_full_eval_steps,
valset,
program_aware_proposer,
):
logger.info("Compilation aborted by the user.")
return student # Return the original student program

# Initialize program and evaluator
program = student.deepcopy()
evaluate = Evaluate(
@@ -336,80 +319,6 @@ def _estimate_lm_calls(

return prompt_model_line, task_model_line

def _get_user_confirmation(
self,
program: Any,
num_trials: int,
minibatch: bool,
minibatch_size: int,
minibatch_full_eval_steps: int,
valset: list,
program_aware_proposer: bool,
) -> bool:
prompt_model_line, task_model_line = self._estimate_lm_calls(
program,
num_trials,
minibatch,
minibatch_size,
minibatch_full_eval_steps,
valset,
program_aware_proposer,
)

user_message = textwrap.dedent(
f"""\
{YELLOW}{BOLD}Projected Language Model (LM) Calls{ENDC}

Based on the parameters you have set, the maximum number of LM calls is projected as follows:

{prompt_model_line}
{task_model_line}

{YELLOW}{BOLD}Estimated Cost Calculation:{ENDC}

{YELLOW}Total Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token)
+ (Number of program calls * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).{ENDC}

For a preliminary estimate of potential costs, we recommend you perform your own calculations based on the task
and prompt models you intend to use. If the projected costs exceed your budget or expectations, you may consider:

{YELLOW}- Reducing the number of trials (`num_trials`), the size of the valset, or the number of LM calls in your program.{ENDC}
{YELLOW}- Using a cheaper task model to optimize the prompt.{ENDC}
{YELLOW}- Setting `minibatch=True` if you haven't already.{ENDC}\n"""
)

user_confirmation_message = textwrap.dedent(
f"""\
To proceed with the execution of this program, please confirm by typing {BLUE}'y'{ENDC} for yes or {BLUE}'n'{ENDC} for no.
If no input is received within 20 seconds, the program will proceed automatically.

If you would like to bypass this confirmation step in future executions, set the {YELLOW}`requires_permission_to_run`{ENDC} flag to {YELLOW}`False`{ENDC} when calling compile.

{YELLOW}Awaiting your input...{ENDC}
"""
)

print(f"{user_message}\n{user_confirmation_message}\nDo you wish to continue? (y/n): ", end="", flush=True)

# Wait for input with timeout
start_time = time.time()
while time.time() - start_time < 20:
if sys.platform == "win32":
import msvcrt
if msvcrt.kbhit():
user_input = msvcrt.getch().decode("utf-8").strip().lower()
print(user_input) # Echo the input
return user_input == "y"
else:
import select
if select.select([sys.stdin], [], [], 0.1)[0]:
user_input = sys.stdin.readline().strip().lower()
return user_input == "y"
time.sleep(0.1)

print("\nNo input received within 20 seconds. Proceeding with execution...")
return True

def _bootstrap_fewshot_examples(self, program: Any, trainset: list, seed: int, teacher: Any) -> list | None:
logger.info("\n==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==")
if self.max_bootstrapped_demos > 0: