
Fix BootstrapFinetune example in index doc and add basic tests for bootstrap_finetune. #8435

Open. Wants to merge 14 commits into base: main.
5 changes: 4 additions & 1 deletion docs/docs/index.md
@@ -403,17 +403,20 @@ Given a few tens or hundreds of representative _inputs_ of your task and a _metric_

```python linenums="1"
import dspy
-dspy.configure(lm=dspy.LM("openai/gpt-4o-mini-2024-07-18"))
+lm = dspy.LM('openai/gpt-4o-mini-2024-07-18')

# Define the DSPy module for classification. It will use the hint at training time, if available.
signature = dspy.Signature("text, hint -> label").with_updated_fields("label", type_=Literal[tuple(CLASSES)])
classify = dspy.ChainOfThought(signature)
+classify.set_lm(lm)

# Optimize via BootstrapFinetune.
optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24)
optimized = optimizer.compile(classify, trainset=trainset)

optimized(text="What does a pending cash withdrawal mean?")

+# For a complete fine-tuning tutorial, see: https://dspy.ai/tutorials/classification_finetuning/
```

**Review comment (Contributor Author):** @chenmoneygithub let me know if there is a better way to refer to another doc page.
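Note for readers of this hunk: the example assumes `Literal` has been imported and that `CLASSES` and `trainset` are defined earlier on the docs page. A minimal, purely illustrative setup might look like the sketch below (the label names and examples are assumptions for illustration, not part of this change):

```python
from typing import Literal

import dspy

# Hypothetical label space and training data for an intent-classification task.
# Any small labelled set with `text`, an optional `hint`, and `label` fields works.
CLASSES = ["pending_cash_withdrawal", "card_arrival", "lost_or_stolen_card"]  # assumed labels

trainset = [
    dspy.Example(
        text="Why hasn't my cash withdrawal gone through yet?",
        hint="The user is asking about a delayed ATM withdrawal.",
        label="pending_cash_withdrawal",
    ).with_inputs("text", "hint"),
    # ...a few tens or hundreds more examples
]
```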

**Possible Output (from the last line):**
10 changes: 7 additions & 3 deletions docs/docs/learn/optimization/optimizers.md
@@ -58,17 +58,18 @@ These optimizers produce optimal instructions for the prompt and, in the case of

6. [**`MIPROv2`**](../../api/optimizers/MIPROv2.md): Generates instructions *and* few-shot examples in each step. The instruction generation is data-aware and demonstration-aware. Uses Bayesian Optimization to effectively search over the space of generation instructions/demonstrations across your modules.

7. [**`SIMBA`**](../../api/optimizers/SIMBA.md)

### Automatic Finetuning

This optimizer is used to fine-tune the underlying LLM(s).

-7. [**`BootstrapFinetune`**](../../api/optimizers/BootstrapFinetune.md): Distills a prompt-based DSPy program into weight updates. The output is a DSPy program that has the same steps, but where each step is conducted by a finetuned model instead of a prompted LM.
+8. [**`BootstrapFinetune`**](/api/optimizers/BootstrapFinetune): Distills a prompt-based DSPy program into weight updates. The output is a DSPy program that has the same steps, but where each step is conducted by a finetuned model instead of a prompted LM. [See the classification fine-tuning tutorial](https://dspy.ai/tutorials/classification_finetuning/) for a complete example.


### Program Transformations

-8. [**`Ensemble`**](../../api/optimizers/Ensemble.md): Ensembles a set of DSPy programs and either uses the full set or randomly samples a subset into a single program.
+9. [**`Ensemble`**](../../api/optimizers/Ensemble.md): Ensembles a set of DSPy programs and either uses the full set or randomly samples a subset into a single program.


## Which optimizer should I use?
@@ -176,17 +177,20 @@ optimized_program = teleprompter.compile(YOUR_PROGRAM_HERE, trainset=YOUR_TRAINSET_HERE)

```python linenums="1"
import dspy
-dspy.configure(lm=dspy.LM('openai/gpt-4o-mini-2024-07-18'))
+lm = dspy.LM('openai/gpt-4o-mini-2024-07-18')

# Define the DSPy module for classification. It will use the hint at training time, if available.
signature = dspy.Signature("text, hint -> label").with_updated_fields('label', type_=Literal[tuple(CLASSES)])
classify = dspy.ChainOfThought(signature)
+classify.set_lm(lm)

# Optimize via BootstrapFinetune.
optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24)
optimized = optimizer.compile(classify, trainset=trainset)

optimized(text="What does a pending cash withdrawal mean?")

+# For a complete fine-tuning tutorial, see: https://dspy.ai/tutorials/classification_finetuning/
```

**Possible Output (from the last line):**
7 changes: 7 additions & 0 deletions dspy/teleprompt/bootstrap_finetune.py
@@ -81,7 +81,14 @@ def compile(
key_to_data = {}
for pred_ind, pred in enumerate(student.predictors()):
    data_pred_ind = None if self.multitask else pred_ind
+   if pred.lm is None:
+       raise ValueError(
+           f"Predictor {pred_ind} does not have an LM assigned. "
+           f"Please ensure the module's predictors have their LM set before fine-tuning. "
+           f"You can set it using: your_module.set_lm(your_lm)"
+       )
    training_key = (pred.lm, data_pred_ind)

    if training_key not in key_to_data:
        train_data, data_format = self._prepare_finetune_data(
            trace_data=trace_data, lm=pred.lm, pred_ind=data_pred_ind
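Worth noting for reviewers: without this guard, `training_key = (pred.lm, data_pred_ind)` would be built with `pred.lm` set to `None` and the failure would only surface further downstream. The error message points users to the same fix the updated docs use. A minimal sketch of the intended remedy (module and model names below are assumptions, not part of the diff):

```python
import dspy

# Sketch only: a module whose predictors will be fine-tuned needs an LM assigned
# on the module itself; a global dspy.configure(lm=...) alone leaves pred.lm as None.
lm = dspy.LM("openai/gpt-4o-mini-2024-07-18")  # assumed model name
classify = dspy.ChainOfThought("text -> label")

# Without this call, BootstrapFinetune.compile(...) now raises:
#   ValueError: Predictor 0 does not have an LM assigned. ...
classify.set_lm(lm)
```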
2 changes: 1 addition & 1 deletion tests/signatures/test_signature.py
@@ -514,7 +514,7 @@ class Sig1(Signature):
    output: int | str = OutputField()

class Sig2(Signature):
-    input: Optional[str] = InputField()  # noqa: UP045
+    input: str | None = InputField()
    output: Union[int, str] = OutputField()  # noqa: UP007

# PEP 604 union types in class signatures should be equivalent to Optional and Union types
81 changes: 81 additions & 0 deletions tests/teleprompt/test_bootstrap_finetune.py
@@ -0,0 +1,81 @@
from unittest.mock import patch

import dspy
from dspy import Example
from dspy.predict import Predict
from dspy.teleprompt import BootstrapFinetune
from dspy.utils.dummies import DummyLM


# Define a simple metric function for testing
def simple_metric(example, prediction, trace=None):
    return example.output == prediction.output


examples = [
Example(input="What is the color of the sky?", output="blue").with_inputs("input"),
Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"),
]
trainset = [examples[0]]


def test_bootstrap_finetune_initialization():
"""Test BootstrapFinetune initialization with various parameters."""
bootstrap = BootstrapFinetune(metric=simple_metric)
assert bootstrap.metric == simple_metric, "Metric not correctly initialized"
assert bootstrap.multitask == True, "Multitask should default to True"


class SimpleModule(dspy.Module):
    def __init__(self, signature):
        super().__init__()
        self.predictor = Predict(signature)

    def forward(self, **kwargs):
        return self.predictor(**kwargs)


def test_compile_with_predict_instances():
"""Test BootstrapFinetune compilation with Predict instances."""
# Create SimpleModule instances for student and teacher
student = SimpleModule("input -> output")
teacher = SimpleModule("input -> output")

lm = DummyLM([{"output": "blue"}, {"output": "Ring-ding-ding-ding-dingeringeding!"}])
dspy.settings.configure(lm=lm)

# Set LM for both student and teacher
student.set_lm(lm)
teacher.set_lm(lm)

bootstrap = BootstrapFinetune(metric=simple_metric)

# Mock the fine-tuning process since DummyLM doesn't support it
with patch.object(bootstrap, "finetune_lms") as mock_finetune:
mock_finetune.return_value = {(lm, None): lm}
compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset)

assert compiled_student is not None, "Failed to compile student"
assert hasattr(compiled_student, "_compiled") and compiled_student._compiled, "Student compilation flag not set"

mock_finetune.assert_called_once()


def test_error_handling_missing_lm():
"""Test error handling when predictor doesn't have an LM assigned."""

lm = DummyLM([{"output": "test"}])
dspy.settings.configure(lm=lm)

student = SimpleModule("input -> output")
# Intentionally NOT setting LM for the student module

bootstrap = BootstrapFinetune(metric=simple_metric)

# This should raise ValueError about missing LM and hint to use set_lm
try:
bootstrap.compile(student, trainset=trainset)
assert False, "Should have raised ValueError for missing LM"
except ValueError as e:
assert "does not have an LM assigned" in str(e)
assert "set_lm" in str(e)