From 03190bc6ebfc3695a9ee2d64fc8720d4461eee3a Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Sun, 22 Jun 2025 17:09:26 -0700 Subject: [PATCH 01/13] done --- dspy/predict/chain_of_thought_with_hint.py | 28 ++++++++++++++++++++++ dspy/teleprompt/bootstrap_finetune.py | 11 +++++---- 2 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 dspy/predict/chain_of_thought_with_hint.py diff --git a/dspy/predict/chain_of_thought_with_hint.py b/dspy/predict/chain_of_thought_with_hint.py new file mode 100644 index 0000000000..5b2b509148 --- /dev/null +++ b/dspy/predict/chain_of_thought_with_hint.py @@ -0,0 +1,28 @@ +import dspy + +from .predict import Module + + +class ChainOfThoughtWithHint(Module): + def __init__(self, signature, rationale_field_type=None, **config): + self.signature = dspy.ensure_signature(signature) + self.module = dspy.ChainOfThought(signature, rationale_field_type=rationale_field_type, **config) + + def forward(self, **kwargs): + if kwargs.get("hint"): + hint = f"\n\t\t(secret hint: {kwargs.pop('hint')})" + original_kwargs = kwargs.copy() + + # Convert the first field's value to string and append the hint + last_key = list(self.signature.input_fields.keys())[-1] + kwargs[last_key] = str(kwargs[last_key]) + hint + + # Run CoT then update the trace with original kwargs, i.e. without the hint. + with dspy.context(trace=[]): + pred = self.module(**kwargs) + this_trace = dspy.settings.trace[-1] + + dspy.settings.trace.append((this_trace[0], original_kwargs, this_trace[2])) + return pred + + return self.module(**kwargs) diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py index 19db70432f..286f247e37 100644 --- a/dspy/teleprompt/bootstrap_finetune.py +++ b/dspy/teleprompt/bootstrap_finetune.py @@ -81,17 +81,18 @@ def compile( key_to_data = {} for pred_ind, pred in enumerate(student.predictors()): data_pred_ind = None if self.multitask else pred_ind - training_key = (pred.lm, data_pred_ind) + lm = pred.lm or settings.lm + training_key = (lm, data_pred_ind) if training_key not in key_to_data: train_data, data_format = self._prepare_finetune_data( - trace_data=trace_data, lm=pred.lm, pred_ind=data_pred_ind + trace_data=trace_data, lm=lm, pred_ind=data_pred_ind ) - logger.info(f"Using {len(train_data)} data points for fine-tuning the model: {pred.lm.model}") + logger.info(f"Using {len(train_data)} data points for fine-tuning the model: {lm.model}") finetune_kwargs = { - "lm": pred.lm, + "lm": lm, "train_data": train_data, "train_data_format": data_format, - "train_kwargs": self.train_kwargs[pred.lm], + "train_kwargs": self.train_kwargs[lm], } key_to_data[training_key] = finetune_kwargs From 4638146c7fa57725d3cea637fd13c5b5ef4c8c71 Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Sun, 22 Jun 2025 17:40:01 -0700 Subject: [PATCH 02/13] done --- dspy/predict/chain_of_thought_with_hint.py | 28 -------- dspy/teleprompt/bootstrap_finetune.py | 3 +- tests/teleprompt/test_bootstrap_finetune.py | 77 +++++++++++++++++++++ 3 files changed, 79 insertions(+), 29 deletions(-) delete mode 100644 dspy/predict/chain_of_thought_with_hint.py create mode 100644 tests/teleprompt/test_bootstrap_finetune.py diff --git a/dspy/predict/chain_of_thought_with_hint.py b/dspy/predict/chain_of_thought_with_hint.py deleted file mode 100644 index 5b2b509148..0000000000 --- a/dspy/predict/chain_of_thought_with_hint.py +++ /dev/null @@ -1,28 +0,0 @@ -import dspy - -from .predict import Module - - -class ChainOfThoughtWithHint(Module): - def __init__(self, signature, 
rationale_field_type=None, **config): - self.signature = dspy.ensure_signature(signature) - self.module = dspy.ChainOfThought(signature, rationale_field_type=rationale_field_type, **config) - - def forward(self, **kwargs): - if kwargs.get("hint"): - hint = f"\n\t\t(secret hint: {kwargs.pop('hint')})" - original_kwargs = kwargs.copy() - - # Convert the first field's value to string and append the hint - last_key = list(self.signature.input_fields.keys())[-1] - kwargs[last_key] = str(kwargs[last_key]) + hint - - # Run CoT then update the trace with original kwargs, i.e. without the hint. - with dspy.context(trace=[]): - pred = self.module(**kwargs) - this_trace = dspy.settings.trace[-1] - - dspy.settings.trace.append((this_trace[0], original_kwargs, this_trace[2])) - return pred - - return self.module(**kwargs) diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py index 286f247e37..d41bac25b7 100644 --- a/dspy/teleprompt/bootstrap_finetune.py +++ b/dspy/teleprompt/bootstrap_finetune.py @@ -83,6 +83,7 @@ def compile( data_pred_ind = None if self.multitask else pred_ind lm = pred.lm or settings.lm training_key = (lm, data_pred_ind) + if training_key not in key_to_data: train_data, data_format = self._prepare_finetune_data( trace_data=trace_data, lm=lm, pred_ind=data_pred_ind @@ -115,7 +116,7 @@ def compile( logger.info("Updating the student program with the fine-tuned LMs...") for pred_ind, pred in enumerate(student.predictors()): data_pred_ind = None if self.multitask else pred_ind - training_key = (pred.lm, data_pred_ind) + training_key = (pred.lm or settings.lm, data_pred_ind) finetuned_lm = key_to_lm[training_key] if isinstance(finetuned_lm, Exception): raise RuntimeError(f"Finetuned LM for predictor {pred_ind} failed.") from finetuned_lm diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py new file mode 100644 index 0000000000..8c7d2bd7d1 --- /dev/null +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -0,0 +1,77 @@ +import dspy +from dspy import Example +from dspy.predict import Predict +from dspy.teleprompt import BootstrapFinetune +from dspy.utils.dummies import DummyLM + + +# Define a simple metric function for testing +def simple_metric(example, prediction, trace=None): + # Simplified metric for testing: true if prediction matches expected output + return example.output == prediction.output + + +examples = [ + Example(input="What is the color of the sky?", output="blue").with_inputs("input"), + Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!"), +] +trainset = [examples[0]] + + +def test_bootstrap_finetune_initialization(): + # Initialize BootstrapFinetune with a dummy metric and minimal setup + bootstrap = BootstrapFinetune(metric=simple_metric) + assert bootstrap.metric == simple_metric, "Metric not correctly initialized" + assert bootstrap.multitask == True, "Multitask should default to True" + + +class SimpleModule(dspy.Module): + def __init__(self, signature, lm=None): + super().__init__() + self.predictor = Predict(signature) + if lm: + self.predictor.lm = lm + + def forward(self, **kwargs): + return self.predictor(**kwargs) + + +def test_compile_with_predict_instances_no_explicit_lm(): + """Test BootstrapFinetune compile with predictors that don't have explicit LMs.""" + from unittest.mock import patch + + # Create student and teacher modules without explicit LMs in predictors + # This tests the fix: lm = pred.lm or settings.lm + student = 
SimpleModule("input -> output") + teacher = SimpleModule("input -> output") + + # Set up LM in settings - this will be the fallback + lm = DummyLM(["Initial thoughts", "Finish[blue]"]) + original_lm = dspy.settings.lm + dspy.settings.configure(lm=lm) + + + # Verify that the predictor doesn't have an explicit LM + assert student.predictor.lm is None + + # Initialize BootstrapFinetune - this should work without AttributeError + bootstrap = BootstrapFinetune(metric=simple_metric) + + # Mock all the components that would fail without proper setup + with patch('dspy.teleprompt.bootstrap_finetune.all_predictors_have_lms'), \ + patch('dspy.teleprompt.bootstrap_finetune.prepare_teacher', return_value=teacher), \ + patch('dspy.teleprompt.bootstrap_finetune.bootstrap_trace_data', return_value=[]), \ + patch.object(bootstrap, '_prepare_finetune_data', return_value=([], 'openai')), \ + patch.object(bootstrap, 'finetune_lms') as mock_finetune_lms: + + # Mock the finetune_lms to return a mapping from training key to LM + mock_finetune_lms.return_value = {(lm, None): lm} + + # This should not raise AttributeError due to the fix + compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) + + assert compiled_student is not None, "Failed to compile student" + # Verify that finetune_lms was called + mock_finetune_lms.assert_called_once() + + From 5330fbfd9ebe20ad2584018cf339c3d6943acc42 Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Sun, 22 Jun 2025 17:41:04 -0700 Subject: [PATCH 03/13] done --- tests/teleprompt/test_bootstrap_finetune.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index 8c7d2bd7d1..b4bb29e8ac 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -7,7 +7,6 @@ # Define a simple metric function for testing def simple_metric(example, prediction, trace=None): - # Simplified metric for testing: true if prediction matches expected output return example.output == prediction.output @@ -41,20 +40,15 @@ def test_compile_with_predict_instances_no_explicit_lm(): from unittest.mock import patch # Create student and teacher modules without explicit LMs in predictors - # This tests the fix: lm = pred.lm or settings.lm student = SimpleModule("input -> output") teacher = SimpleModule("input -> output") - # Set up LM in settings - this will be the fallback lm = DummyLM(["Initial thoughts", "Finish[blue]"]) original_lm = dspy.settings.lm dspy.settings.configure(lm=lm) - # Verify that the predictor doesn't have an explicit LM assert student.predictor.lm is None - - # Initialize BootstrapFinetune - this should work without AttributeError bootstrap = BootstrapFinetune(metric=simple_metric) # Mock all the components that would fail without proper setup @@ -64,14 +58,12 @@ def test_compile_with_predict_instances_no_explicit_lm(): patch.object(bootstrap, '_prepare_finetune_data', return_value=([], 'openai')), \ patch.object(bootstrap, 'finetune_lms') as mock_finetune_lms: - # Mock the finetune_lms to return a mapping from training key to LM mock_finetune_lms.return_value = {(lm, None): lm} # This should not raise AttributeError due to the fix compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) assert compiled_student is not None, "Failed to compile student" - # Verify that finetune_lms was called mock_finetune_lms.assert_called_once() From 6373e579c7a6308d0321737d26e76c4228971b33 Mon Sep 17 00:00:00 2001 
From: Yiwei Dai Date: Sun, 22 Jun 2025 17:43:12 -0700 Subject: [PATCH 04/13] format --- dspy/teleprompt/bootstrap_finetune.py | 2 +- tests/teleprompt/test_bootstrap_finetune.py | 22 ++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py index d41bac25b7..7425fff31e 100644 --- a/dspy/teleprompt/bootstrap_finetune.py +++ b/dspy/teleprompt/bootstrap_finetune.py @@ -83,7 +83,7 @@ def compile( data_pred_ind = None if self.multitask else pred_ind lm = pred.lm or settings.lm training_key = (lm, data_pred_ind) - + if training_key not in key_to_data: train_data, data_format = self._prepare_finetune_data( trace_data=trace_data, lm=lm, pred_ind=data_pred_ind diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index b4bb29e8ac..5fc09ee320 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -38,7 +38,7 @@ def forward(self, **kwargs): def test_compile_with_predict_instances_no_explicit_lm(): """Test BootstrapFinetune compile with predictors that don't have explicit LMs.""" from unittest.mock import patch - + # Create student and teacher modules without explicit LMs in predictors student = SimpleModule("input -> output") teacher = SimpleModule("input -> output") @@ -50,20 +50,20 @@ def test_compile_with_predict_instances_no_explicit_lm(): # Verify that the predictor doesn't have an explicit LM assert student.predictor.lm is None bootstrap = BootstrapFinetune(metric=simple_metric) - + # Mock all the components that would fail without proper setup - with patch('dspy.teleprompt.bootstrap_finetune.all_predictors_have_lms'), \ - patch('dspy.teleprompt.bootstrap_finetune.prepare_teacher', return_value=teacher), \ - patch('dspy.teleprompt.bootstrap_finetune.bootstrap_trace_data', return_value=[]), \ - patch.object(bootstrap, '_prepare_finetune_data', return_value=([], 'openai')), \ - patch.object(bootstrap, 'finetune_lms') as mock_finetune_lms: - + with patch("dspy.teleprompt.bootstrap_finetune.all_predictors_have_lms"), \ + patch("dspy.teleprompt.bootstrap_finetune.prepare_teacher", return_value=teacher), \ + patch("dspy.teleprompt.bootstrap_finetune.bootstrap_trace_data", return_value=[]), \ + patch.object(bootstrap, "_prepare_finetune_data", return_value=([], "openai")), \ + patch.object(bootstrap, "finetune_lms") as mock_finetune_lms: + mock_finetune_lms.return_value = {(lm, None): lm} - + # This should not raise AttributeError due to the fix compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - + assert compiled_student is not None, "Failed to compile student" mock_finetune_lms.assert_called_once() - + From db80a1171ad3959ed8e1e98fb247539478ea3084 Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Sun, 22 Jun 2025 17:53:08 -0700 Subject: [PATCH 05/13] done --- dspy/adapters/types/audio.py | 2 +- dspy/evaluate/evaluate.py | 4 ++-- dspy/primitives/base_module.py | 8 ++++---- dspy/utils/langchain_tool.py | 2 +- tests/primitives/test_base_module.py | 4 ++-- tests/signatures/test_custom_types.py | 1 - tests/streaming/test_streaming.py | 4 ++-- tests/teleprompt/test_bootstrap_finetune.py | 3 +-- 8 files changed, 13 insertions(+), 15 deletions(-) diff --git a/dspy/adapters/types/audio.py b/dspy/adapters/types/audio.py index b4637a0632..db0c1776b3 100644 --- a/dspy/adapters/types/audio.py +++ b/dspy/adapters/types/audio.py @@ -114,7 +114,7 @@ def __repr__(self) -> str: def 
encode_audio(audio: Union[str, bytes, dict, "Audio", Any], sampling_rate: int = 16000, format: str = "wav") -> dict: """ Encode audio to a dict with 'data' and 'audio_format'. - + Accepts: local file path, URL, data URI, dict, Audio instance, numpy array, or bytes (with known format). """ if isinstance(audio, dict) and "data" in audio and "audio_format" in audio: diff --git a/dspy/evaluate/evaluate.py b/dspy/evaluate/evaluate.py index 0fdec0b6e8..fe804d7a3d 100644 --- a/dspy/evaluate/evaluate.py +++ b/dspy/evaluate/evaluate.py @@ -126,9 +126,9 @@ def __call__( Returns: The evaluation results are returned as a dspy.EvaluationResult object containing the following attributes: - + - score: A float percentage score (e.g., 67.30) representing overall performance - + - results: a list of (example, prediction, score) tuples for each example in devset """ metric = metric if metric is not None else self.metric diff --git a/dspy/primitives/base_module.py b/dspy/primitives/base_module.py index d700514265..4f50df60b9 100644 --- a/dspy/primitives/base_module.py +++ b/dspy/primitives/base_module.py @@ -169,10 +169,10 @@ def save(self, path, save_program=False, modules_to_serialize=None): - `save_program=True`: Save the whole module to a directory via cloudpickle, which contains both the state and architecture of the model. - If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization - with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather - than by reference, ensuring the module is fully preserved along with the saved program. This is useful - when you have custom modules that need to be serialized alongside your program. If None, then no modules + If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization + with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather + than by reference, ensuring the module is fully preserved along with the saved program. This is useful + when you have custom modules that need to be serialized alongside your program. If None, then no modules will be registered for serialization. We also save the dependency versions, so that the loaded model can check if there is a version mismatch on diff --git a/dspy/utils/langchain_tool.py b/dspy/utils/langchain_tool.py index 306ede8b92..fdb1eebe03 100644 --- a/dspy/utils/langchain_tool.py +++ b/dspy/utils/langchain_tool.py @@ -9,7 +9,7 @@ def convert_langchain_tool(tool: "BaseTool") -> Tool: """Build a DSPy tool from a LangChain tool. - + This function converts a LangChain tool (either created with @tool decorator or by subclassing BaseTool) into a DSPy Tool. 
diff --git a/tests/primitives/test_base_module.py b/tests/primitives/test_base_module.py index ee6255bb06..617c8c444f 100644 --- a/tests/primitives/test_base_module.py +++ b/tests/primitives/test_base_module.py @@ -306,8 +306,8 @@ def __call__(self, question: str) -> str: assert results[0].get_lm_usage() is not None assert results[1].get_lm_usage() is not None - assert results[0].get_lm_usage().keys() == set(["openai/gpt-4o-mini"]) - assert results[1].get_lm_usage().keys() == set(["openai/gpt-3.5-turbo"]) + assert results[0].get_lm_usage().keys() == {"openai/gpt-4o-mini"} + assert results[1].get_lm_usage().keys() == {"openai/gpt-3.5-turbo"} @pytest.mark.asyncio diff --git a/tests/signatures/test_custom_types.py b/tests/signatures/test_custom_types.py index 5393dc24d0..edd08489b6 100644 --- a/tests/signatures/test_custom_types.py +++ b/tests/signatures/test_custom_types.py @@ -29,7 +29,6 @@ class Container: class NestedType(pydantic.BaseModel): value: str - NestedType = Container.NestedType alias_sig = Signature("input: str -> output: NestedType") assert alias_sig.output_fields["output"].annotation == Container.NestedType diff --git a/tests/streaming/test_streaming.py b/tests/streaming/test_streaming.py index d79402d2ef..cc06fbd427 100644 --- a/tests/streaming/test_streaming.py +++ b/tests/streaming/test_streaming.py @@ -388,7 +388,7 @@ async def gpt_4o_mini_stream_2(): async def completion_side_effect(*args, **kwargs): return stream_generators.pop(0)() # return new async generator instance - with mock.patch("litellm.acompletion", side_effect=completion_side_effect) as mock_completion: + with mock.patch("litellm.acompletion", side_effect=completion_side_effect): program = dspy.streamify( MyProgram(), stream_listeners=[ @@ -484,7 +484,7 @@ async def gpt_4o_mini_stream_2(*args, **kwargs): with mock.patch( "litellm.acompletion", new_callable=AsyncMock, side_effect=[gpt_4o_mini_stream_1(), gpt_4o_mini_stream_2()] - ) as mock_completion: + ): program = dspy.streamify( MyProgram(), stream_listeners=[ diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index 5fc09ee320..25214aae7b 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -21,7 +21,7 @@ def test_bootstrap_finetune_initialization(): # Initialize BootstrapFinetune with a dummy metric and minimal setup bootstrap = BootstrapFinetune(metric=simple_metric) assert bootstrap.metric == simple_metric, "Metric not correctly initialized" - assert bootstrap.multitask == True, "Multitask should default to True" + assert bootstrap.multitask, "Multitask should default to True" class SimpleModule(dspy.Module): @@ -44,7 +44,6 @@ def test_compile_with_predict_instances_no_explicit_lm(): teacher = SimpleModule("input -> output") lm = DummyLM(["Initial thoughts", "Finish[blue]"]) - original_lm = dspy.settings.lm dspy.settings.configure(lm=lm) # Verify that the predictor doesn't have an explicit LM From 1e56d8a115245e32a8466cfd174e319e48defed8 Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Mon, 23 Jun 2025 08:37:26 -0700 Subject: [PATCH 06/13] Revert specific files to commit c3962172 --- dspy/adapters/types/audio.py | 2 +- dspy/evaluate/evaluate.py | 4 ++-- dspy/primitives/base_module.py | 8 ++++---- dspy/utils/langchain_tool.py | 2 +- tests/primitives/test_base_module.py | 4 ++-- tests/signatures/test_custom_types.py | 1 + tests/streaming/test_streaming.py | 4 ++-- 7 files changed, 13 insertions(+), 12 deletions(-) diff --git 
a/dspy/adapters/types/audio.py b/dspy/adapters/types/audio.py index db0c1776b3..b4637a0632 100644 --- a/dspy/adapters/types/audio.py +++ b/dspy/adapters/types/audio.py @@ -114,7 +114,7 @@ def __repr__(self) -> str: def encode_audio(audio: Union[str, bytes, dict, "Audio", Any], sampling_rate: int = 16000, format: str = "wav") -> dict: """ Encode audio to a dict with 'data' and 'audio_format'. - + Accepts: local file path, URL, data URI, dict, Audio instance, numpy array, or bytes (with known format). """ if isinstance(audio, dict) and "data" in audio and "audio_format" in audio: diff --git a/dspy/evaluate/evaluate.py b/dspy/evaluate/evaluate.py index fe804d7a3d..0fdec0b6e8 100644 --- a/dspy/evaluate/evaluate.py +++ b/dspy/evaluate/evaluate.py @@ -126,9 +126,9 @@ def __call__( Returns: The evaluation results are returned as a dspy.EvaluationResult object containing the following attributes: - + - score: A float percentage score (e.g., 67.30) representing overall performance - + - results: a list of (example, prediction, score) tuples for each example in devset """ metric = metric if metric is not None else self.metric diff --git a/dspy/primitives/base_module.py b/dspy/primitives/base_module.py index 4f50df60b9..d700514265 100644 --- a/dspy/primitives/base_module.py +++ b/dspy/primitives/base_module.py @@ -169,10 +169,10 @@ def save(self, path, save_program=False, modules_to_serialize=None): - `save_program=True`: Save the whole module to a directory via cloudpickle, which contains both the state and architecture of the model. - If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization - with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather - than by reference, ensuring the module is fully preserved along with the saved program. This is useful - when you have custom modules that need to be serialized alongside your program. If None, then no modules + If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization + with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather + than by reference, ensuring the module is fully preserved along with the saved program. This is useful + when you have custom modules that need to be serialized alongside your program. If None, then no modules will be registered for serialization. We also save the dependency versions, so that the loaded model can check if there is a version mismatch on diff --git a/dspy/utils/langchain_tool.py b/dspy/utils/langchain_tool.py index fdb1eebe03..306ede8b92 100644 --- a/dspy/utils/langchain_tool.py +++ b/dspy/utils/langchain_tool.py @@ -9,7 +9,7 @@ def convert_langchain_tool(tool: "BaseTool") -> Tool: """Build a DSPy tool from a LangChain tool. - + This function converts a LangChain tool (either created with @tool decorator or by subclassing BaseTool) into a DSPy Tool. 
diff --git a/tests/primitives/test_base_module.py b/tests/primitives/test_base_module.py index 617c8c444f..ee6255bb06 100644 --- a/tests/primitives/test_base_module.py +++ b/tests/primitives/test_base_module.py @@ -306,8 +306,8 @@ def __call__(self, question: str) -> str: assert results[0].get_lm_usage() is not None assert results[1].get_lm_usage() is not None - assert results[0].get_lm_usage().keys() == {"openai/gpt-4o-mini"} - assert results[1].get_lm_usage().keys() == {"openai/gpt-3.5-turbo"} + assert results[0].get_lm_usage().keys() == set(["openai/gpt-4o-mini"]) + assert results[1].get_lm_usage().keys() == set(["openai/gpt-3.5-turbo"]) @pytest.mark.asyncio diff --git a/tests/signatures/test_custom_types.py b/tests/signatures/test_custom_types.py index edd08489b6..5393dc24d0 100644 --- a/tests/signatures/test_custom_types.py +++ b/tests/signatures/test_custom_types.py @@ -29,6 +29,7 @@ class Container: class NestedType(pydantic.BaseModel): value: str + NestedType = Container.NestedType alias_sig = Signature("input: str -> output: NestedType") assert alias_sig.output_fields["output"].annotation == Container.NestedType diff --git a/tests/streaming/test_streaming.py b/tests/streaming/test_streaming.py index cc06fbd427..d79402d2ef 100644 --- a/tests/streaming/test_streaming.py +++ b/tests/streaming/test_streaming.py @@ -388,7 +388,7 @@ async def gpt_4o_mini_stream_2(): async def completion_side_effect(*args, **kwargs): return stream_generators.pop(0)() # return new async generator instance - with mock.patch("litellm.acompletion", side_effect=completion_side_effect): + with mock.patch("litellm.acompletion", side_effect=completion_side_effect) as mock_completion: program = dspy.streamify( MyProgram(), stream_listeners=[ @@ -484,7 +484,7 @@ async def gpt_4o_mini_stream_2(*args, **kwargs): with mock.patch( "litellm.acompletion", new_callable=AsyncMock, side_effect=[gpt_4o_mini_stream_1(), gpt_4o_mini_stream_2()] - ): + ) as mock_completion: program = dspy.streamify( MyProgram(), stream_listeners=[ From e7e0039c4c6434d0905e04c0bf7ad1cd7e14d202 Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Thu, 26 Jun 2025 09:38:33 -0700 Subject: [PATCH 07/13] fix set_lm --- docs/docs/index.md | 3 +- docs/docs/learn/optimization/optimizers.md | 3 +- .../classification_finetuning/index.ipynb | 4 +- dspy/teleprompt/bootstrap_finetune.py | 17 +++-- tests/teleprompt/test_bootstrap_finetune.py | 62 +++++++++---------- 5 files changed, 47 insertions(+), 42 deletions(-) diff --git a/docs/docs/index.md b/docs/docs/index.md index 4c5ff98c8e..70ef1deb4c 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -403,11 +403,12 @@ Given a few tens or hundreds of representative _inputs_ of your task and a _metr ```python linenums="1" import dspy - dspy.configure(lm=dspy.LM("openai/gpt-4o-mini-2024-07-18")) + lm=dspy.LM('openai/gpt-4o-mini-2024-07-18') # Define the DSPy module for classification. It will use the hint at training time, if available. signature = dspy.Signature("text, hint -> label").with_updated_fields("label", type_=Literal[tuple(CLASSES)]) classify = dspy.ChainOfThought(signature) + classify.set_lm(lm) # Optimize via BootstrapFinetune. 
optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24) diff --git a/docs/docs/learn/optimization/optimizers.md b/docs/docs/learn/optimization/optimizers.md index cb428c8155..ff6cbacbc7 100644 --- a/docs/docs/learn/optimization/optimizers.md +++ b/docs/docs/learn/optimization/optimizers.md @@ -176,11 +176,12 @@ optimized_program = teleprompter.compile(YOUR_PROGRAM_HERE, trainset=YOUR_TRAINS ```python linenums="1" import dspy - dspy.configure(lm=dspy.LM('openai/gpt-4o-mini-2024-07-18')) + lm=dspy.LM('openai/gpt-4o-mini-2024-07-18') # Define the DSPy module for classification. It will use the hint at training time, if available. signature = dspy.Signature("text, hint -> label").with_updated_fields('label', type_=Literal[tuple(CLASSES)]) classify = dspy.ChainOfThought(signature) + classify.set_lm(lm) # Optimize via BootstrapFinetune. optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24) diff --git a/docs/docs/tutorials/classification_finetuning/index.ipynb b/docs/docs/tutorials/classification_finetuning/index.ipynb index 80f949a43f..95f0e15ac8 100644 --- a/docs/docs/tutorials/classification_finetuning/index.ipynb +++ b/docs/docs/tutorials/classification_finetuning/index.ipynb @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +97,7 @@ "\n", "# Load the Banking77 dataset.\n", "CLASSES = load_dataset(\"PolyAI/banking77\", split=\"train\", trust_remote_code=True).features['label'].names\n", - "kwargs = dict(fields=(\"text\", \"label\"), input_keys=(\"text\",), split=\"train\", trust_remote_code=True)\n", + "kwargs = dict(fields=(\"text\", \"label\"), input_keys=(\"text\",\"hint\"), split=\"train\", trust_remote_code=True)\n", "\n", "# Load the first 2000 examples from the dataset, and assign a hint to each *training* example.\n", "raw_data = [\n", diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py index 7425fff31e..ea766b7b54 100644 --- a/dspy/teleprompt/bootstrap_finetune.py +++ b/dspy/teleprompt/bootstrap_finetune.py @@ -81,19 +81,24 @@ def compile( key_to_data = {} for pred_ind, pred in enumerate(student.predictors()): data_pred_ind = None if self.multitask else pred_ind - lm = pred.lm or settings.lm - training_key = (lm, data_pred_ind) + if pred.lm is None: + raise ValueError( + f"Predictor {pred_ind} does not have an LM assigned. " + f"Please ensure the module's predictors have their LM set before fine-tuning. 
" + f"You can set it using: your_module.set_lm(your_lm)" + ) + training_key = (pred.lm, data_pred_ind) if training_key not in key_to_data: train_data, data_format = self._prepare_finetune_data( - trace_data=trace_data, lm=lm, pred_ind=data_pred_ind + trace_data=trace_data, lm=pred.lm, pred_ind=data_pred_ind ) - logger.info(f"Using {len(train_data)} data points for fine-tuning the model: {lm.model}") + logger.info(f"Using {len(train_data)} data points for fine-tuning the model: {pred.lm.model}") finetune_kwargs = { - "lm": lm, + "lm": pred.lm, "train_data": train_data, "train_data_format": data_format, - "train_kwargs": self.train_kwargs[lm], + "train_kwargs": self.train_kwargs[pred.lm], } key_to_data[training_key] = finetune_kwargs diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index 25214aae7b..7f89583ec4 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -1,3 +1,5 @@ +import pytest + import dspy from dspy import Example from dspy.predict import Predict @@ -12,57 +14,53 @@ def simple_metric(example, prediction, trace=None): examples = [ Example(input="What is the color of the sky?", output="blue").with_inputs("input"), - Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!"), + Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"), ] trainset = [examples[0]] def test_bootstrap_finetune_initialization(): - # Initialize BootstrapFinetune with a dummy metric and minimal setup + """Test BootstrapFinetune initialization with various parameters.""" bootstrap = BootstrapFinetune(metric=simple_metric) assert bootstrap.metric == simple_metric, "Metric not correctly initialized" - assert bootstrap.multitask, "Multitask should default to True" + assert bootstrap.multitask == True, "Multitask should default to True" class SimpleModule(dspy.Module): - def __init__(self, signature, lm=None): + def __init__(self, signature): super().__init__() self.predictor = Predict(signature) - if lm: - self.predictor.lm = lm def forward(self, **kwargs): return self.predictor(**kwargs) -def test_compile_with_predict_instances_no_explicit_lm(): - """Test BootstrapFinetune compile with predictors that don't have explicit LMs.""" - from unittest.mock import patch +def test_error_handling_during_bootstrap(): + """Test error handling during the bootstrapping process.""" + + class BuggyModule(dspy.Module): + def __init__(self, signature): + super().__init__() + self.predictor = Predict(signature) - # Create student and teacher modules without explicit LMs in predictors - student = SimpleModule("input -> output") - teacher = SimpleModule("input -> output") + def forward(self, **kwargs): + raise RuntimeError("Simulated error") - lm = DummyLM(["Initial thoughts", "Finish[blue]"]) + student = SimpleModule("input -> output") + teacher = BuggyModule("input -> output") + + # Setup DummyLM to simulate an error scenario + lm = DummyLM( + [ + {"output": "Initial thoughts"}, # Simulate initial teacher's prediction + ] + ) dspy.settings.configure(lm=lm) - # Verify that the predictor doesn't have an explicit LM - assert student.predictor.lm is None - bootstrap = BootstrapFinetune(metric=simple_metric) - - # Mock all the components that would fail without proper setup - with patch("dspy.teleprompt.bootstrap_finetune.all_predictors_have_lms"), \ - patch("dspy.teleprompt.bootstrap_finetune.prepare_teacher", return_value=teacher), \ - 
patch("dspy.teleprompt.bootstrap_finetune.bootstrap_trace_data", return_value=[]), \ - patch.object(bootstrap, "_prepare_finetune_data", return_value=([], "openai")), \ - patch.object(bootstrap, "finetune_lms") as mock_finetune_lms: - - mock_finetune_lms.return_value = {(lm, None): lm} - - # This should not raise AttributeError due to the fix - compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - - assert compiled_student is not None, "Failed to compile student" - mock_finetune_lms.assert_called_once() - + bootstrap = BootstrapFinetune( + metric=simple_metric, + max_errors=1, + ) + with pytest.raises(RuntimeError, match="Simulated error"): + bootstrap.compile(student, teacher=teacher, trainset=trainset) \ No newline at end of file From 6cd9a329f4e61c8deba678394ef6c390756b614e Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Thu, 26 Jun 2025 09:42:26 -0700 Subject: [PATCH 08/13] fix --- tests/teleprompt/test_bootstrap_finetune.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index 7f89583ec4..d8998e7b5e 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -37,7 +37,7 @@ def forward(self, **kwargs): def test_error_handling_during_bootstrap(): """Test error handling during the bootstrapping process.""" - + class BuggyModule(dspy.Module): def __init__(self, signature): super().__init__() @@ -63,4 +63,4 @@ def forward(self, **kwargs): ) with pytest.raises(RuntimeError, match="Simulated error"): - bootstrap.compile(student, teacher=teacher, trainset=trainset) \ No newline at end of file + bootstrap.compile(student, teacher=teacher, trainset=trainset) From ac91e91df90f2f1ffabacd78cb0e62c1f081c6ff Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Thu, 26 Jun 2025 09:56:28 -0700 Subject: [PATCH 09/13] fix testing --- .../classification_finetuning/index.ipynb | 4 +-- dspy/teleprompt/bootstrap_finetune.py | 7 +++++- tests/teleprompt/test_bootstrap_finetune.py | 25 ++++++++++++++----- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/docs/docs/tutorials/classification_finetuning/index.ipynb b/docs/docs/tutorials/classification_finetuning/index.ipynb index 95f0e15ac8..80f949a43f 100644 --- a/docs/docs/tutorials/classification_finetuning/index.ipynb +++ b/docs/docs/tutorials/classification_finetuning/index.ipynb @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +97,7 @@ "\n", "# Load the Banking77 dataset.\n", "CLASSES = load_dataset(\"PolyAI/banking77\", split=\"train\", trust_remote_code=True).features['label'].names\n", - "kwargs = dict(fields=(\"text\", \"label\"), input_keys=(\"text\",\"hint\"), split=\"train\", trust_remote_code=True)\n", + "kwargs = dict(fields=(\"text\", \"label\"), input_keys=(\"text\",), split=\"train\", trust_remote_code=True)\n", "\n", "# Load the first 2000 examples from the dataset, and assign a hint to each *training* example.\n", "raw_data = [\n", diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py index ea766b7b54..1184557f9e 100644 --- a/dspy/teleprompt/bootstrap_finetune.py +++ b/dspy/teleprompt/bootstrap_finetune.py @@ -121,7 +121,7 @@ def compile( logger.info("Updating the student program with the fine-tuned LMs...") for pred_ind, pred in enumerate(student.predictors()): data_pred_ind = None if self.multitask else pred_ind - 
training_key = (pred.lm or settings.lm, data_pred_ind) + training_key = (pred.lm, data_pred_ind) finetuned_lm = key_to_lm[training_key] if isinstance(finetuned_lm, Exception): raise RuntimeError(f"Finetuned LM for predictor {pred_ind} failed.") from finetuned_lm @@ -295,6 +295,11 @@ def wrapped_program(**kwargs): ) return failed_pred, trace + except Exception as e: + # Handle other exceptions (like RuntimeError from BuggyModule) + trace = dspy.settings.trace.copy() + failed_pred = FailedPrediction(completion_text=str(e), format_reward=format_failure_score) + return failed_pred, trace results = evaluator(wrapped_program, metric=wrapped_metric).results diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index d8998e7b5e..86a664fed5 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -1,4 +1,5 @@ import pytest +from unittest.mock import patch import dspy from dspy import Example @@ -46,9 +47,6 @@ def __init__(self, signature): def forward(self, **kwargs): raise RuntimeError("Simulated error") - student = SimpleModule("input -> output") - teacher = BuggyModule("input -> output") - # Setup DummyLM to simulate an error scenario lm = DummyLM( [ @@ -57,10 +55,25 @@ def forward(self, **kwargs): ) dspy.settings.configure(lm=lm) + student = SimpleModule("input -> output") + teacher = BuggyModule("input -> output") + + # Set LM for the student module + student.set_lm(lm) + teacher.set_lm(lm) + bootstrap = BootstrapFinetune( metric=simple_metric, - max_errors=1, ) - with pytest.raises(RuntimeError, match="Simulated error"): - bootstrap.compile(student, teacher=teacher, trainset=trainset) + # Mock the fine-tuning process since DummyLM doesn't support it + with patch.object(bootstrap, 'finetune_lms') as mock_finetune: + mock_finetune.return_value = {(lm, None): lm} + + # The bootstrap should complete successfully even with a buggy teacher + # because we now handle exceptions gracefully + compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) + assert compiled_student is not None, "Bootstrap should complete successfully despite teacher errors" + + # Verify that fine-tuning was attempted (but with empty data due to the failed teacher) + mock_finetune.assert_called_once() From 83a6a04acedaf19f5940a4528697f40e35afb3bc Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Thu, 26 Jun 2025 09:59:01 -0700 Subject: [PATCH 10/13] lint --- tests/teleprompt/test_bootstrap_finetune.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index 86a664fed5..e9cd35df7e 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -1,4 +1,3 @@ -import pytest from unittest.mock import patch import dspy @@ -57,7 +56,7 @@ def forward(self, **kwargs): student = SimpleModule("input -> output") teacher = BuggyModule("input -> output") - + # Set LM for the student module student.set_lm(lm) teacher.set_lm(lm) @@ -67,13 +66,13 @@ def forward(self, **kwargs): ) # Mock the fine-tuning process since DummyLM doesn't support it - with patch.object(bootstrap, 'finetune_lms') as mock_finetune: + with patch.object(bootstrap, "finetune_lms") as mock_finetune: mock_finetune.return_value = {(lm, None): lm} - + # The bootstrap should complete successfully even with a buggy teacher # because we now handle exceptions gracefully compiled_student = 
bootstrap.compile(student, teacher=teacher, trainset=trainset) assert compiled_student is not None, "Bootstrap should complete successfully despite teacher errors" - + # Verify that fine-tuning was attempted (but with empty data due to the failed teacher) mock_finetune.assert_called_once() From e57109e53980808f68075539aebc9dd63d38e428 Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Thu, 26 Jun 2025 10:09:27 -0700 Subject: [PATCH 11/13] link the tutorial to index sessions for clarification --- docs/docs/index.md | 2 ++ docs/docs/learn/optimization/optimizers.md | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/docs/index.md b/docs/docs/index.md index 70ef1deb4c..4c6507e3dd 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -415,6 +415,8 @@ Given a few tens or hundreds of representative _inputs_ of your task and a _metr optimized = optimizer.compile(classify, trainset=trainset) optimized(text="What does a pending cash withdrawal mean?") + + # For a complete fine-tuning tutorial, see: https://dspy.ai/tutorials/classification_finetuning/ ``` **Possible Output (from the last line):** diff --git a/docs/docs/learn/optimization/optimizers.md b/docs/docs/learn/optimization/optimizers.md index ff6cbacbc7..fb0e68e0ce 100644 --- a/docs/docs/learn/optimization/optimizers.md +++ b/docs/docs/learn/optimization/optimizers.md @@ -63,7 +63,7 @@ These optimizers produce optimal instructions for the prompt and, in the case of This optimizer is used to fine-tune the underlying LLM(s). -7. [**`BootstrapFinetune`**](/api/optimizers/BootstrapFinetune): Distills a prompt-based DSPy program into weight updates. The output is a DSPy program that has the same steps, but where each step is conducted by a finetuned model instead of a prompted LM. +7. [**`BootstrapFinetune`**](/api/optimizers/BootstrapFinetune): Distills a prompt-based DSPy program into weight updates. The output is a DSPy program that has the same steps, but where each step is conducted by a finetuned model instead of a prompted LM. [See the classification fine-tuning tutorial](https://dspy.ai/tutorials/classification_finetuning/) for a complete example. 
### Program Transformations @@ -188,6 +188,8 @@ optimized_program = teleprompter.compile(YOUR_PROGRAM_HERE, trainset=YOUR_TRAINS optimized = optimizer.compile(classify, trainset=trainset) optimized(text="What does a pending cash withdrawal mean?") + + # For a complete fine-tuning tutorial, see: https://dspy.ai/tutorials/classification_finetuning/ ``` **Possible Output (from the last line):** From 5427474ae56368052bf8b435fea202f8af4b98cd Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Sat, 5 Jul 2025 08:54:02 -0700 Subject: [PATCH 12/13] resolve comments --- dspy/teleprompt/bootstrap_finetune.py | 5 -- tests/teleprompt/test_bootstrap_finetune.py | 63 +++++++++++---------- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py index 1184557f9e..97ecba79c3 100644 --- a/dspy/teleprompt/bootstrap_finetune.py +++ b/dspy/teleprompt/bootstrap_finetune.py @@ -295,11 +295,6 @@ def wrapped_program(**kwargs): ) return failed_pred, trace - except Exception as e: - # Handle other exceptions (like RuntimeError from BuggyModule) - trace = dspy.settings.trace.copy() - failed_pred = FailedPrediction(completion_text=str(e), format_reward=format_failure_score) - return failed_pred, trace results = evaluator(wrapped_program, metric=wrapped_metric).results diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index e9cd35df7e..7d655520f2 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -35,44 +35,47 @@ def forward(self, **kwargs): return self.predictor(**kwargs) -def test_error_handling_during_bootstrap(): - """Test error handling during the bootstrapping process.""" - - class BuggyModule(dspy.Module): - def __init__(self, signature): - super().__init__() - self.predictor = Predict(signature) - - def forward(self, **kwargs): - raise RuntimeError("Simulated error") - - # Setup DummyLM to simulate an error scenario - lm = DummyLM( - [ - {"output": "Initial thoughts"}, # Simulate initial teacher's prediction - ] - ) - dspy.settings.configure(lm=lm) - +def test_compile_with_predict_instances(): + """Test BootstrapFinetune compilation with Predict instances.""" + # Create SimpleModule instances for student and teacher student = SimpleModule("input -> output") - teacher = BuggyModule("input -> output") + teacher = SimpleModule("input -> output") + + lm = DummyLM([{"output": "blue"}, {"output": "Ring-ding-ding-ding-dingeringeding!"}]) + dspy.settings.configure(lm=lm) - # Set LM for the student module + # Set LM for both student and teacher student.set_lm(lm) teacher.set_lm(lm) - bootstrap = BootstrapFinetune( - metric=simple_metric, - ) + bootstrap = BootstrapFinetune(metric=simple_metric) # Mock the fine-tuning process since DummyLM doesn't support it with patch.object(bootstrap, "finetune_lms") as mock_finetune: mock_finetune.return_value = {(lm, None): lm} - - # The bootstrap should complete successfully even with a buggy teacher - # because we now handle exceptions gracefully compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - assert compiled_student is not None, "Bootstrap should complete successfully despite teacher errors" - - # Verify that fine-tuning was attempted (but with empty data due to the failed teacher) + + assert compiled_student is not None, "Failed to compile student" + assert hasattr(compiled_student, "_compiled") and compiled_student._compiled, "Student compilation flag 
not set" + mock_finetune.assert_called_once() + + +def test_error_handling_missing_lm(): + """Test error handling when predictor doesn't have an LM assigned.""" + + lm = DummyLM([{"output": "test"}]) + dspy.settings.configure(lm=lm) + + student = SimpleModule("input -> output") + # Intentionally NOT setting LM for the student module + + bootstrap = BootstrapFinetune(metric=simple_metric) + + # This should raise ValueError about missing LM and hint to use set_lm + try: + bootstrap.compile(student, trainset=trainset) + assert False, "Should have raised ValueError for missing LM" + except ValueError as e: + assert "does not have an LM assigned" in str(e) + assert "set_lm" in str(e) From 2a47ab75c2b3749b0ec5da618bced896b8fee095 Mon Sep 17 00:00:00 2001 From: Yiwei Dai Date: Sat, 5 Jul 2025 09:02:28 -0700 Subject: [PATCH 13/13] format --- tests/signatures/test_signature.py | 2 +- tests/teleprompt/test_bootstrap_finetune.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/signatures/test_signature.py b/tests/signatures/test_signature.py index 2a18d5ac50..12e23b46ad 100644 --- a/tests/signatures/test_signature.py +++ b/tests/signatures/test_signature.py @@ -514,7 +514,7 @@ class Sig1(Signature): output: int | str = OutputField() class Sig2(Signature): - input: Optional[str] = InputField() # noqa: UP045 + input: str | None = InputField() output: Union[int, str] = OutputField() # noqa: UP007 # PEP 604 union types in class signatures should be equivalent to Optional and Union types diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py index 7d655520f2..59c685902f 100644 --- a/tests/teleprompt/test_bootstrap_finetune.py +++ b/tests/teleprompt/test_bootstrap_finetune.py @@ -54,16 +54,16 @@ def test_compile_with_predict_instances(): with patch.object(bootstrap, "finetune_lms") as mock_finetune: mock_finetune.return_value = {(lm, None): lm} compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) - + assert compiled_student is not None, "Failed to compile student" assert hasattr(compiled_student, "_compiled") and compiled_student._compiled, "Student compilation flag not set" - + mock_finetune.assert_called_once() def test_error_handling_missing_lm(): """Test error handling when predictor doesn't have an LM assigned.""" - + lm = DummyLM([{"output": "test"}]) dspy.settings.configure(lm=lm)