Fix set_lm usage: require predictors to have an LM set before BootstrapFinetune

Hangzhi · Hangzhi · commit e7e0039c4c64 · 2025-06-26T09:40:49.000-07:00
diff --git a/docs/docs/index.md b/docs/docs/index.md
@@ -403,11 +403,12 @@ Given a few tens or hundreds of representative _inputs_ of your task and a _metr
 
         ```python linenums="1"
         import dspy
-        dspy.configure(lm=dspy.LM("openai/gpt-4o-mini-2024-07-18"))
+        lm=dspy.LM('openai/gpt-4o-mini-2024-07-18')
 
         # Define the DSPy module for classification. It will use the hint at training time, if available.
         signature = dspy.Signature("text, hint -> label").with_updated_fields("label", type_=Literal[tuple(CLASSES)])
         classify = dspy.ChainOfThought(signature)
+        classify.set_lm(lm)
 
         # Optimize via BootstrapFinetune.
         optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24)
diff --git a/docs/docs/learn/optimization/optimizers.md b/docs/docs/learn/optimization/optimizers.md
@@ -176,11 +176,12 @@ optimized_program = teleprompter.compile(YOUR_PROGRAM_HERE, trainset=YOUR_TRAINS
 
         ```python linenums="1"
         import dspy
-        dspy.configure(lm=dspy.LM('openai/gpt-4o-mini-2024-07-18'))
+        lm=dspy.LM('openai/gpt-4o-mini-2024-07-18')
 
         # Define the DSPy module for classification. It will use the hint at training time, if available.
         signature = dspy.Signature("text, hint -> label").with_updated_fields('label', type_=Literal[tuple(CLASSES)])
         classify = dspy.ChainOfThought(signature)
+        classify.set_lm(lm)
 
         # Optimize via BootstrapFinetune.
         optimizer = dspy.BootstrapFinetune(metric=(lambda x, y, trace=None: x.label == y.label), num_threads=24)
diff --git a/docs/docs/tutorials/classification_finetuning/index.ipynb b/docs/docs/tutorials/classification_finetuning/index.ipynb
@@ -86,7 +86,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -97,7 +97,7 @@
     "\n",
     "# Load the Banking77 dataset.\n",
     "CLASSES = load_dataset(\"PolyAI/banking77\", split=\"train\", trust_remote_code=True).features['label'].names\n",
-    "kwargs = dict(fields=(\"text\", \"label\"), input_keys=(\"text\",), split=\"train\", trust_remote_code=True)\n",
+    "kwargs = dict(fields=(\"text\", \"label\"), input_keys=(\"text\",\"hint\"), split=\"train\", trust_remote_code=True)\n",
     "\n",
     "# Load the first 2000 examples from the dataset, and assign a hint to each *training* example.\n",
     "raw_data = [\n",
diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py
@@ -81,19 +81,24 @@ def compile(
         key_to_data = {}
         for pred_ind, pred in enumerate(student.predictors()):
             data_pred_ind = None if self.multitask else pred_ind
-            lm = pred.lm or settings.lm
-            training_key = (lm, data_pred_ind)
+            if pred.lm is None:
+                raise ValueError(
+                    f"Predictor {pred_ind} does not have an LM assigned. "
+                    f"Please ensure the module's predictors have their LM set before fine-tuning. "
+                    f"You can set it using: your_module.set_lm(your_lm)"
+                )
+            training_key = (pred.lm, data_pred_ind)
 
             if training_key not in key_to_data:
                 train_data, data_format = self._prepare_finetune_data(
-                    trace_data=trace_data, lm=lm, pred_ind=data_pred_ind
+                    trace_data=trace_data, lm=pred.lm, pred_ind=data_pred_ind
                 )
-                logger.info(f"Using {len(train_data)} data points for fine-tuning the model: {lm.model}")
+                logger.info(f"Using {len(train_data)} data points for fine-tuning the model: {pred.lm.model}")
                 finetune_kwargs = {
-                    "lm": lm,
+                    "lm": pred.lm,
                     "train_data": train_data,
                     "train_data_format": data_format,
-                    "train_kwargs": self.train_kwargs[lm],
+                    "train_kwargs": self.train_kwargs[pred.lm],
                 }
                 key_to_data[training_key] = finetune_kwargs
 
diff --git a/tests/teleprompt/test_bootstrap_finetune.py b/tests/teleprompt/test_bootstrap_finetune.py
@@ -1,3 +1,5 @@
+import pytest
+
 import dspy
 from dspy import Example
 from dspy.predict import Predict
@@ -12,57 +14,53 @@ def simple_metric(example, prediction, trace=None):
 
 examples = [
     Example(input="What is the color of the sky?", output="blue").with_inputs("input"),
-    Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!"),
+    Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"),
 ]
 trainset = [examples[0]]
 
 
 def test_bootstrap_finetune_initialization():
-    # Initialize BootstrapFinetune with a dummy metric and minimal setup
+    """Test that BootstrapFinetune stores the metric and defaults to multitask."""
     bootstrap = BootstrapFinetune(metric=simple_metric)
     assert bootstrap.metric == simple_metric, "Metric not correctly initialized"
-    assert bootstrap.multitask, "Multitask should default to True"
+    assert bootstrap.multitask == True, "Multitask should default to True"
 
 
 class SimpleModule(dspy.Module):
-    def __init__(self, signature, lm=None):
+    def __init__(self, signature):
         super().__init__()
         self.predictor = Predict(signature)
-        if lm:
-            self.predictor.lm = lm
 
     def forward(self, **kwargs):
         return self.predictor(**kwargs)
 
 
-def test_compile_with_predict_instances_no_explicit_lm():
-    """Test BootstrapFinetune compile with predictors that don't have explicit LMs."""
-    from unittest.mock import patch
+def test_error_handling_during_bootstrap():
+    """Test error handling during the bootstrapping process."""
+    
+    class BuggyModule(dspy.Module):
+        def __init__(self, signature):
+            super().__init__()
+            self.predictor = Predict(signature)
 
-    # Create student and teacher modules without explicit LMs in predictors
-    student = SimpleModule("input -> output")
-    teacher = SimpleModule("input -> output")
+        def forward(self, **kwargs):
+            raise RuntimeError("Simulated error")
 
-    lm = DummyLM(["Initial thoughts", "Finish[blue]"])
+    student = SimpleModule("input -> output")
+    teacher = BuggyModule("input -> output")
+
+    # Set up DummyLM to simulate an error scenario
+    lm = DummyLM(
+        [
+            {"output": "Initial thoughts"},  # Simulate initial teacher's prediction
+        ]
+    )
     dspy.settings.configure(lm=lm)
 
-    # Verify that the predictor doesn't have an explicit LM
-    assert student.predictor.lm is None
-    bootstrap = BootstrapFinetune(metric=simple_metric)
-
-    # Mock all the components that would fail without proper setup
-    with patch("dspy.teleprompt.bootstrap_finetune.all_predictors_have_lms"), \
-            patch("dspy.teleprompt.bootstrap_finetune.prepare_teacher", return_value=teacher), \
-            patch("dspy.teleprompt.bootstrap_finetune.bootstrap_trace_data", return_value=[]), \
-            patch.object(bootstrap, "_prepare_finetune_data", return_value=([], "openai")), \
-            patch.object(bootstrap, "finetune_lms") as mock_finetune_lms:
-
-        mock_finetune_lms.return_value = {(lm, None): lm}
-
-        # This should not raise AttributeError due to the fix
-        compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset)
-
-        assert compiled_student is not None, "Failed to compile student"
-        mock_finetune_lms.assert_called_once()
-
+    bootstrap = BootstrapFinetune(
+        metric=simple_metric,
+        max_errors=1,
+    )
 
+    with pytest.raises(RuntimeError, match="Simulated error"):
+        bootstrap.compile(student, teacher=teacher, trainset=trainset)