Commit 3fa1aba

Updated Cheatsheet (#8499)
* Removed deprecated content from the cheatsheet.
* Added a SIMBA example to cheatsheet.md.
1 parent c0506d1 · commit 3fa1aba

File tree

1 file changed: +18 −147 lines


docs/docs/cheatsheet.md

Lines changed: 18 additions & 147 deletions
@@ -2,127 +2,10 @@
sidebar_position: 999
---

-
-!!! warning "This page is outdated and may not be fully accurate in DSPy 2.5 and 2.6"
-
-
# DSPy Cheatsheet

This page will contain snippets for frequent usage patterns.

-## DSPy DataLoaders
-
-Importing and initializing a `DataLoader` object:
-
-```python
-import dspy
-from dspy.datasets import DataLoader
-
-dl = DataLoader()
-```
-
-### Loading from HuggingFace Datasets
-
-```python
-code_alpaca = dl.from_huggingface("HuggingFaceH4/CodeAlpaca_20K")
-```
-
-You can access each split's dataset by indexing with the corresponding split key:
-
-```python
-train_dataset = code_alpaca['train']
-test_dataset = code_alpaca['test']
-```
-
-### Loading specific splits from HuggingFace
-
-You can also manually specify the splits you want to include as a parameter, and the loader will return a dictionary whose keys are the splits you specified:
-
-```python
-code_alpaca = dl.from_huggingface(
-    "HuggingFaceH4/CodeAlpaca_20K",
-    split = ["train", "test"],
-)
-
-print(f"Splits in dataset: {code_alpaca.keys()}")
-```
-
-If you specify a single split, the dataloader will return a list of `dspy.Example` instead of a dictionary:
-
-```python
-code_alpaca = dl.from_huggingface(
-    "HuggingFaceH4/CodeAlpaca_20K",
-    split = "train",
-)
-
-print(f"Number of examples in split: {len(code_alpaca)}")
-```
-
-You can also slice the split just like you do with a HuggingFace `Dataset`:
-
-```python
-code_alpaca_80 = dl.from_huggingface(
-    "HuggingFaceH4/CodeAlpaca_20K",
-    split = "train[:80%]",
-)
-
-print(f"Number of examples in split: {len(code_alpaca_80)}")
-
-code_alpaca_20_80 = dl.from_huggingface(
-    "HuggingFaceH4/CodeAlpaca_20K",
-    split = "train[20%:80%]",
-)
-
-print(f"Number of examples in split: {len(code_alpaca_20_80)}")
-```
-
-### Loading specific subset from HuggingFace
-
-If a dataset has a subset, you can pass it as an argument just like you do with `load_dataset` in HuggingFace:
-
-```python
-gms8k = dl.from_huggingface(
-    "gsm8k",
-    "main",
-    input_keys = ("question",),
-)
-
-print(f"Keys present in the returned dict: {list(gms8k.keys())}")
-
-print(f"Number of examples in train set: {len(gms8k['train'])}")
-print(f"Number of examples in test set: {len(gms8k['test'])}")
-```
-
-### Loading from CSV
-
-```python
-dolly_100_dataset = dl.from_csv("dolly_subset_100_rows.csv")
-```
-
-You can select only specific columns from the CSV by specifying them in the arguments:
-
-```python
-dolly_100_dataset = dl.from_csv(
-    "dolly_subset_100_rows.csv",
-    fields=("instruction", "context", "response"),
-    input_keys=("instruction", "context")
-)
-```
-
-### Splitting a List of `dspy.Example`
-
-```python
-splits = dl.train_test_split(dataset, train_size=0.8)  # `dataset` is a List of dspy.Example
-train_dataset = splits['train']
-test_dataset = splits['test']
-```
-
-### Sampling from List of `dspy.Example`
-
-```python
-sampled_example = dl.sample(dataset, n=5)  # `dataset` is a List of dspy.Example
-```
-
## DSPy Programs

### dspy.Signature
@@ -131,8 +14,8 @@ sampled_example = dl.sample(dataset, n=5) # `dataset` is a List of dspy.Example
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

-    question = dspy.InputField()
-    answer = dspy.OutputField(desc="often between 1 and 5 words")
+    question: str = dspy.InputField()
+    answer: str = dspy.OutputField(desc="often between 1 and 5 words")
```

### dspy.ChainOfThought
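
For context on the typed-field change above, here is a minimal sketch (not part of the diff) of how the updated `BasicQA` signature could be driven with `dspy.Predict`; the model name and question are placeholder assumptions, not taken from the commit.

```python
import dspy

# Assumption: any LM supported by dspy.LM works here; the model name is a placeholder.
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))

class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    question: str = dspy.InputField()
    answer: str = dspy.OutputField(desc="often between 1 and 5 words")

# dspy.Predict builds a predictor from the signature; with the typed output
# field, `prediction.answer` comes back as a plain str.
qa = dspy.Predict(BasicQA)
prediction = qa(question="What is the capital of France?")
print(prediction.answer)
```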
@@ -225,13 +108,13 @@ class FactJudge(dspy.Signature):
    context = dspy.InputField(desc="Context for the prediction")
    question = dspy.InputField(desc="Question to be answered")
    answer = dspy.InputField(desc="Answer for the question")
-    factually_correct = dspy.OutputField(desc="Is the answer factually correct based on the context?", prefix="Factual[Yes/No]:")
+    factually_correct: bool = dspy.OutputField(desc="Is the answer factually correct based on the context?")

judge = dspy.ChainOfThought(FactJudge)

def factuality_metric(example, pred):
    factual = judge(context=example.context, question=example.question, answer=pred.answer)
-    return int(factual=="Yes")
+    return factual.factually_correct
```

## DSPy Evaluation
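
Because the judge now returns a typed boolean, the metric can be passed straight to DSPy's evaluator. The following is a hedged sketch only: `devset`, `your_dspy_program`, and the thread count are placeholders, and it assumes `Evaluate` invokes the metric with `(example, prediction)` as in the cheatsheet's evaluation section.

```python
from dspy.evaluate import Evaluate

# Placeholders: `devset` is a list of dspy.Example with `context` and `question`
# inputs plus a gold `answer`; `your_dspy_program` is the program under test.
evaluator = Evaluate(
    devset=devset,
    metric=factuality_metric,
    num_threads=8,
    display_progress=True,
)

# The returned score aggregates the boolean judgments from factuality_metric.
score = evaluator(your_dspy_program)
print(score)
```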
@@ -367,18 +250,6 @@ copro_teleprompter = COPRO(prompt_model=model_to_generate_prompts, metric=your_d
compiled_program_optimized_signature = copro_teleprompter.compile(your_dspy_program, trainset=trainset, eval_kwargs=eval_kwargs)
```

-### MIPRO
-
-```python
-from dspy.teleprompt import MIPRO
-
-teleprompter = MIPRO(prompt_model=model_to_generate_prompts, task_model=model_that_solves_task, metric=your_defined_metric, num_candidates=num_new_prompts_generated, init_temperature=prompt_generation_temperature)
-
-kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0)
-
-compiled_program_optimized_bayesian_signature = teleprompter.compile(your_dspy_program, trainset=trainset, num_trials=100, max_bootstrapped_demos=3, max_labeled_demos=5, eval_kwargs=kwargs)
-```
-
### MIPROv2

Note: detailed documentation can be found [here](api/optimizers/MIPROv2.md). `MIPROv2` is the latest extension of `MIPRO` which includes updates such as (1) improvements to instruction proposal and (2) more efficient search with minibatching.
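
As a rough, hedged companion to the note above (the cheatsheet's full `MIPROv2` snippet continues below this hunk), a light-budget run might look like the sketch below; the metric, program, and trainset are the same placeholders used elsewhere in the cheatsheet.

```python
from dspy.teleprompt import MIPROv2

# Placeholders: your_defined_metric, your_dspy_program, and trainset are
# assumed to be defined as in the surrounding cheatsheet examples.
teleprompter = MIPROv2(metric=your_defined_metric, auto="light")

optimized_program = teleprompter.compile(
    your_dspy_program,
    trainset=trainset,
    max_bootstrapped_demos=3,
    max_labeled_demos=4,
)
```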
@@ -445,20 +316,6 @@ print(f"Evaluate optimized program...")
evaluate(optimized_program, devset=devset[:])
```

-### Signature Optimizer with Types
-
-```python
-from dspy.teleprompt.signature_opt_typed import optimize_signature
-from dspy.evaluate.metrics import answer_exact_match
-from dspy.functional import TypedChainOfThought
-
-compiled_program = optimize_signature(
-    student=TypedChainOfThought("question -> answer"),
-    evaluator=Evaluate(devset=devset, metric=answer_exact_match, num_threads=10, display_progress=True),
-    n_iterations=50,
-).program
-```
-
### KNNFewShot

```python
@@ -484,6 +341,20 @@ your_dspy_program_compiled = fewshot_optuna_optimizer.compile(student=your_dspy_

Other custom configurations are similar to customizing the `dspy.BootstrapFewShot` optimizer.

+
+### SIMBA
+
+SIMBA, which stands for Stochastic Introspective Mini-Batch Ascent, is a prompt optimizer that accepts arbitrary DSPy programs and proceeds in a sequence of mini-batches, seeking to make incremental improvements to the prompt instructions or few-shot examples.
+
+```python
+from dspy.teleprompt import SIMBA
+
+simba = SIMBA(metric=your_defined_metric, max_steps=12, max_demos=10)
+
+optimized_program = simba.compile(student=your_dspy_program, trainset=trainset)
+```
+
+
## DSPy `Refine` and `BestofN`

>`dspy.Suggest` and `dspy.Assert` are replaced by `dspy.Refine` and `dspy.BestofN` in DSPy 2.6.
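
As a follow-up to the new SIMBA snippet, here is a small hedged sketch of persisting and reloading the optimized program; the file name is arbitrary and `YourProgramClass` is a hypothetical stand-in for however `your_dspy_program` is constructed.

```python
# Save the SIMBA-optimized program's state (the path is a placeholder).
optimized_program.save("simba_optimized_program.json")

# Later: rebuild the same module and load the optimized state back in.
reloaded_program = YourProgramClass()  # hypothetical constructor for your program
reloaded_program.load("simba_optimized_program.json")
```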
