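amend: GRPO make_env now passes role="user", edit_last_turn=False, zero_reward=False to AddThinkingPrompt; IFEval _collate_fn wraps non-list kwargs items in a list and no longer logs the collated batch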

vmoens · vmoens · commit a171e327a28d · 2025-07-04T16:13:55.000+01:00
diff --git a/sota-implementations/grpo/grpo_utils.py b/sota-implementations/grpo/grpo_utils.py
@@ -548,9 +548,9 @@ def make_env(cfg: DictConfig, devices: list[int] | None = None):
             AddThinkingPrompt(
                 cond=lambda td: td["reward"] <= reward_threshold
                 and td["step_count"] < max_steps,
-                role="assistant",
-                edit_last_turn=True,
-                zero_reward=True,
+                role="user",
+                edit_last_turn=False,
+                zero_reward=False,
                 undo_done=True,
             ),
         )
diff --git a/torchrl/envs/llm/datasets/ifeval.py b/torchrl/envs/llm/datasets/ifeval.py
@@ -7,12 +7,7 @@
 from typing import Any, Callable, Literal
 
 import torch
-from tensordict import (
-    NonTensorData,
-    NonTensorStack,
-    TensorClass,
-    TensorDict,
-)
+from tensordict import NonTensorData, NonTensorStack, TensorClass, TensorDict
 from torchrl._utils import logger as torchrl_logger
 from torchrl.data import Composite, NonTensor, Unbounded
 from torchrl.envs import StepCounter
@@ -72,16 +67,23 @@ def _collate_fn(batch):
     batch = torch.stack([TensorDict.from_any(_batch) for _batch in batch])
     batch.rename_key_("prompt", "query")
     # we want instruction_id_list and kwargs to be lists, but not NonTensorStacks
-    instruction_id_list = batch.get("instruction_id_list")
+    instruction_id_list = batch["instruction_id_list"]
     # instruction_id_list should be a list of lists
     instruction_id_list = NonTensorStack(
-        *[NonTensorData([item] if not isinstance(item, list) else item) for item in instruction_id_list]
+        *[
+            NonTensorData([item] if not isinstance(item, list) else item)
+            for item in instruction_id_list
+        ]
+    )
+    kwargs = batch["kwargs"]
+    kwargs = NonTensorStack(
+        *[
+            NonTensorData([item] if not isinstance(item, list) else item)
+            for item in kwargs
+        ]
     )
-    kwargs = batch.get("kwargs")
-    kwargs = NonTensorStack(*[NonTensorData([item] if not isinstance(item, dict) else item) for item in kwargs])
     batch.set("instruction_id_list", instruction_id_list)
     batch.set("kwargs", kwargs)
-    torchrl_logger.info(f"Collated batch: {batch}")
     # we don't need a tensorclass here
     return batch
     # return IFEvalData.from_tensordict(batch)