
Commit 1ba8c84

Author: Vincent Moens

[Feature] LLM Tooling

ghstack-source-id: 2eb02d4
Pull-Request-resolved: #2966

1 parent: b1d2dc2 · commit: 1ba8c84


54 files changed: +12079, -7 lines

.github/unittest/linux/scripts/run_all.sh

Lines changed: 2 additions & 0 deletions
@@ -208,11 +208,13 @@ pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym or test_dm_contro
 if [ "${CU_VERSION:-}" != cpu ] ; then
   python .github/unittest/helpers/coverage_run_parallel.py -m pytest test \
     --instafail --durations 200 -vv --capture no --ignore test/test_rlhf.py \
+    --ignore test/llm \
     --timeout=120 --mp_fork_if_no_cuda
 else
   python .github/unittest/helpers/coverage_run_parallel.py -m pytest test \
     --instafail --durations 200 -vv --capture no --ignore test/test_rlhf.py \
     --ignore test/test_distributed.py \
+    --ignore test/llm \
     --timeout=120 --mp_fork_if_no_cuda
 fi

.github/unittest/linux_optdeps/scripts/run_all.sh

Lines changed: 1 addition & 0 deletions
@@ -159,6 +159,7 @@ export BATCHED_PIPE_TIMEOUT=60
 python .github/unittest/helpers/coverage_run_parallel.py -m pytest test \
   --instafail --durations 200 -vv --capture no --ignore test/test_rlhf.py \
   --ignore test/test_distributed.py \
+  --ignore test/llm \
   --timeout=120 --mp_fork_if_no_cuda

 coverage combine

docs/source/reference/index.rst

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ API Reference
    collectors
    data
    envs
+   llms
    modules
    objectives
    trainers

docs/source/reference/llms.rst

Lines changed: 113 additions & 1 deletion
@@ -1,4 +1,4 @@
-.. currentmodule:: torchrl.trainers
+.. currentmodule:: torchrl

 LLM interface
 =============
@@ -7,13 +7,125 @@ LLM interface

 TorchRL offers a set of tools for LLM post-training, as well as some examples for training or setup.

+Collectors
+----------
+
+TorchRL offers a specialized collector class (:class:`~torchrl.collectors.llm.LLMCollector`) that is tailored for LLM
+use cases. We also provide dedicated updaters for some inference engines.
+
+.. currentmodule:: torchrl.collectors.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    vLLMUpdater
+    LLMCollector
+
 Data structures
 ---------------

+To handle text-based data structures (such as conversations, etc.), we offer a few data structures dedicated to carrying
+data for LLM post-training.
+
 .. currentmodule:: torchrl.data.llm

 .. autosummary::
     :toctree: generated/
     :template: rl_template.rst

     History
+    LLMData
+
+Environments
+------------
+
+When fine-tuning an LLM using TorchRL, the environment is a crucial component of the inference pipeline, alongside the
+policy and collector. Environments manage operations that are not handled by the LLM itself, such as interacting with
+tools, loading prompts from datasets, computing rewards (when necessary), and formatting data.
+
+The design of environments in TorchRL allows for flexibility and modularity. By framing tasks as environments, users can
+easily extend or modify existing environments using transforms. This approach enables the isolation of individual
+components within specific :class:`~torchrl.envs.EnvBase` or :class:`~torchrl.envs.Transform` subclasses, making it
+simpler to augment or alter the environment logic.
+
+Available Environment Classes and Utilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TorchRL provides various environment classes and utilities for working with LLMs, including:
+
+- Environment classes (:class:`~torchrl.envs.llm.ChatEnv`, :class:`~torchrl.envs.llm.DatasetChatEnv`,
+  :class:`~torchrl.envs.llm.GSM8KEnv`, etc.)
+- Utility functions (:class:`~torchrl.envs.make_gsm8k_env`, :class:`~torchrl.envs.make_mlgym`, etc.)
+- Transforms and other supporting classes (:class:`~torchrl.envs.KLRewardTransform`,
+  :class:`~torchrl.envs.TemplateTransform`, :class:`~torchrl.envs.Tokenizer`, etc.)
+
+These components can be used to create customized environments tailored to specific use cases and requirements.
+
+.. currentmodule:: torchrl.envs.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    ChatEnv
+    DatasetChatEnv
+    GSM8KEnv
+    make_gsm8k_env
+    GSM8KPrepareQuestion
+    GSM8KEnv
+    IFEvalEnv
+    IfEvalScorer
+    IFEvalScoreData
+    LLMEnv
+    LLMHashingEnv
+    make_mlgym
+    MLGymWrapper
+    GSM8KRewardParser
+    IfEvalScorer
+    as_nested_tensor
+    as_padded_tensor
+    DataLoadingPrimer
+    KLRewardTransform
+    TemplateTransform
+    Tokenizer
+
+Modules
+-------
+
+The :mod:`~torchrl.modules.llm` section provides a set of wrappers and utility functions for popular training and
+inference backends. The main goals of these primitives are to:
+
+- Unify the input / output data format across training and inference pipelines;
+- Unify the input / output data format across backends (to be able to use different backends across losses and
+  collectors, for instance);
+- Provide appropriate tooling to construct these objects in typical RL settings (resource allocation, async execution,
+  weight update, etc.).
+
+Wrappers
+~~~~~~~~
+
+.. currentmodule:: torchrl.modules.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    TransformersWrapper
+    vLLMWrapper
+
+Utils
+~~~~~
+
+.. currentmodule:: torchrl.modules.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    CategoricalSequential
+    LLMOnDevice
+    make_vllm_worker
+    stateless_init_process_group
+    vLLMWorker
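
To see how these pieces fit together, here is a minimal rollout sketch assembled from the classes documented above. The TransformersWrapper / make_mlgym / SerialEnv calls mirror the usage in test/llm/libs/test_mlgym.py below; the model name and generation settings are placeholders, and running it requires the transformers and MLGym dependencies (and a GPU for a 7B checkpoint).

    from functools import partial

    from torchrl.envs import SerialEnv
    from torchrl.envs.llm import make_mlgym
    from torchrl.modules.llm import TransformersWrapper
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "Qwen/Qwen2.5-7B-Instruct"  # placeholder checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Wrap the Hugging Face model so it reads and writes TorchRL's text-based
    # data format (from_text=True) and generates new tokens when called.
    policy = TransformersWrapper(
        AutoModelForCausalLM.from_pretrained(model_name),
        tokenizer=tokenizer,
        from_text=True,
        generate=True,
        generate_kwargs={"max_new_tokens": 256},
    )

    # The environment handles everything the LLM itself does not:
    # loading task prompts, tool interaction, and reward computation.
    env = SerialEnv(1, [partial(make_mlgym, task="prisonersDilemma", tokenizer=tokenizer)])
    rollout = env.rollout(3, policy)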

test/llm/libs/test_mlgym.py

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import argparse
+
+from functools import partial
+
+import pytest
+
+from torchrl import logger as torchrl_logger
+from torchrl.envs import SerialEnv
+
+from torchrl.envs.llm import make_mlgym
+from torchrl.modules.llm import TransformersWrapper
+
+
+class TestMLGYM:
+    def test_mlgym_specs(self):
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        model_name = "Qwen/Qwen2.5-7B-Instruct"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.eos_token = "<|im_end|>"
+        policy = TransformersWrapper(
+            AutoModelForCausalLM.from_pretrained(model_name).cuda(),
+            tokenizer=tokenizer,
+            from_text=True,
+            generate=True,
+            device="cuda:0",
+            generate_kwargs={
+                # "temperature": 0.8,
+                # "repetition_penalty": 1.5,
+                "max_new_tokens": 1024
+            },
+        )
+
+        env = SerialEnv(
+            1,
+            [
+                partial(
+                    make_mlgym,
+                    task="prisonersDilemma",
+                    tokenizer=tokenizer,
+                    device="cuda:0",
+                )
+            ],
+        )
+        rollout = env.rollout(3, policy)
+        torchrl_logger.info(f"{rollout=}")
+        env.check_env_specs(break_when_any_done="both")
+
+    def test_mlgym_task_reset(self):
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        model_name = "Qwen/Qwen2.5-7B-Instruct"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.eos_token = "<|im_end|>"
+        policy = TransformersWrapper(
+            AutoModelForCausalLM.from_pretrained(model_name).cuda(),
+            tokenizer=tokenizer,
+            from_text=True,
+            generate=True,
+            device="cuda:0",
+            generate_kwargs={
+                # "temperature": 0.8,
+                # "repetition_penalty": 1.5,
+                "max_new_tokens": 1024
+            },
+        )
+
+        env = SerialEnv(
+            1,
+            [
+                partial(
+                    make_mlgym,
+                    tasks=[
+                        "prisonersDilemma",
+                        "regressionKaggleHousePrice",
+                        "battleOfSexes",
+                    ],
+                    tokenizer=tokenizer,
+                    device="cuda:0",
+                )
+            ],
+        )
+        # We should get at least two tasks
+        rollout = env.rollout(100, policy, break_when_any_done=False)
+        torchrl_logger.info(f"{rollout=}")
+        torchrl_logger.info(rollout["task"])
+
+    def test_mlgym_wrong_format(self):
+        # A vanilla policy will not output anything useful, yet the env should run without error
+        ...
+
+
+if __name__ == "__main__":
+    args, unknown = argparse.ArgumentParser().parse_known_args()
+    pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)

test/llm/mocking_classes.py

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import random
+import string
+
+import torch
+
+
+class DummyStrDataLoader:
+    def __init__(self, batch_size=0):
+        if isinstance(batch_size, tuple):
+            batch_size = torch.Size(batch_size).numel()
+        self.batch_size = batch_size
+
+    def generate_random_string(self, length=10):
+        """Generate a random string of a given length."""
+        return "".join(random.choice(string.ascii_lowercase) for _ in range(length))
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.batch_size == 0:
+            return {"text": self.generate_random_string()}
+        else:
+            return {
+                "text": [self.generate_random_string() for _ in range(self.batch_size)]
+            }
+
+
+class DummyTensorDataLoader:
+    def __init__(self, batch_size=0, max_length=10, padding=False):
+        if isinstance(batch_size, tuple):
+            batch_size = torch.Size(batch_size).numel()
+        self.batch_size = batch_size
+        self.max_length = max_length
+        self.padding = padding
+
+    def generate_random_tensor(self):
+        """Generate a tensor of random int64 values."""
+        length = random.randint(1, self.max_length)
+        rt = torch.randint(1, 10000, (length,))
+        return rt
+
+    def pad_tensor(self, tensor):
+        """Pad a tensor to the maximum length."""
+        padding_length = self.max_length - len(tensor)
+        return torch.cat((torch.zeros(padding_length, dtype=torch.int64), tensor))
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.batch_size == 0:
+            tensor = self.generate_random_tensor()
+            tokens = self.pad_tensor(tensor) if self.padding else tensor
+        else:
+            tensors = [self.generate_random_tensor() for _ in range(self.batch_size)]
+            if self.padding:
+                tensors = [self.pad_tensor(tensor) for tensor in tensors]
+                tokens = torch.stack(tensors)
+            else:
+                tokens = tensors
+        return {"tokens": tokens, "attention_mask": tokens != 0}

test/llm/smoke_test.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import argparse
+
+import pytest
+
+
+def test_import():
+    pass
+
+
+if __name__ == "__main__":
+    args, unknown = argparse.ArgumentParser().parse_known_args()
+    pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)

test/llm/smoke_test_deps.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import argparse
+
+import pytest
+
+
+if __name__ == "__main__":
+    args, unknown = argparse.ArgumentParser().parse_known_args()
+    pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
