
Commit bef7a20

Author: Vincent Moens
Update
[ghstack-poisoned]

1 parent 700c2ee · commit bef7a20

File tree

11 files changed: +340 additions, -81 deletions


.github/unittest/linux_libs/scripts_rlhf/environment.yml renamed to .github/unittest/linux_libs/scripts_llm/environment.yml

Lines changed: 2 additions & 1 deletion
@@ -17,5 +17,6 @@ dependencies:
   - pyyaml
   - scipy
   - hydra-core
-  - transformers<4.42.0
+  - transformers
   - datasets
+  - vllm

.github/unittest/linux_libs/scripts_rlhf/install.sh renamed to .github/unittest/linux_libs/scripts_llm/install.sh

Lines changed: 18 additions & 17 deletions
@@ -26,23 +26,24 @@ fi
 # submodules
 git submodule sync && git submodule update --init --recursive
 
-printf "Installing PyTorch with cu128"
-if [[ "$TORCH_VERSION" == "nightly" ]]; then
-    if [ "${CU_VERSION:-}" == cpu ] ; then
-        pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cpu -U
-    else
-        pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cu128 -U
-    fi
-elif [[ "$TORCH_VERSION" == "stable" ]]; then
-    if [ "${CU_VERSION:-}" == cpu ] ; then
-        pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cpu
-    else
-        pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu128
-    fi
-else
-    printf "Failed to install pytorch"
-    exit 1
-fi
+# We skip pytorch install due to vllm requirements
+#printf "Installing PyTorch with cu128"
+#if [[ "$TORCH_VERSION" == "nightly" ]]; then
+#  if [ "${CU_VERSION:-}" == cpu ] ; then
+#    pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cpu -U
+#  else
+#    pip3 install --pre torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/nightly/cu128 -U
+#  fi
+#elif [[ "$TORCH_VERSION" == "stable" ]]; then
+#  if [ "${CU_VERSION:-}" == cpu ] ; then
+#    pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cpu
+#  else
+#    pip3 install torch "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu128
+#  fi
+#else
+#  printf "Failed to install pytorch"
+#  exit 1
+#fi
 
 # install tensordict
 if [[ "$RELEASE" == 0 ]]; then

.github/unittest/linux_libs/scripts_rlhf/run_test.sh renamed to .github/unittest/linux_libs/scripts_llm/run_test.sh

Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,8 @@ python -c "import transformers, datasets"
 
 python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_rlhf.py --instafail -v --durations 200 --capture no --error-for-skips
 
+python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_actors.py -k llm --instafail -v --durations 200 --capture no --error-for-skips --runslow
+
 python .github/unittest/helpers/coverage_run_parallel.py examples/rlhf/train_rlhf.py \
   sys.device=cuda:0 sys.ref_device=cuda:0 \
   model.name_or_path=gpt2 train.max_epochs=2 \

.github/workflows/test-linux-rlhf.yml renamed to .github/workflows/test-linux-llm.yml

Lines changed: 5 additions & 5 deletions
@@ -1,4 +1,4 @@
-name: RLHF Tests on Linux
+name: LLM Tests on Linux
 
 on:
   pull_request:
@@ -50,7 +50,7 @@ jobs:
         export TF_CPP_MIN_LOG_LEVEL=0
         export TD_GET_DEFAULTS_TO_NONE=1
 
-        bash .github/unittest/linux_libs/scripts_rlhf/setup_env.sh
-        bash .github/unittest/linux_libs/scripts_rlhf/install.sh
-        bash .github/unittest/linux_libs/scripts_rlhf/run_test.sh
-        bash .github/unittest/linux_libs/scripts_rlhf/post_process.sh
+        bash .github/unittest/linux_libs/scripts_llm/setup_env.sh
+        bash .github/unittest/linux_libs/scripts_llm/install.sh
+        bash .github/unittest/linux_libs/scripts_llm/run_test.sh
+        bash .github/unittest/linux_libs/scripts_llm/post_process.sh

test/test_actors.py

Lines changed: 91 additions & 21 deletions
@@ -919,54 +919,108 @@ def test_lmhead_actorvalueoperator(device):
 
 @pytest.mark.skipif(not _has_transformers, reason="missing transformers dependencies")
 @pytest.mark.skipif(not _has_vllm, reason="missing vllm dependencies")
-class TestTransformerActor:
+class TestLLMActor:
     @pytest.mark.parametrize(
-        "from_text, generate, tokens, attention_mask",
+        "from_text, generate, return_log_probs, tokens, attention_mask",
         [
-            (True, True, None, None),
-            (True, False, None, None),
+            (True, True, True, None, None),
+            (True, True, False, None, None),
+            (True, False, None, None, None),
             (
                 False,
                 True,
+                True,
                 torch.randint(1024, (1, 10)),
                 torch.ones(1, 10, dtype=torch.int64),
             ),
-            (False, True, torch.randint(1024, (1, 10)), None),
+            (False, True, True, torch.randint(1024, (1, 10)), None),
+            (
+                False,
+                True,
+                False,
+                torch.randint(1024, (1, 10)),
+                torch.ones(1, 10, dtype=torch.int64),
+            ),
+            (False, True, False, torch.randint(1024, (1, 10)), None),
         ],
     )
-    def test_from_hf_transformers(self, from_text, generate, tokens, attention_mask):
+    def test_from_hf_transformers(
+        self, from_text, generate, return_log_probs, tokens, attention_mask
+    ):
         from transformers import AutoTokenizer, GPT2Config, GPT2LMHeadModel
 
+        model_name = "distilbert-base-uncased"  # or "minilm" or "albert-tiny"
+        # Load the model and tokenizer
+        # model = AutoModel.from_pretrained(model_name)
+        # tokenizer = AutoTokenizer.from_pretrained(model_name)
+
         tokenizer = AutoTokenizer.from_pretrained("gpt2")
-        tokenizer.pad_token = tokenizer.eos_token
         model = GPT2LMHeadModel(GPT2Config())
+
+        tokenizer.pad_token = tokenizer.eos_token
         tokenizer.padding_side = "left"
+
         m = from_hf_transformers(
-            model, tokenizer=tokenizer, from_text=from_text, generate=generate
+            model,
+            tokenizer=tokenizer,
+            from_text=from_text,
+            generate=generate,
+            return_log_probs=return_log_probs,
+        )
+        self._run_check(
+            m,
+            tokens,
+            attention_mask,
+            generate,
+            return_log_probs,
+            from_text,
+            has_logits=True,
         )
-        self._run_check(m, tokens, attention_mask, generate, from_text, has_logits=True)
 
     @pytest.mark.parametrize(
-        "from_text, generate, tokens, attention_mask",
+        "from_text, generate, return_log_probs, tokens, attention_mask",
         [
-            (True, True, None, None),
-            (True, False, None, None),
+            (True, True, True, None, None),
+            (True, True, False, None, None),
+            (True, False, None, None, None),
+            (
+                False,
+                True,
+                True,
+                torch.randint(1024, (1, 10)),
+                torch.ones(1, 10, dtype=torch.int64),
+            ),
+            (False, True, True, torch.randint(1024, (1, 10)), None),
             (
                 False,
                 True,
+                False,
                 torch.randint(1024, (1, 10)),
                 torch.ones(1, 10, dtype=torch.int64),
             ),
-            (False, True, torch.randint(1024, (1, 10)), None),
+            (False, True, False, torch.randint(1024, (1, 10)), None),
         ],
     )
-    def test_from_vllm(self, from_text, generate, tokens, attention_mask):
+    def test_from_vllm(
+        self, from_text, generate, return_log_probs, tokens, attention_mask
+    ):
         from vllm import LLM
 
         model = LLM(model="facebook/opt-125m")
-        m = from_vllm(model, from_text=from_text, generate=generate)
+        m = from_vllm(
+            model,
+            from_text=from_text,
+            generate=generate,
+            return_log_probs=return_log_probs,
+        )
         self._run_check(
-            m, tokens, attention_mask, generate, from_text, has_logits=False
+            m,
+            tokens,
+            attention_mask,
+            generate,
+            return_log_probs,
+            from_text,
+            has_logits=False,
        )
 
     def _make_data(
@@ -1007,7 +1061,16 @@ def _make_data(
         )
         return tdin
 
-    def _run_check(self, m, tokens, attention_mask, generate, from_text, has_logits):
+    def _run_check(
+        self,
+        m,
+        tokens,
+        attention_mask,
+        generate,
+        return_log_probs,
+        from_text,
+        has_logits,
+    ):
         tdin = self._make_data(
             m, tokens, attention_mask, generate, from_text, has_logits
         )
@@ -1024,13 +1087,19 @@ def _run_check(self, m, tokens, attention_mask, generate, from_text, has_logits)
         if generate and (attention_mask is not None or from_text):
             assert td.attention_mask is not None, (generate, generate, from_text)
         else:
-            assert td.attention_mask is None
+            assert td.attention_mask is None, (generate, from_text)
         if not generate:
             # logprobs are computed on text response of tokens_response
             assert td.text_response is not None or td.tokens_response is not None
             assert td.log_probs is not None
             if has_logits:
                 assert td.logits is not None
+        if generate:
+            if return_log_probs:
+                assert td.log_probs is not None
+                assert td.log_probs.shape[-2] == td.tokens_response.shape[-1]
+            else:
+                assert td.log_probs is None
 
         # Test the shapes
         assert td.tokens_response is not None, (generate, has_logits, from_text)
@@ -1042,7 +1111,7 @@ def _run_check(self, m, tokens, attention_mask, generate, from_text, has_logits)
         assert (
             td.tokens_response[..., : td.tokens.shape[-1]]
             != td.tokens[..., : td.tokens_response.shape[-1]]
-        ).any()
+        ).any(), (generate, from_text)
 
     @pytest.mark.parametrize(
         "from_text, tokens, attention_mask",
@@ -1060,7 +1129,9 @@ def test_from_vllm_logprobs(self, from_text, tokens, attention_mask):
         from vllm import LLM
 
         model = LLM(model="facebook/opt-125m")
-        m_generate = from_vllm(model, from_text=from_text, generate=True)
+        m_generate = from_vllm(
+            model, from_text=from_text, generate=True, return_log_probs=True
+        )
         m_logprobs = from_vllm(model, from_text=from_text, generate=False)
         self._check_lps(
             m_generate, m_logprobs, tokens, attention_mask, from_text, has_logits=False
@@ -1091,7 +1162,6 @@ def _check_lps(
             text_response=td_generate.text_response,
         )
         td_logprobs = model_logprobs(tdin_logprobs)
-        print(td_generate.log_probs / td_logprobs.log_probs)
         torch.testing.assert_close(
             td_generate.log_probs, td_logprobs.log_probs, rtol=1e-2, atol=1e-2
         )
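For context, the new return_log_probs flag is what the updated tests exercise: in generate mode the wrapper is now expected to populate log_probs aligned with tokens_response only when the flag is set. A rough usage sketch based on those assertions follows; the from_vllm import location, the "text" input key, and the exact input construction are assumptions, since _make_data is outside this hunk:

# Sketch mirroring TestLLMActor.test_from_vllm, not verbatim repo code.
from tensordict import TensorDict
from vllm import LLM

from torchrl.modules import from_vllm  # import location assumed

model = LLM(model="facebook/opt-125m")
wrapper = from_vllm(model, from_text=True, generate=True, return_log_probs=True)

# Feed a single prompt; the "text" entry name is an assumption for from_text mode.
td = wrapper(TensorDict({"text": "The quick brown fox"}, batch_size=[]))

# Mirrors the new assertions in _run_check: with return_log_probs=True,
# log_probs is present and aligned with the generated tokens.
assert td.get("log_probs") is not None
assert td.get("log_probs").shape[-2] == td.get("tokens_response").shape[-1]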

torchrl/envs/custom/llm.py

Lines changed: 5 additions & 1 deletion
@@ -143,7 +143,11 @@ def __init__(
         # self.action_key = unravel_key(action_key)
         if str2str:
             self.full_observation_spec_unbatched = Composite(
-                {self.str_key: NonTensor(example_data="a string", batched=True, shape=())}
+                {
+                    self.str_key: NonTensor(
+                        example_data="a string", batched=True, shape=()
+                    )
+                }
             )
             self.full_action_spec_unbatched = Composite(
                 {action_key: NonTensor(example_data="a string", batched=True, shape=())}
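This hunk only rewraps the str2str observation spec; the Composite it builds is unchanged. A standalone sketch of the equivalent spec, assuming the torchrl.data import path and using a hypothetical "observation" key in place of self.str_key:

# Sketch under assumptions: same spec as LLMEnv.__init__ builds for str2str=True,
# constructed outside the class with "observation" standing in for self.str_key.
from torchrl.data import Composite, NonTensor

obs_spec = Composite(
    {"observation": NonTensor(example_data="a string", batched=True, shape=())}
)
print(obs_spec)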
