enforce safety checker; use dummy checker in fast tests

a-r-r-o-w · a-r-r-o-w · commit 3bc4cd92ef25 · 2025-04-13T16:43:46.000+02:00
diff --git a/src/diffusers/pipelines/cosmos/pipeline_cosmos.py b/src/diffusers/pipelines/cosmos/pipeline_cosmos.py
@@ -144,7 +144,6 @@ class CosmosPipeline(DiffusionPipeline):
 
     model_cpu_offload_seq = "text_encoder->transformer->vae"
     _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
-    _optional_components = ["safety_checker"]
 
     def __init__(
         self,
@@ -153,19 +152,12 @@ def __init__(
         transformer: CosmosTransformer3DModel,
         vae: AutoencoderKLCosmos,
         scheduler: EDMEulerScheduler,
-        safety_checker: CosmosSafetyChecker = None,
-        requires_safety_checker: bool = True,
+        safety_checker: CosmosSafetyChecker,
     ):
         super().__init__()
 
-        if requires_safety_checker and safety_checker is None:
-            safety_checker = CosmosSafetyChecker()
         if safety_checker is None:
-            logger.warning(
-                f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. This "
-                f"is in violation of the [NVIDIA Open Model License Agreement](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license). "
-                f"Please ensure that you are compliant with the license agreement."
-            )
+            safety_checker = CosmosSafetyChecker()
 
         self.register_modules(
             vae=vae,
@@ -175,7 +167,6 @@ def __init__(
             scheduler=scheduler,
             safety_checker=safety_checker,
         )
-        self.register_to_config(requires_safety_checker=requires_safety_checker)
 
         self.vae_scale_factor_temporal = (
             self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 8
@@ -476,6 +467,13 @@ def __call__(
                 indicating whether the corresponding generated image contains "not-safe-for-work" (nsfw) content.
         """
 
+        if self.safety_checker is None:
+            raise ValueError(
+                f"You have disabled the safety checker for {self.__class__}. This is in violation of the "
+                "[NVIDIA Open Model License Agreement](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license). "
+                f"Please ensure that you are compliant with the license agreement."
+            )
+
         if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
             callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
 
diff --git a/src/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py b/src/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py
@@ -187,7 +187,6 @@ class CosmosVideoToWorldPipeline(DiffusionPipeline):
 
     model_cpu_offload_seq = "text_encoder->transformer->vae"
     _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
-    _optional_components = ["safety_checker"]
 
     def __init__(
         self,
@@ -196,19 +195,12 @@ def __init__(
         transformer: CosmosTransformer3DModel,
         vae: AutoencoderKLCosmos,
         scheduler: EDMEulerScheduler,
-        safety_checker: CosmosSafetyChecker = None,
-        requires_safety_checker: bool = True,
+        safety_checker: CosmosSafetyChecker,
     ):
         super().__init__()
 
-        if requires_safety_checker and safety_checker is None:
-            safety_checker = CosmosSafetyChecker()
         if safety_checker is None:
-            logger.warning(
-                f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. This "
-                f"is in violation of the [NVIDIA Open Model License Agreement](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license). "
-                f"Please ensure that you are compliant with the license agreement."
-            )
+            safety_checker = CosmosSafetyChecker()
 
         self.register_modules(
             vae=vae,
@@ -218,7 +210,6 @@ def __init__(
             scheduler=scheduler,
             safety_checker=safety_checker,
         )
-        self.register_to_config(requires_safety_checker=requires_safety_checker)
 
         self.vae_scale_factor_temporal = (
             self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 8
@@ -591,6 +582,13 @@ def __call__(
                 indicating whether the corresponding generated image contains "not-safe-for-work" (nsfw) content.
         """
 
+        if self.safety_checker is None:
+            raise ValueError(
+                f"You have disabled the safety checker for {self.__class__}. This is in violation of the "
+                "[NVIDIA Open Model License Agreement](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license). "
+                f"Please ensure that you are compliant with the license agreement."
+            )
+
         if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
             callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
 
diff --git a/tests/pipelines/cosmos/cosmos_guardrail.py b/tests/pipelines/cosmos/cosmos_guardrail.py
@@ -0,0 +1,47 @@
+# Copyright 2024 The HuggingFace Team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ===== This file is an implementation of a dummy guardrail for the fast tests =====
+
+from typing import Union
+
+import numpy as np
+import torch
+
+from diffusers.configuration_utils import ConfigMixin
+from diffusers.models.modeling_utils import ModelMixin
+
+
+class DummyCosmosSafetyChecker(ModelMixin, ConfigMixin):
+    def __init__(self) -> None:
+        super().__init__()
+
+        self._dtype = torch.float32
+
+    def check_text_safety(self, prompt: str) -> bool:
+        return True
+
+    def check_video_safety(self, frames: np.ndarray) -> np.ndarray:
+        return frames
+
+    def to(self, device: Union[str, torch.device] = None, dtype: torch.dtype = None) -> None:
+        self._dtype = dtype
+
+    @property
+    def device(self) -> torch.device:
+        return None
+
+    @property
+    def dtype(self) -> torch.dtype:
+        return self._dtype
diff --git a/tests/pipelines/cosmos/test_cosmos.py b/tests/pipelines/cosmos/test_cosmos.py
@@ -13,6 +13,9 @@
 # limitations under the License.
 
 import inspect
+import json
+import os
+import tempfile
 import unittest
 
 import numpy as np
@@ -24,13 +27,21 @@
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import PipelineTesterMixin, to_np
+from .cosmos_guardrail import DummyCosmosSafetyChecker
 
 
 enable_full_determinism()
 
 
+class CosmosPipelineWrapper(CosmosPipeline):
+    @staticmethod
+    def from_pretrained(*args, **kwargs):
+        kwargs["safety_checker"] = DummyCosmosSafetyChecker()
+        return CosmosPipeline.from_pretrained(*args, **kwargs)
+
+
 class CosmosPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = CosmosPipeline
+    pipeline_class = CosmosPipelineWrapper
     params = TEXT_TO_IMAGE_PARAMS - {"cross_attention_kwargs"}
     batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
     image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
@@ -106,8 +117,7 @@ def get_dummy_components(self):
             "text_encoder": text_encoder,
             "tokenizer": tokenizer,
             # We cannot run the Cosmos Guardrail for fast tests due to the large model size
-            "safety_checker": None,
-            "requires_safety_checker": False,
+            "safety_checker": DummyCosmosSafetyChecker(),
         }
         return components
 
@@ -149,13 +159,6 @@ def test_inference(self):
         max_diff = np.abs(generated_video - expected_video).max()
         self.assertLessEqual(max_diff, 1e10)
 
-    def test_components_function(self):
-        init_components = self.get_dummy_components()
-        init_components = {k: v for k, v in init_components.items() if not isinstance(v, (str, int, float))}
-        pipe = self.pipeline_class(**init_components, requires_safety_checker=False)
-        self.assertTrue(hasattr(pipe, "components"))
-        self.assertTrue(set(pipe.components.keys()) == set(init_components.keys()))
-
     def test_callback_inputs(self):
         sig = inspect.signature(self.pipeline_class.__call__)
         has_callback_tensor_inputs = "callback_on_step_end_tensor_inputs" in sig.parameters
@@ -216,7 +219,7 @@ def callback_inputs_change_tensor(pipe, i, t, callback_kwargs):
         assert output.abs().sum() < 1e10
 
     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-3)
+        self._test_inference_batch_single_identical(batch_size=3, expected_max_diff=1e-2)
 
     def test_attention_slicing_forward_pass(
         self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-3
@@ -282,3 +285,61 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
             expected_diff_max,
             "VAE tiling should not affect the inference results",
         )
+
+    def test_serialization_with_variants(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        model_components = [
+            component_name
+            for component_name, component in pipe.components.items()
+            if isinstance(component, torch.nn.Module)
+        ]
+        model_components.remove("safety_checker")
+        variant = "fp16"
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pipe.save_pretrained(tmpdir, variant=variant, safe_serialization=False)
+
+            with open(f"{tmpdir}/model_index.json", "r") as f:
+                config = json.load(f)
+
+            for subfolder in os.listdir(tmpdir):
+                if not os.path.isfile(subfolder) and subfolder in model_components:
+                    folder_path = os.path.join(tmpdir, subfolder)
+                    is_folder = os.path.isdir(folder_path) and subfolder in config
+                    assert is_folder and any(p.split(".")[1].startswith(variant) for p in os.listdir(folder_path))
+
+    def test_torch_dtype_dict(self):
+        components = self.get_dummy_components()
+        if not components:
+            self.skipTest("No dummy components defined.")
+
+        pipe = self.pipeline_class(**components)
+
+        specified_key = next(iter(components.keys()))
+
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdirname:
+            pipe.save_pretrained(tmpdirname, safe_serialization=False)
+            torch_dtype_dict = {specified_key: torch.bfloat16, "default": torch.float16}
+            loaded_pipe = self.pipeline_class.from_pretrained(
+                tmpdirname, safety_checker=DummyCosmosSafetyChecker(), torch_dtype=torch_dtype_dict
+            )
+
+        for name, component in loaded_pipe.components.items():
+            if name == "safety_checker":
+                continue
+            if isinstance(component, torch.nn.Module) and hasattr(component, "dtype"):
+                expected_dtype = torch_dtype_dict.get(name, torch_dtype_dict.get("default", torch.float32))
+                self.assertEqual(
+                    component.dtype,
+                    expected_dtype,
+                    f"Component '{name}' has dtype {component.dtype} but expected {expected_dtype}",
+                )
+
+    @unittest.skip(
+        "The pipeline should not be runnable without a safety checker. The test creates a pipeline without passing in "
+        "a safety checker, which makes the pipeline default to the actual Cosmos Guardrail. The Cosmos Guardrail is "
+        "too large and slow to run on CI."
+    )
+    def test_encode_prompt_works_in_isolation(self):
+        pass
diff --git a/tests/pipelines/cosmos/test_cosmos_video2world.py b/tests/pipelines/cosmos/test_cosmos_video2world.py
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py