Commit abf7ec5

Support receiving a list of generators in the Dream scheduler
1 parent 31977e6 commit abf7ec5

File tree

2 files changed: +131 -14 lines changed

src/diffusers/schedulers/scheduling_dream.py

Lines changed: 11 additions & 14 deletions
@@ -6,6 +6,7 @@
 
 from ..configuration_utils import ConfigMixin, register_to_config
 from ..utils import BaseOutput, logging
+from ..utils.torch_utils import multinomial_tensor, rand_tensor
 from .scheduling_utils import SchedulerMixin
 
 
@@ -75,7 +76,7 @@ def sample_tokens(
     top_k: Optional[int] = None,
     margin_confidence: bool = False,
     neg_entropy: bool = False,
-    generator: Optional[torch.Generator] = None,
+    generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
     """
     Samples from a sequence of logits of shape [..., vocab_size] and returns both the sampled sequence (as the second
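With the widened `generator` type, callers can seed each batch element separately. A minimal sketch (shapes and seeds are arbitrary; the import assumes this PR's module layout):

    import torch
    from diffusers.schedulers.scheduling_dream import sample_tokens

    logits = torch.randn(2, 16, 32000)  # [batch, seq_len, vocab_size]
    generators = [torch.Generator().manual_seed(s) for s in (0, 1)]  # one seed per prompt
    # confidence: probability of each sampled id; tokens: the sampled ids, shape [2, 16]
    confidence, tokens = sample_tokens(logits, temperature=1.0, generator=generators)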
@@ -91,19 +92,11 @@ def sample_tokens(
         logits = top_k_logits(logits, top_k)
 
     probs = torch.softmax(logits, dim=-1)
-    device = probs.device
-    probs_ = probs.to(generator.device) if generator is not None else probs  # handles when generator is on CPU
-    if probs_.device.type == "cpu" and probs_.dtype != torch.float32:
-        probs_ = probs_.float()  # multinomial is not implemented for cpu half precision
-    if probs.ndim > 2:
-        probs_ = probs_.reshape(-1, probs.size(-1))  # [B, L, V] --> [B * L, V]
 
     if temperature > 0:
         try:
             # Sample x0 ~ Cat(probs)
-            x0 = torch.multinomial(probs_, 1, generator=generator).to(device=device)
-            if probs.ndim > 2:
-                x0 = x0[:, 0].view(*probs.shape[:-1])  # [B * L, 1] --> [B, L]
+            x0 = multinomial_tensor(probs, 1, generator=generator, device=logits.device)
             confidence = torch.gather(probs, -1, x0.unsqueeze(-1)).squeeze(-1)  # [B, L]
         except:
             confidence, x0 = probs.max(dim=-1)
@@ -349,6 +342,7 @@ def step(
         timestep: Union[float, torch.Tensor],
         sample: torch.Tensor,
         generator: Optional[torch.Generator] = None,
+        noise: Optional[torch.Tensor] = None,
         return_dict: bool = True,
     ) -> Union[DreamMaskedDiffusionSchedulerOutput, Tuple]:
         """
@@ -364,6 +358,9 @@ def step(
                 A current instance of a sample created by the diffusion process.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
+            noise (`torch.Tensor`, *optional*):
+                Allows the noise to be specified directly as an alternative to generating noise with the generator.
+                Note that this noise should be drawn from the uniform distribution over [0, 1).
             return_dict (`bool`):
                 Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
                 tuple.
@@ -396,7 +393,9 @@ def step(
 
         # TODO: mask logits (model_output) beforehand? might make it more efficient?
         if self.config.logit_sampling_alg == "origin":
-            to_unmask_mask = torch.rand(*sample.shape, generator=generator, device=sample.device) < unmask_prob
+            if noise is None:
+                noise = rand_tensor(sample.shape, generator=generator, device=sample.device)
+            to_unmask_mask = noise < unmask_prob
            confidence, pred_original_sample = sample_tokens(
                model_output, temperature=temperature, top_p=top_p, top_k=top_k, generator=generator
            )
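The new `noise` argument lets callers supply the uniform noise behind the "origin" unmasking rule directly, e.g. for reuse across runs. A hedged sketch where `scheduler`, `model_output`, `t`, and `mask_token_id` are hypothetical stand-ins:

    import torch

    sample = torch.full((2, 16), mask_token_id)  # hypothetical fully-masked token ids
    noise = torch.rand(sample.shape)  # must be uniform over [0, 1), matching rand_tensor
    out = scheduler.step(model_output, t, sample, noise=noise)  # unmask decisions now use `noise`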
@@ -437,9 +436,7 @@ def step(
             else:
                 full_confidence = full_confidence / self.config.alg_temperature
             full_confidence = F.softmax(full_confidence, dim=-1)
-            unmask_indices = torch.multinomial(
-                full_confidence, num_samples=num_tokens_to_unmask, generator=generator
-            )
+            unmask_indices = multinomial_tensor(full_confidence, num_tokens_to_unmask, generator=generator)
             unmask_indices = unmask_indices.to(sample.device)
 
             row_indices = torch.arange(sample.size(0), device=sample.device).unsqueeze(1).expand_as(unmask_indices)
src/diffusers/utils/torch_utils.py

Lines changed: 120 additions & 0 deletions

@@ -86,6 +86,126 @@ def randn_tensor(
     return latents
 
 
+def rand_tensor(
+    shape: Union[Tuple, List],
+    generator: Optional[Union[List["torch.Generator"], "torch.Generator"]] = None,
+    device: Optional[Union[str, "torch.device"]] = None,
+    dtype: Optional["torch.dtype"] = None,
+    layout: Optional["torch.layout"] = None,
+):
+    """A helper function to create random tensors on the desired `device` with the desired `dtype`. When
+    passing a list of generators, you can seed each batch element individually. If CPU generators are passed, the
+    tensor is always created on the CPU. This is analogous to `randn_tensor`, except it creates random tensors from
+    the uniform distribution over [0, 1) using `torch.rand`.
+    """
+    # device on which tensor is created defaults to device
+    if isinstance(device, str):
+        device = torch.device(device)
+    rand_device = device
+    batch_size = shape[0]
+
+    layout = layout or torch.strided
+    device = device or torch.device("cpu")
+
+    if generator is not None:
+        gen_device_type = generator.device.type if not isinstance(generator, list) else generator[0].device.type
+        if gen_device_type != device.type and gen_device_type == "cpu":
+            rand_device = "cpu"
+            if device != "mps":
+                logger.info(
+                    f"The passed generator was created on 'cpu' even though a tensor on {device} was expected."
+                    f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably"
+                    f" slightly speed up this function by passing a generator that was created on the {device} device."
+                )
+        elif gen_device_type != device.type and gen_device_type == "cuda":
+            raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")
+
+    # make sure generator list of length 1 is treated like a non-list
+    if isinstance(generator, list) and len(generator) == 1:
+        generator = generator[0]
+
+    if isinstance(generator, list):
+        shape = (1,) + shape[1:]
+        latents = [
+            torch.rand(shape, generator=generator[i], device=rand_device, dtype=dtype, layout=layout)
+            for i in range(batch_size)
+        ]
+        latents = torch.cat(latents, dim=0).to(device)
+    else:
+        latents = torch.rand(shape, generator=generator, device=rand_device, dtype=dtype, layout=layout).to(device)
+
+    return latents
+
+
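A usage sketch for the new helper (seeds arbitrary): with a list of generators, row i is drawn from generator i, so each batch element reproduces independently of the overall batch size.

    import torch
    from diffusers.utils.torch_utils import rand_tensor

    gens = [torch.Generator().manual_seed(i) for i in range(4)]
    u = rand_tensor((4, 8), generator=gens)  # row i is drawn from gens[i]
    u0 = rand_tensor((1, 8), generator=torch.Generator().manual_seed(0))
    assert torch.equal(u[:1], u0)  # first row matches a fresh batch-of-1 run with the same seed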
+def multinomial_tensor(
+    logits: torch.Tensor,
+    num_samples: int,
+    replacement: bool = False,
+    generator: Optional[Union[List["torch.Generator"], "torch.Generator"]] = None,
+    device: Optional[Union[str, "torch.device"]] = None,
+    squeeze_trailing_dim: bool = True,
+):
+    """
+    Creates a tensor drawn from the multinomial distribution specified by the (possibly unnormalized) probabilities
+    given by `logits`. This is analogous to `randn_tensor`, wrapping `torch.multinomial` rather than `torch.randn`.
+
+    In general, if `logits` has shape [..., num_categories], where the ... represents leading batch dimensions, the
+    output will have shape [..., num_samples]. `logits` is assumed to have at least one leading batch dimension.
+    """
+    batch_size = logits.shape[0]
+    num_cats = logits.shape[-1]
+
+    device = device or torch.device("cpu")
+
+    if generator is not None:
+        gen_device = generator.device if not isinstance(generator, list) else generator[0].device
+        gen_device_type = gen_device.type
+        if gen_device_type != device.type and gen_device_type == "cpu":
+            if device != "mps":
+                logger.info(
+                    f"The passed generator was created on 'cpu' even though a tensor on {device} was expected."
+                    f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably"
+                    f" slightly speed up this function by passing a generator that was created on the {device} device."
+                )
+        elif gen_device_type != device.type and gen_device_type == "cuda":
+            raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")
+
+    # make sure generator list of length 1 is treated like a non-list
+    if isinstance(generator, list) and len(generator) == 1:
+        generator = generator[0]
+
+    # Handle the case where generator is on CPU
+    logits_ = logits.to(gen_device) if generator is not None else logits
+
+    # Multinomial is not implemented for half precision on CPU
+    if logits_.device.type == "cpu" and logits_.dtype != torch.float32:
+        logits_ = logits_.float()
+
+    if isinstance(generator, list):
+        sample = []
+        original_shape = logits.shape[1:-1]
+        for i in range(batch_size):
+            logits_instance = logits_[i]
+            # Flatten extra batch dims so torch.multinomial sees a 2-D tensor
+            needs_reshape = logits_instance.ndim > 2
+            if needs_reshape:
+                logits_instance = logits_instance.reshape(-1, num_cats)
+            sample_instance = torch.multinomial(logits_instance, num_samples, replacement, generator=generator[i])
+            if needs_reshape:
+                sample_instance = sample_instance.view(*original_shape, num_samples)
+            sample.append(sample_instance)
+        sample = torch.stack(sample, dim=0).to(device)
+    else:
+        if logits.ndim > 2:
+            original_shape = logits.shape[:-1]
+            logits_ = logits_.reshape(-1, logits.size(-1))
+        sample = torch.multinomial(logits_, num_samples, replacement, generator=generator).to(device)
+        if logits.ndim > 2:
+            sample = sample.view(*original_shape, num_samples)
+
+    if squeeze_trailing_dim:
+        sample = sample.squeeze(-1)
+
+    return sample
+
+
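And a shape-focused sketch for `multinomial_tensor` (sizes arbitrary): leading batch dimensions are flattened for `torch.multinomial` and restored afterwards, and the trailing sample dimension is squeezed when `num_samples` is 1.

    import torch
    from diffusers.utils.torch_utils import multinomial_tensor

    probs = torch.softmax(torch.randn(2, 16, 100), dim=-1)  # [B, L, V]
    ids = multinomial_tensor(probs, 1)  # one category per position -> [2, 16]
    weights = torch.rand(2, 100)  # unnormalized, nonnegative weights are fine
    picks = multinomial_tensor(weights, 5)  # 5 draws without replacement -> [2, 5]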
 def is_compiled_module(module) -> bool:
     """Check whether the module was compiled with torch.compile()"""
     if is_torch_version("<", "2.0.0") or not hasattr(torch, "_dynamo"):
