
Commit 188c0b4

fix diffusers pipelines d class ut (#2085)
1 parent c1b22f8

File tree

5 files changed: +198 −12 lines


mindnlp/core/_tensor.py

Lines changed: 3 additions & 0 deletions
@@ -678,6 +678,9 @@ def __contains__(self, item):
 Tensor.roll = ops.roll
 StubTensor.roll = ops.roll
 
+Tensor.bernoulli_ = ops.inplace_bernoulli
+StubTensor.bernoulli_ = ops.inplace_bernoulli
+
 
 def _rebuild_from_type_v2(func, new_type, args, state):
     ret = func(*args)
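With the binding in place, mindnlp tensors gain a torch-style in-place Bernoulli method. A minimal usage sketch, assuming a torch-compatible `core.ones` constructor (the constructor is an assumption; only the `bernoulli_` binding comes from this commit):

from mindnlp import core

mask = core.ones(2, 3)    # assumed constructor, not part of this diff
mask.bernoulli_(p=0.3)    # overwrite in place with 0/1 samples at probability p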

mindnlp/core/nn/functional.py

Lines changed: 13 additions & 6 deletions
@@ -1,5 +1,6 @@
 """nn functional"""
 import math
+import numbers
 import warnings
 from typing import Optional, Tuple, List
 import numpy as np
@@ -547,7 +548,7 @@ def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners
                 "Please provide input tensor in (N, C, d1, d2, ...,dK) format and "
                 "output size in (o1, o2, ...,oK) format."
             )
-            output_size = size
+            output_size = [s.item() if not isinstance(s, numbers.Number) else s for s in size]
         else:
             output_size = [size for _ in range(dim)]
     elif scale_factor is not None:
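The rewritten line matters because diffusers pipelines may pass `size` as a sequence of 0-d tensors rather than plain ints, while downstream resize ops expect Python numbers. A small sketch of the normalization, using a `mindspore.Tensor` scalar as the non-number case:

import numbers
import mindspore

size = (mindspore.Tensor(32), 64)   # mixed 0-d tensor / int, as a pipeline may pass
output_size = [s.item() if not isinstance(s, numbers.Number) else s for s in size]
print(output_size)                  # [32, 64], plain ints either way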
@@ -637,10 +638,10 @@ def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners
         )
     if input.dim() == 4 and mode == "bilinear":
         assert align_corners is not None
-        if antialias:
-            return torch._C._nn._upsample_bilinear2d_aa(
-                input, output_size, align_corners, scale_factors
-            )
+        # if antialias:
+        #     return torch._C._nn._upsample_bilinear2d_aa(
+        #         input, output_size, align_corners, scale_factors
+        #     )
         return upsample_bilinear2d_op(
             input, output_size, scale_factors, align_corners
         )
@@ -867,7 +868,13 @@ def conv_transpose3d(input, weight, bias=None, stride=1, padding=0, output_padding
 
 def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False):
     if use_pyboost():
-        return mint.nn.functional.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode=ceil_mode, return_indices=return_indices)
+        input_ndim = input.ndim
+        if input_ndim == 3:
+            input = input.unsqueeze(1)
+        out = mint.nn.functional.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode=ceil_mode, return_indices=return_indices)
+        if input_ndim == 3:
+            out = out.squeeze(1)
+        return out
     return ops.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode=ceil_mode, return_indices=return_indices)
 
 def max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False):
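The fix wraps the pyboost kernel, which appears to require batched 4-D input, so that an unbatched (C, H, W) tensor still pools: a size-1 dim is inserted before the call and squeezed back out afterwards. A hedged sketch of the new behavior, assuming a torch-compatible `core.rand` constructor:

from mindnlp import core
from mindnlp.core.nn import functional as F

x = core.rand(3, 8, 8)              # (C, H, W), no batch dim; rand is assumed
y = F.max_pool2d(x, kernel_size=2)
print(y.shape)                      # (3, 4, 4): the inserted dim is squeezed away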

mindnlp/core/ops/inplace.py

Lines changed: 6 additions & 1 deletion
@@ -181,6 +181,10 @@ def inplace_sub(self, other):
     self.data = core.sub(self, other)
     return self
 
+def inplace_bernoulli(self, p=0.5, *, generator=None):
+    self.data = core.bernoulli(self, generator=generator, p=p)
+    return self
+
 __all__ = [
     'inplace_copy',
     'inplace_zero',
@@ -202,5 +206,6 @@ def inplace_sub(self, other):
     'inplace_mul',
     'inplace_neg',
     'inplace_exp',
-    'inplace_sub'
+    'inplace_sub',
+    'inplace_bernoulli'
 ]
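`inplace_bernoulli` follows the same pattern as the other helpers in this file: compute out of place, then rebind `self.data`, so the caller's tensor object is mutated and returned. A short sketch, assuming a torch-compatible `core.ones` constructor:

from mindnlp import core
from mindnlp.core import ops

x = core.ones(4)                      # assumed constructor
y = ops.inplace_bernoulli(x, p=0.8)   # x now holds 0/1 samples
assert y is x                         # the same object comes back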

mindnlp/core/ops/random.py

Lines changed: 3 additions & 2 deletions
@@ -12,11 +12,12 @@
 
 # bernoulli
 has_bernoulli = hasattr(mindspore.mint, 'bernoulli')
-def bernoulli(input, *, generator=None, out=None):
+def bernoulli(input, *, generator=None, out=None, **kwargs):
+    p = kwargs.pop('p', 0.5)
     if use_pyboost() and has_bernoulli:
         return call_ms_func(mindspore.mint.bernoulli, input, generator=generator, out=out)
     random_numbers = rand(*input.shape, dtype=mindspore.float32)
-    samples = random_numbers < 0.5
+    samples = random_numbers < p
     samples = samples.int()
     if out is None:
         return samples
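Note the split the `p` keyword introduces: on the pyboost path `mindspore.mint.bernoulli` presumably mirrors torch.bernoulli and treats `input` element-wise as probabilities (so `p` is popped but unused there), while the fallback uses `input` only for its shape and thresholds fresh uniform noise at the scalar `p`. A sketch of the fallback semantics, assuming a torch-compatible `core.ones` constructor:

from mindnlp import core
from mindnlp.core import ops

x = core.ones(2, 5)            # shape donor on the fallback path; assumed constructor
s = ops.bernoulli(x, p=0.2)    # roughly 20% ones, drawn independently of x's values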

mindnlp/core/ops/reduction.py

Lines changed: 173 additions & 3 deletions
@@ -1,11 +1,13 @@
 """reduction op"""
+import numbers
 from collections import namedtuple
 import mindspore
 from mindspore import ops
 from mindspore.ops._primitive_cache import _get_cache_prim
 from ..configs import use_pyboost, DEVICE_TARGET
 
 from ._inner import call_ms_func
+from mindnlp import core
 
 max_out = namedtuple('max_out', ['values', 'indices'])
 min_out = namedtuple('min_out', ['values', 'indices'])
@@ -154,12 +156,180 @@ def prod(input, dim=None, keepdim=False, *, dtype=None):
     return ops.prod(input, dim, keepdim).to(dtype)
 
 # quantile
-def quantile(input, q, dim=None, keepdim=False, *, interpolation='linear'):
-    return ops.quantile(input, q, dim, keepdim)
+def quantile_output_shape(
+    original_dim,
+    input_tensor,
+    q,
+    keepdim,
+    wrapped_dim
+):
+    """
+    Compute the output shape of the quantile functions.
+
+    Args:
+        original_dim: the original dim argument (None means flatten)
+        input_tensor: the input tensor
+        q: the quantile tensor
+        keepdim: whether to keep the reduced dimension
+        wrapped_dim: the dimension index after wrapping negatives
+    """
+    # Output shape: the size of q prepended to the reduced input shape
+    out_shape = []
+
+    if original_dim is not None and input_tensor.dim() > 0:
+        # Preserve the original dimension structure
+        out_shape = list(input_tensor.shape)
+        if keepdim:
+            out_shape[wrapped_dim] = 1
+        else:
+            del out_shape[wrapped_dim]
+    elif keepdim:
+        # Flattened input but keepdim requested: use an all-ones shape
+        out_shape = [1] * input_tensor.dim()
+
+    if q.dim() > 0:
+        # Prepend the quantile dimension
+        out_shape.insert(0, q.numel())
+
+    return out_shape
+
+
+def quantile(
+    input_tensor,
+    q,
+    dim = None,
+    keepdim: bool = False,
+    interpolation: str = 'linear',
+    ignore_nan: bool = False
+):
+    """
+    Full implementation of the PyTorch quantile function.
+
+    Args:
+        input_tensor: the input data
+        q: the quantile(s), in [0, 1]
+        dim: the dimension to reduce
+        keepdim: whether to keep the reduced dimension
+        interpolation: interpolation mode ('linear', 'lower', 'higher', 'nearest', 'midpoint')
+        ignore_nan: whether to ignore NaN values
+
+    Returns:
+        The computed quantiles.
+    """
+    if isinstance(q, numbers.Number):
+        q = core.tensor(q, dtype=input_tensor.dtype)
+    # ===== 1. Input validation =====
+    device = input_tensor.device
+    dtype = input_tensor.dtype
+
+    # Validate the quantile range
+    if device.type == 'cpu':
+        if not core.all((q >= 0) & (q <= 1)):
+            raise ValueError("quantile() q values must be in the range [0, 1]")
+
+    # ===== 2. Dimension handling =====
+    wrapped_dim = dim if dim is not None else 0
+    original_dim = dim
+
+    if dim is not None:
+        # Validate the dimension
+        if dim < 0:
+            dim = input_tensor.dim() + dim
+        if dim < 0 or dim >= input_tensor.dim():
+            raise ValueError(f"Dimension out of range (expected to be in range [{-input_tensor.dim()}, {input_tensor.dim()-1}])")
+        wrapped_dim = dim
+
+    # Compute the output shape
+    out_shape = quantile_output_shape(original_dim, input_tensor, q, keepdim, wrapped_dim)
+
+    # ===== 3. Data preprocessing =====
+    # Handle a scalar quantile
+    q_scalar = q.dim() == 0
+    q = q.reshape(-1)  # make sure q is 1-D
+
+    # Flatten or reorder dimensions
+    if dim is None:
+        # Flatten the whole tensor
+        sorted_x, _ = input_tensor.flatten().sort()
+    elif wrapped_dim == input_tensor.dim() - 1:
+        # The target dim is already last: sort directly
+        sorted_x, _ = input_tensor.sort(dim=wrapped_dim)
+    else:
+        # Move the target dim to the end, then sort
+        transposed = input_tensor.transpose(wrapped_dim, -1).unsqueeze(-1)
+        sorted_x, _ = transposed.sort(dim=-2)
+        sorted_x = sorted_x.squeeze(-1)
+
+    # ===== 4. Core quantile computation =====
+    n = sorted_x.shape[-1]
+
+    # Handle empty input
+    if n == 0:
+        result = core.full(out_shape, float('nan'), device=device, dtype=dtype)
+        return result
+
+    # Compute rank positions (with NaN handling)
+    if ignore_nan:
+        # Count the non-NaN values
+        non_nan_count = (~sorted_x.isnan()).sum(dim=-1, keepdim=True)
+        ranks = q * (non_nan_count - 1)
+        ranks = core.clamp(ranks, min=0)  # guard against negative indices
+    else:
+        last_index = n - 1
+        # Broadcast the NaN flags
+        nan_mask = sorted_x.isnan().any(dim=-1, keepdim=True)
+        # Expand q and nan_mask to the same shape
+        expanded_q = q.view(1, -1).expand(*sorted_x.shape[:-1], q.numel())
+        nan_mask = nan_mask.expand_as(expanded_q)
+        # Compute the base ranks
+        ranks = expanded_q * last_index
+        # Rows containing NaN use the last index
+        ranks = core.where(nan_mask, core.tensor(last_index, device=device), ranks)
+
+    # Adjust the ranks according to the interpolation mode
+    if interpolation == 'lower':
+        ranks = core.floor(ranks)
+    elif interpolation == 'higher':
+        ranks = core.ceil(ranks)
+    elif interpolation == 'nearest':
+        ranks = core.round(ranks)
+
+    # Keep the ranks within the valid range
+    ranks = core.clamp(ranks, 0, n - 1)
+
+    # Lower-bound indices and values
+    ranks_below = ranks.to(core.int64)
+    values_below = sorted_x.gather(-1, ranks_below)
+
+    # ===== 5. Interpolation =====
+    if interpolation in ['linear', 'midpoint']:
+        # Interpolation weights
+        weights = core.full_like(ranks, 0.5) if interpolation == 'midpoint' else ranks - ranks_below
+
+        # Upper-bound values
+        ranks_above = core.ceil(ranks).to(core.int64)
+        values_above = sorted_x.gather(-1, ranks_above)
+
+        # Linear interpolation: result = (1 - weight)*below + weight*above
+        values_below = values_below.lerp(values_above, weights)
+
+    # ===== 6. Shape adjustment =====
+    if q_scalar:
+        # Scalar quantile: drop the quantile dimension
+        values_below = values_below.squeeze(-1)
+    else:
+        # Multiple quantiles: move the quantile dim to the front
+        values_below = values_below.movedim(-1, 0)
+
+    # Restore the expected output shape
+    if values_below.shape != tuple(out_shape):
+        values_below = values_below.reshape(out_shape)
+
+    return values_below
 
 # nanquantile
 def nanquantile(input, q, dim=None, keepdim=False, *, interpolation='linear'):
-    return ops.quantile(input, q, dim, keepdim)
+    return ops.nanquantile(input, q, dim, keepdim)
 
 # std
 has_std = hasattr(mindspore.mint, 'std')
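A quick check of the new quantile's shape contract, which mirrors torch.quantile: a scalar q is squeezed away, while a 1-D q lands as a leading dimension. Sketch, assuming the same torch-compatible `core.tensor` constructor the implementation itself uses:

from mindnlp import core
from mindnlp.core.ops.reduction import quantile

x = core.tensor([[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0]])
print(quantile(x, 0.5, dim=1))                               # [2.0, 5.0]: row medians, shape (2,)
print(quantile(x, core.tensor([0.25, 0.75]), dim=1).shape)   # (2, 2): the q dim comes first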
