
Add MultiLabelMarginLoss #73538


Merged: 6 commits, merged on Jul 9, 2025
2 changes: 2 additions & 0 deletions python/paddle/nn/__init__.py
@@ -108,6 +108,7 @@
L1Loss,
MarginRankingLoss,
MSELoss,
MultiLabelMarginLoss,
MultiLabelSoftMarginLoss,
MultiMarginLoss,
NLLLoss,
@@ -305,6 +306,7 @@
'CosineEmbeddingLoss',
'RReLU',
'MultiMarginLoss',
'MultiLabelMarginLoss',
'TripletMarginWithDistanceLoss',
'TripletMarginLoss',
'SoftMarginLoss',
2 changes: 2 additions & 0 deletions python/paddle/nn/functional/__init__.py
@@ -116,6 +116,7 @@
margin_cross_entropy,
margin_ranking_loss,
mse_loss,
multi_label_margin_loss,
multi_label_soft_margin_loss,
multi_margin_loss,
nll_loss,
@@ -292,6 +293,7 @@
'triplet_margin_loss',
'adaptive_log_softmax_with_loss',
'multi_margin_loss',
'multi_label_margin_loss',
'soft_margin_loss',
'gaussian_nll_loss',
'scaled_dot_product_attention',
119 changes: 119 additions & 0 deletions python/paddle/nn/functional/loss.py
@@ -4196,6 +4196,125 @@ def multi_margin_loss(
return loss


def multi_label_margin_loss(
input: Tensor,
label: Tensor,
reduction: _ReduceMode = 'mean',
name: str | None = None,
) -> Tensor:
r"""Measures a multi-class multi-classification hinge loss (margin-based loss) between input :math:`input` and label :math:`label`:

For the i-th mini-batch sample, the loss in terms of the 2D input :math:`input_i` and 2D label :math:`label_i` is:

.. math::
\text{loss}(input_i, label_i) = \frac{\sum_{j \in \text{valid_labels}} \sum_{k \notin \text{valid_labels}} \max(0, 1 - (input_i[j] - input_i[k]))}{C}

where :math:`C` is the number of classes, :math:`\text{valid_labels}` is the set of target class indices
for sample :math:`i` (the leading non-negative entries of :math:`label_i`, up to the first -1 encountered),
and :math:`k` ranges over all class indices not in :math:`\text{valid_labels}`.

The criterion only considers the leading block of non-negative label values, which allows different samples to have variable numbers of target classes.

Parameters:
input (Tensor): Input tensor, the data type is float32 or float64. Shape is (N, C), where C is number of classes.
label (Tensor): Label tensor, the data type is int32 or int64. Shape is (N, C), same shape as input.
Label values should be class indices (non-negative integers) or -1.
The first -1 in each row ends that sample's target list; all values after it are ignored.
reduction (str, optional): Indicate how to average the loss over the batch,
the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
If :attr:`reduction` is ``'sum'``, the summed loss is returned.
Default: ``'mean'``
name (str|None, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

Returns:
Tensor, The tensor variable storing the multi_label_margin_loss of input and label.

Examples:
.. code-block:: python

>>> import paddle
>>> import paddle.nn.functional as F

>>> input = paddle.to_tensor([[0.1, 0.2, 0.4, 0.8], [0.2, 0.5, 0.3, 0.1]], dtype='float32')
>>> label = paddle.to_tensor([[3, 0, -1, -1], [0, 2, -1, -1]], dtype='int64')

>>> loss = F.multi_label_margin_loss(input, label, reduction='mean')
>>> print(loss)
Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
0.94999999)
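>>> # Worked check (our annotation, not produced by the code): for the first
>>> # sample the valid labels are {3, 0} and the remaining classes are {1, 2}:
>>> # j=3: max(0, 1-(0.8-0.2)) + max(0, 1-(0.8-0.4)) = 0.4 + 0.6
>>> # j=0: max(0, 1-(0.1-0.2)) + max(0, 1-(0.1-0.4)) = 1.1 + 1.3
>>> # (0.4+0.6+1.1+1.3)/4 = 0.85; the second sample gives 4.2/4 = 1.05,
>>> # so the batch mean is 0.95, matching the printed value.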
"""
if reduction not in ['sum', 'mean', 'none']:
raise ValueError(
"'reduction' in 'multi_label_margin_loss' should be 'sum', 'mean' or 'none', "
f"but received {reduction}."
)

if not in_dynamic_mode():
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'multi_label_margin_loss'
)
check_variable_and_dtype(
label, 'label', ['int32', 'int64'], 'multi_label_margin_loss'
)

if input.dim() != 2:
raise ValueError(f'Expected 2D input tensor, but got {input.dim()}D')

if label.dim() != 2:
raise ValueError(f'Expected 2D label tensor, but got {label.dim()}D')

N, C = input.shape

if paddle.in_dynamic_mode() and label.numel() > 0:
min_val = paddle.min(label).item()
max_val = paddle.max(label).item()

if min_val < -1:
raise ValueError("label values should be >= -1")
if max_val >= C:
raise ValueError(f"label values should be < {C}")

# keep only the leading block of non-negative labels: cumprod along dim=1
# zeroes every position at and after the first -1 in each row
valid_mask = (label != -1).cast('int32')
valid_mask = valid_mask * valid_mask.cumprod(dim=1)

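# coordinates of every valid (sample, slot) pair, and the target class stored at each slot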
row_ids, col_ids = paddle.where(valid_mask)
targets_flat = label[row_ids, col_ids]

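# invalid_mask[i][k] is True when class k is not a target of sample i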
invalid_mask = paddle.ones([N, C], dtype='bool')
invalid_mask[row_ids, targets_flat] = False

# broadcast the hinge margin 1 - input[j] + input[k] over all classes k,
# then zero it where k is itself a target class
input_target = input[row_ids, targets_flat].unsqueeze(-1)
margin = 1 - input_target + input[row_ids]
margin = paddle.where(
invalid_mask[row_ids], margin, paddle.zeros_like(margin)
)

relu_margin = paddle.maximum(margin, paddle.zeros_like(margin))

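# sum the hinge terms over classes for each valid target, then accumulate them per sample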
losses = paddle.scatter_nd_add(
paddle.zeros([N], dtype=input.dtype),
row_ids.unsqueeze(-1),
relu_margin.sum(axis=1),
)

# the formula averages by the number of classes C (not by the count of valid labels)
losses /= C

if reduction == 'mean':
return paddle.mean(losses, name=name)
elif reduction == 'sum':
return paddle.sum(losses, name=name)
elif reduction == 'none':
return losses


def soft_margin_loss(
input: Tensor,
label: Tensor,
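For review purposes, here is a minimal loop-based reference of the formula above (a sketch we added, assuming small `N` and `C`; the helper name `multi_label_margin_loss_ref` is not part of the PR). The vectorized implementation should agree with it up to float error:

import paddle

def multi_label_margin_loss_ref(input, label):
    # direct per-sample transcription of the docstring formula (reduction='none')
    N, C = input.shape
    losses = []
    for i in range(N):
        valid = []
        for v in label[i].tolist():
            if v == -1:
                break  # the first -1 ends this sample's target list
            valid.append(v)
        total = 0.0
        for j in valid:  # j is a target class index
            for k in range(C):
                if k not in valid:
                    total += max(0.0, 1.0 - (input[i, j] - input[i, k]).item())
        losses.append(total / C)
    return paddle.to_tensor(losses, dtype=input.dtype)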
1 change: 1 addition & 0 deletions python/paddle/nn/layer/__init__.py
@@ -72,6 +72,7 @@
L1Loss,
MarginRankingLoss,
MSELoss,
MultiLabelMarginLoss,
MultiLabelSoftMarginLoss,
MultiMarginLoss,
NLLLoss,
86 changes: 86 additions & 0 deletions python/paddle/nn/layer/loss.py
@@ -2197,6 +2197,92 @@ def forward(self, input: Tensor, label: Tensor) -> Tensor:
)


class MultiLabelMarginLoss(Layer):
r"""Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss)
between input :math:`input` and label :math:`label`:

For the i-th mini-batch sample, the loss in terms of the 2D input :math:`input_i` and 2D label :math:`label_i` is:

.. math::
\text{loss}(input_i, label_i) = \frac{\sum_{j \in \text{valid_labels}} \sum_{k \notin \text{valid_labels}} \max(0, 1 - (input_i[j] - input_i[k]))}{C}

where :math:`C` is the number of classes, :math:`\text{valid_labels}` is the set of target class indices
for sample :math:`i` (the leading non-negative entries of :math:`label_i`, up to the first -1 encountered),
and :math:`k` ranges over all class indices not in :math:`\text{valid_labels}`.

The criterion only considers the leading block of non-negative label values, which allows different samples to have variable numbers of target classes.

Parameters:

reduction (str, optional): Indicate how to average the loss over the batch,
the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
If :attr:`reduction` is ``'sum'``, the summed loss is returned.
Default: ``'mean'``

name (str|None, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

Call parameters:
input (Tensor): Input tensor, the data type is float32 or float64.

label (Tensor): Label tensor, the data type is int32 or int64.
Label values should be class indices (non-negative integers) or -1.
The first -1 in each row ends that sample's target list; all values after it are ignored.

Shape:
input: 2-D Tensor, the shape is :math:`[N, C]`, where :math:`N` is batch size and :math:`C` is number of classes.

label: 2-D Tensor, the shape is :math:`[N, C]`, same shape as input.

output: scalar by default. If :attr:`reduction` is ``'none'``, the shape is :math:`[N]`.

Returns:
A callable object of MultiLabelMarginLoss.

Examples:
.. code-block:: python

>>> import paddle
>>> import paddle.nn as nn

>>> input = paddle.to_tensor([[0.1, 0.2, 0.4, 0.8], [0.2, 0.5, 0.3, 0.1]], dtype='float32')
>>> label = paddle.to_tensor([[3, 0, -1, -1], [0, 2, -1, -1]], dtype='int64')

>>> multi_label_margin_loss = nn.MultiLabelMarginLoss(reduction='mean')
>>> loss = multi_label_margin_loss(input, label)
>>> print(loss)
Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
0.94999999)
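>>> # With reduction='none' the per-sample losses would be approximately
>>> # [0.85, 1.05] (our annotation); their mean is the 0.95 printed above.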
"""

reduction: _ReduceMode
name: str | None

def __init__(
self,
reduction: _ReduceMode = 'mean',
name: str | None = None,
) -> None:
super().__init__()
if reduction not in ['sum', 'mean', 'none']:
raise ValueError(
"'reduction' in 'MultiLabelMarginLoss' should be 'sum', 'mean' or 'none', "
f"but received {reduction}."
)
self.reduction = reduction
self.name = name

def forward(self, input: Tensor, label: Tensor) -> Tensor:
return F.multi_label_margin_loss(
input,
label,
reduction=self.reduction,
name=self.name,
)


class SoftMarginLoss(Layer):
r"""

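As a quick end-to-end check of the new layer (our sketch, not part of the PR; shapes and the padding scheme are arbitrary):

import paddle
import paddle.nn as nn

# random scores for a batch of 8 samples over 10 classes
input = paddle.randn([8, 10])
input.stop_gradient = False

# two target classes per sample, padded with -1 to match the input's shape
label = paddle.randint(0, 10, [8, 2])
label = paddle.concat([label, paddle.full([8, 8], -1, dtype='int64')], axis=1)

loss_fn = nn.MultiLabelMarginLoss(reduction='mean')
loss = loss_fn(input, label)
loss.backward()  # gradients flow back to `input`
print(loss)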