
Add MultiLabelMarginLoss #73538


Merged: 6 commits, merged on Jul 9, 2025
2 changes: 2 additions & 0 deletions python/paddle/nn/__init__.py
@@ -108,6 +108,7 @@
L1Loss,
MarginRankingLoss,
MSELoss,
MultiLabelMarginLoss,
MultiLabelSoftMarginLoss,
MultiMarginLoss,
NLLLoss,
@@ -305,6 +306,7 @@
'CosineEmbeddingLoss',
'RReLU',
'MultiMarginLoss',
'MultiLabelMarginLoss',
'TripletMarginWithDistanceLoss',
'TripletMarginLoss',
'SoftMarginLoss',
2 changes: 2 additions & 0 deletions python/paddle/nn/functional/__init__.py
@@ -116,6 +116,7 @@
margin_cross_entropy,
margin_ranking_loss,
mse_loss,
multi_label_margin_loss,
multi_label_soft_margin_loss,
multi_margin_loss,
nll_loss,
@@ -292,6 +293,7 @@
'triplet_margin_loss',
'adaptive_log_softmax_with_loss',
'multi_margin_loss',
'multi_label_margin_loss',
'soft_margin_loss',
'gaussian_nll_loss',
'scaled_dot_product_attention',
119 changes: 119 additions & 0 deletions python/paddle/nn/functional/loss.py
@@ -4196,6 +4196,125 @@ def multi_margin_loss(
return loss


def multi_label_margin_loss(
input: Tensor,
label: Tensor,
reduction: _ReduceMode = 'mean',
name: str | None = None,
) -> Tensor:
r"""Measures a multi-class multi-classification hinge loss (margin-based loss) between input :math:`input` and label :math:`label`:

For the i-th mini-batch sample, the loss in terms of the 2D input :math:`input_i` and 2D label :math:`label_i` is:

.. math::
\text{loss}(input_i, label_i) = \frac{\sum_{j \in \text{valid_labels}} \sum_{k \notin \text{valid_labels}} \max(0, 1 - (input_i[j] - input_i[k]))}{C}

where :math:`C` is the number of classes, :math:`\text{valid_labels}` is the set of target class indices
for sample :math:`i` (the leading non-negative entries of :math:`label_i`, up to the first -1 encountered),
and :math:`k` ranges over all class indices not in :math:`\text{valid_labels}`.

The criterion only considers the leading block of non-negative label values, which allows different samples to have variable numbers of target classes.

Parameters:
input (Tensor): Input tensor, the data type is float32 or float64. Shape is (N, C), where C is number of classes.
label (Tensor): Label tensor, the data type is int32 or int64. Shape is (N, C), same shape as input.
Label values should be class indices (non-negative integers) or -1.
The first -1 in each row ends that sample's target list; all values after it are ignored.
reduction (str, optional): Indicate how to average the loss over the batch,
the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
If :attr:`reduction` is ``'sum'``, the summed loss is returned.
Default: ``'mean'``
name (str|None, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

Returns:
Tensor, The tensor variable storing the multi_label_margin_loss of input and label.

Examples:
.. code-block:: python

>>> import paddle
>>> import paddle.nn.functional as F

>>> input = paddle.to_tensor([[0.1, 0.2, 0.4, 0.8], [0.2, 0.5, 0.3, 0.1]], dtype='float32')
>>> label = paddle.to_tensor([[3, 0, -1, -1], [0, 2, -1, -1]], dtype='int64')

>>> loss = F.multi_label_margin_loss(input, label, reduction='mean')
>>> print(loss)
Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
0.94999999)
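>>> # Worked check (our annotation, not produced by the code): for the first
>>> # sample the valid labels are {3, 0} and the remaining classes are {1, 2}:
>>> # j=3: max(0, 1-(0.8-0.2)) + max(0, 1-(0.8-0.4)) = 0.4 + 0.6
>>> # j=0: max(0, 1-(0.1-0.2)) + max(0, 1-(0.1-0.4)) = 1.1 + 1.3
>>> # (0.4+0.6+1.1+1.3)/4 = 0.85; the second sample gives 4.2/4 = 1.05,
>>> # so the batch mean is 0.95, matching the printed value.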
"""
if reduction not in ['sum', 'mean', 'none']:
raise ValueError(
"'reduction' in 'multi_label_margin_loss' should be 'sum', 'mean' or 'none', "
f"but received {reduction}."
)

if not in_dynamic_mode():
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'multi_label_margin_loss'
)
check_variable_and_dtype(
label, 'label', ['int32', 'int64'], 'multi_label_margin_loss'
)

if input.dim() != 2:
raise ValueError(f'Expected 2D input tensor, but got {input.dim()}D')

if label.dim() != 2:
raise ValueError(f'Expected 2D label tensor, but got {label.dim()}D')

N, C = input.shape

if paddle.in_dynamic_mode() and label.numel() > 0:
min_val = paddle.min(label).item()
max_val = paddle.max(label).item()

if min_val < -1:
raise ValueError("label values should be >= -1")
if max_val >= C:
raise ValueError(f"label values should be < {C}")

# keep only the leading block of non-negative labels: cumprod along dim=1
# zeroes every position at and after the first -1 in each row
valid_mask = (label != -1).cast('int32')
valid_mask = valid_mask * valid_mask.cumprod(dim=1)

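# coordinates of every valid (sample, slot) pair, and the target class stored at each slot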
row_ids, col_ids = paddle.where(valid_mask)
targets_flat = label[row_ids, col_ids]

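# invalid_mask[i][k] is True when class k is not a target of sample i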
invalid_mask = paddle.ones([N, C], dtype='bool')
invalid_mask[row_ids, targets_flat] = False

# broadcast the hinge margin 1 - input[j] + input[k] over all classes k,
# then zero it where k is itself a target class
input_target = input[row_ids, targets_flat].unsqueeze(-1)
margin = 1 - input_target + input[row_ids]
margin = paddle.where(
invalid_mask[row_ids], margin, paddle.zeros_like(margin)
)

relu_margin = paddle.maximum(margin, paddle.zeros_like(margin))

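# sum the hinge terms over classes for each valid target, then accumulate them per sample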
losses = paddle.scatter_nd_add(
paddle.zeros([N], dtype=input.dtype),
row_ids.unsqueeze(-1),
relu_margin.sum(axis=1),
)

# the formula averages by the number of classes C (not by the count of valid labels)
losses /= C

if reduction == 'mean':
return paddle.mean(losses, name=name)
elif reduction == 'sum':
return paddle.sum(losses, name=name)
elif reduction == 'none':
return losses


def soft_margin_loss(
input: Tensor,
label: Tensor,
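For review purposes, here is a minimal loop-based reference of the formula above (a sketch we added, assuming small `N` and `C`; the helper name `multi_label_margin_loss_ref` is not part of the PR). The vectorized implementation should agree with it up to float error:

import paddle

def multi_label_margin_loss_ref(input, label):
    # direct per-sample transcription of the docstring formula (reduction='none')
    N, C = input.shape
    losses = []
    for i in range(N):
        valid = []
        for v in label[i].tolist():
            if v == -1:
                break  # the first -1 ends this sample's target list
            valid.append(v)
        total = 0.0
        for j in valid:  # j is a target class index
            for k in range(C):
                if k not in valid:
                    total += max(0.0, 1.0 - (input[i, j] - input[i, k]).item())
        losses.append(total / C)
    return paddle.to_tensor(losses, dtype=input.dtype)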
1 change: 1 addition & 0 deletions python/paddle/nn/layer/__init__.py
@@ -72,6 +72,7 @@
L1Loss,
MarginRankingLoss,
MSELoss,
MultiLabelMarginLoss,
MultiLabelSoftMarginLoss,
MultiMarginLoss,
NLLLoss,
86 changes: 86 additions & 0 deletions python/paddle/nn/layer/loss.py
@@ -2197,6 +2197,92 @@ def forward(self, input: Tensor, label: Tensor) -> Tensor:
)


class MultiLabelMarginLoss(Layer):
r"""Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss)
between input :math:`input` and label :math:`label`:

For the i-th mini-batch sample, the loss in terms of the 2D input :math:`input_i` and 2D label :math:`label_i` is:

.. math::
\text{loss}(input_i, label_i) = \frac{\sum_{j \in \text{valid_labels}} \sum_{k \notin \text{valid_labels}} \max(0, 1 - (input_i[j] - input_i[k]))}{C}

where :math:`C` is the number of classes, :math:`\text{valid_labels}` is the set of target class indices
for sample :math:`i` (the leading non-negative entries of :math:`label_i`, up to the first -1 encountered),
and :math:`k` ranges over all class indices not in :math:`\text{valid_labels}`.

The criterion only considers the leading block of non-negative label values, which allows different samples to have variable numbers of target classes.

Parameters:

reduction (str, optional): Indicate how to average the loss over the batch,
the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
If :attr:`reduction` is ``'sum'``, the summed loss is returned.
Default: ``'mean'``

name (str|None, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

Call parameters:
input (Tensor): Input tensor, the data type is float32 or float64.

label (Tensor): Label tensor, the data type is int32 or int64.
Label values should be class indices (non-negative integers) or -1.
The first -1 in each row ends that sample's target list; all values after it are ignored.

Shape:
input: 2-D Tensor, the shape is :math:`[N, C]`, where :math:`N` is batch size and :math:`C` is number of classes.

label: 2-D Tensor, the shape is :math:`[N, C]`, same shape as input.

output: scalar by default. If :attr:`reduction` is ``'none'``, the shape is :math:`[N]`.

Returns:
A callable object of MultiLabelMarginLoss.

Examples:
.. code-block:: python

>>> import paddle
>>> import paddle.nn as nn

>>> input = paddle.to_tensor([[0.1, 0.2, 0.4, 0.8], [0.2, 0.5, 0.3, 0.1]], dtype='float32')
>>> label = paddle.to_tensor([[3, 0, -1, -1], [0, 2, -1, -1]], dtype='int64')

>>> multi_label_margin_loss = nn.MultiLabelMarginLoss(reduction='mean')
>>> loss = multi_label_margin_loss(input, label)
>>> print(loss)
Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
0.94999999)
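>>> # With reduction='none' the per-sample losses would be approximately
>>> # [0.85, 1.05] (our annotation); their mean is the 0.95 printed above.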
"""

reduction: _ReduceMode
name: str | None

def __init__(
self,
reduction: _ReduceMode = 'mean',
name: str | None = None,
) -> None:
super().__init__()
if reduction not in ['sum', 'mean', 'none']:
raise ValueError(
"'reduction' in 'MultiLabelMarginLoss' should be 'sum', 'mean' or 'none', "
f"but received {reduction}."
)
self.reduction = reduction
self.name = name

def forward(self, input: Tensor, label: Tensor) -> Tensor:
return F.multi_label_margin_loss(
input,
label,
reduction=self.reduction,
name=self.name,
)


class SoftMarginLoss(Layer):
r"""

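As a quick end-to-end check of the new layer (our sketch, not part of the PR; shapes and the padding scheme are arbitrary):

import paddle
import paddle.nn as nn

# random scores for a batch of 8 samples over 10 classes
input = paddle.randn([8, 10])
input.stop_gradient = False

# two target classes per sample, padded with -1 to match the input's shape
label = paddle.randint(0, 10, [8, 2])
label = paddle.concat([label, paddle.full([8, 8], -1, dtype='int64')], axis=1)

loss_fn = nn.MultiLabelMarginLoss(reduction='mean')
loss = loss_fn(input, label)
loss.backward()  # gradients flow back to `input`
print(loss)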