2 changes: 1 addition & 1 deletion mindnlp/__init__.py
@@ -38,7 +38,7 @@
mindspore.set_device(os.environ.get('DEVICE_TARGET'))

# for different ascend devices
if platform.system().lower() == 'linux':
if platform.system().lower() == 'linux' and mindspore.get_context('device_target') == 'Ascend':
SOC = MSContext.get_instance().get_ascend_soc_version()
# enable vmm since only vmm can release device memory when del tensor.
if SOC != 'ascend310b':
1 change: 1 addition & 0 deletions mindnlp/core/__init__.py
@@ -36,6 +36,7 @@
preserve_format = None
legacy_contiguous_format = None
channels_last_3d = None
channels_last = None
memory_format = None

inf = float("inf")
9 changes: 9 additions & 0 deletions mindnlp/core/_apis/cpu.py
@@ -1221,3 +1221,12 @@ def logsumexp(input, dim, keepdim=False):

def bernoulli(input, generator):
return legacy.bernoulli(input, seed, offset)

def right_shift(input, other):
return legacy.right_shift(input, other)

def histc(input, bins=100, min=0, max=0):
return legacy.histogram(input, bins, float(min), float(max))

def search_sorted(sorted_sequence, values, sorter, dtype, right):
return legacy.search_sorted(sorted_sequence, values, sorter, dtype, right)
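Reviewer note: the three new CPU entries only forward to the legacy primitives. Below is a minimal pure-Python sketch of the searchsorted contract these wrappers are presumably expected to satisfy (mirroring torch.searchsorted, with `right` controlling tie handling); `search_sorted_ref` is a stand-in, not the legacy kernel.

```python
# Reference sketch only: insertion indices that keep the sequence sorted,
# with right=True placing equal values after existing entries.
import bisect

def search_sorted_ref(sorted_sequence, values, right=False):
    insert = bisect.bisect_right if right else bisect.bisect_left
    return [insert(sorted_sequence, v) for v in values]

print(search_sorted_ref([1, 3, 5, 7], [0, 3, 8]))              # [0, 1, 4]
print(search_sorted_ref([1, 3, 5, 7], [0, 3, 8], right=True))  # [0, 2, 4]
```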
58 changes: 37 additions & 21 deletions mindnlp/core/_apis/gpu.py
@@ -4,7 +4,7 @@
import mindspore
from mindspore._c_expression import _empty_instance
from mindnlp import core
from .._op_prim.cpu import legacy
from .._op_prim.gpu import legacy

try:
from mindspore._c_expression import TensorPy as Tensor_
@@ -34,6 +34,8 @@ def fill_scalar(size, fill_value, dtype):
return legacy.cast(legacy.fill_v2(size, mindspore.Tensor(fill_value)), dtype)

def fill_tensor(size, fill_value, dtype):
if dtype is None:
return legacy.fill_v2(size, mindspore.Tensor(fill_value))
return legacy.cast(legacy.fill_v2(size, fill_value), dtype)

def zeros_like(input, dtype):
@@ -123,6 +125,9 @@ def div(input, other):
return legacy.div(input, other)

def mul(input, other):
if input.dtype == core.bool:
if isinstance(other, bool) or (not isinstance(other, numbers.Number) and other.dtype == core.bool):
return bitwise_and_scalar(input, other)
return legacy.mul(input, other)
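Reviewer note: the new bool branch reroutes multiplication to bitwise AND; as I read it, this relies on the fact that for values in {False, True} the two operations agree. A quick exhaustive check of that identity:

```python
# bool * bool degenerates to logical/bitwise AND, which is why the bool
# dtype can be dispatched to bitwise_and_scalar; exhaustive check:
for a in (False, True):
    for b in (False, True):
        assert bool(a * b) == bool(a & b) == (a and b)
print("multiplication and AND agree on all boolean inputs")
```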

def reduce_all(input, axis, keepdims):
@@ -253,6 +258,11 @@ def less(input, other):
return legacy.less(input, other)

def select(condition, x, y):
if isinstance(x, numbers.Number) or x.ndim == 0:
x = fill_scalar(condition.shape, x, None)
if isinstance(y, numbers.Number) or y.ndim == 0:
y = fill_scalar(condition.shape, y, None)

return legacy.select(condition, x, y)
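Reviewer note: the new guards materialize Python scalars and 0-d tensors to the condition's shape before the kernel call; presumably the legacy GPU select requires full-shape tensor operands. A pure-Python sketch of the resulting where-style semantics, with lists standing in for tensors:

```python
# Sketch of select/where semantics once scalar branches have been broadcast
# to the condition's shape; lists stand in for tensors here.
def select_ref(condition, x, y):
    if not isinstance(x, list):          # emulate fill_scalar(condition.shape, x)
        x = [x] * len(condition)
    if not isinstance(y, list):
        y = [y] * len(condition)
    return [xv if c else yv for c, xv, yv in zip(condition, x, y)]

print(select_ref([True, False, True], 1.0, 0.0))  # [1.0, 0.0, 1.0]
```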

def round(input, decimals):
@@ -317,16 +327,15 @@ def ones_like(input, dtype):
return legacy.ones_like(input)

def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
return cast(legacy.gather(weight, input, 0, 0), weight.dtype)
return legacy.gather(weight, input, 0, 0)

def linspace(start, end, steps, dtype):
start = float(start)
end = float(end)
return legacy.lin_space(mindspore.Tensor(start), mindspore.Tensor(end), steps)

def masked_fill(input, mask, value):
if input.dtype.is_floating_point and isinstance(value, numbers.Number):
value = float(value)
value = fill_scalar((), value, input.dtype)
return legacy.masked_fill(input, mask, value)

def sum(input, dim, keepdim, dtype):
@@ -388,9 +397,14 @@ def layer_norm(input, normalized_shape, weight, bias, eps=1e-5):
return legacy.layer_norm(input, weight, bias, begin_axis, begin_axis, eps)

def argmin_with_value(input, axis, keep_dims):
if axis is None:
axis = -1
return legacy.arg_min_with_value(input, axis, keep_dims)

def argmax_with_value(input, axis, keep_dims):
if axis is None:
axis = -1

return legacy.arg_max_with_value(input, axis, keep_dims)

def silu(input):
@@ -425,9 +439,13 @@ def eye(n, m, dtype):
return legacy.eye(n, m, dtype)

def argmax(input, axis, keep_dims):
if axis is None:
axis = -1
return legacy.arg_max_with_value(input, axis, keep_dims)[0]

def argmin(input, axis, keep_dims):
if axis is None:
axis = -1
return legacy.arg_min_with_value(input, axis, keep_dims)[0]

def exp(input):
@@ -489,18 +507,7 @@ def scatter(input, dim, index, src):
return legacy.tensor_scatter_elements(input, index, src, dim, "none")

def batch_norm(input, weight, bias, running_mean=None, runnning_var=None, training=False, momentum=0.1, epsilon=1e-5):
input_ndim = input.ndim
if input_ndim == 2:
return legacy.batch_norm(input, weight, bias, running_mean, runnning_var, training, epsilon, momentum, 'NCHW')
else:
input = transpose_view(input, 1, -1)
input_shape = input.shape
input = reshape(input, (-1, input.shape[-1]))
outs = legacy.batch_norm(input, weight, bias, running_mean, runnning_var, training, epsilon, momentum, 'NCHW')
out = reshape(outs[0], (*input_shape[:-1], -1))
out = transpose_view(out, 1, -1)

return out, outs[1], outs[2]
return legacy.batch_norm(input, weight, bias, running_mean, runnning_var, training, epsilon, momentum, 'NCHW')

def tanh(input):
return legacy.tanh(input)
@@ -797,25 +804,22 @@ def max_pool2d(input, kernel_size, stride=1, padding=0, dilation=1, ceil_mode=Fa
return out

def baddbmm(input, batch1, batch2, alpha=1, beta=1):
return add(mul(beta, input), mul(alpha, bmm(batch1, batch2)))
return add(mul(input, beta), mul(bmm(batch1, batch2), alpha))
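Reviewer note: swapping the operands keeps the computed value `beta * input + alpha * (batch1 @ batch2)` unchanged; the point, as I read it, is that `mul` now inspects its first argument's dtype, so the tensor has to come first rather than the Python scalar (same for `addmm` below). A tiny pure-Python check of the identity, with 2x2 matrices standing in for batched tensors:

```python
# baddbmm / addmm still compute beta * input + alpha * (a @ b); only the
# argument order handed to mul() changed. Tiny 2x2 reference check:
def matmul2x2(a, b):
    return [[sum(a[i][k] * b[k][j] for k in range(2)) for j in range(2)] for i in range(2)]

def baddbmm_ref(inp, a, b, alpha=1, beta=1):
    prod = matmul2x2(a, b)
    return [[beta * inp[i][j] + alpha * prod[i][j] for j in range(2)] for i in range(2)]

print(baddbmm_ref([[1, 1], [1, 1]], [[1, 2], [3, 4]], [[5, 6], [7, 8]], alpha=2, beta=3))
```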

def softplus(input, beta=1, threshold=20):
return legacy.softplus(input)

def gather_nd(input, indices):
return legacy.gather_nd(input, indices)

def unique_consecutive(input, return_inverse, return_counts, dim):
return legacy.unique_consecutive(input, return_inverse, return_counts, dim)

def meshgrid(input, lambd):
return legacy.meshgrid(input, lambd)

def addcmul(input, tensor1, tensor2, value=1.0):
return legacy.addcmul(input, tensor1, tensor2, mindspore.Tensor(value))

def addmm(input, mat1, mat2, alpha=1.0, beta=1.0):
return add(mul(beta, input), mul(alpha, bmm(mat1, mat2)))
return add(mul(input, beta), mul(bmm(mat1, mat2), alpha))

def im2col(input, kernel_size, dilation=1, padding=0, stride=1):
out = legacy.im2_col(input, kernel_size, stride, dilation, padding)
@@ -1101,6 +1105,8 @@ def bernoulli(input, generator):
return legacy.bernoulli(input, seed, offset)

def arange(start, end, step, dtype):
if dtype is not None:
return cast(legacy.range(start, end, step, 100000), dtype)
return legacy.range(start, end, step, 100000)

def inplace_fill_scalar(input, value):
@@ -1121,3 +1127,13 @@ def inplace_uniform(input, from_, to_, generator_):
mindspore.tensor(from_, dtype=mindspore.int32),
mindspore.tensor(to_, dtype=mindspore.int32), 0, 0)
return input.assign_value(value)

def right_shift(input, other):
return legacy.right_shift(input, other)

def inplace_fill_tensor(input, value):
input.assign_value(fill_tensor(input.shape, value, None))
return input

def search_sorted(sorted_sequence, values, sorter, dtype, right):
return legacy.search_sorted(sorted_sequence, values, sorter, dtype, right)
10 changes: 10 additions & 0 deletions mindnlp/core/_apis/npu.py
@@ -1594,3 +1594,13 @@ def bernoulli(input, generator):
def multinomial(input, num_samples, replacement, generator):
seed, offset = generator._step(12) # pylint: disable=protected-access
return pyboost.multinomial_ext_op(input, num_samples, replacement, seed, offset)

def right_shift(input, other):
if use_pyboost():
return pyboost.right_shift_op(input, other)
return legacy.right_shift(input, other)

def histc(input, bins=100, min=0, max=0):
if use_pyboost():
return pyboost.histc_ext_op(input, bins, float(min), float(max))
return legacy.histogram(input, bins, float(min), float(max))
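Reviewer note: both NPU additions follow the existing pattern in this file, preferring the pyboost kernel when available and falling back to the legacy primitive otherwise. A minimal sketch of that dispatch shape, using placeholder callables rather than the real kernels:

```python
# Stand-in sketch of the pyboost-first dispatch used throughout this file;
# pyboost_impl and legacy_impl are placeholders, not real kernels.
def dispatch(use_pyboost, pyboost_impl, legacy_impl, *args):
    return pyboost_impl(*args) if use_pyboost else legacy_impl(*args)

print(dispatch(True,  lambda x, n: f"pyboost right_shift({x}, {n})",
               lambda x, n: f"legacy right_shift({x}, {n})", 8, 1))
print(dispatch(False, lambda x, n: f"pyboost right_shift({x}, {n})",
               lambda x, n: f"legacy right_shift({x}, {n})", 8, 1))
```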
8 changes: 5 additions & 3 deletions mindnlp/core/_tensor.py
@@ -110,6 +110,7 @@ def __init__(self, *args, **kwargs):

Tensor.__init__ = __init__
origin_setitem = Tensor.__setitem__
origin_is_contiguous = Tensor.is_contiguous
Tensor._requires_grad = False

def tensor(data, *, dtype=None, device=None, requires_grad=False):
@@ -1253,7 +1254,8 @@ def hardshrink(self, lambd=0.5):


# Tensor.histc

def histc(self, bins=100, min=0, max=0):
return ops.histc(self, bins, min, max)
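Reviewer note: the new method just defers to `ops.histc`. For reference, a pure-Python sketch of the torch-style histc contract it is presumably meant to match: `bins` equal-width buckets over `[min, max]`, out-of-range values ignored, and `min == max == 0` falling back to the data range (parameters renamed `lo`/`hi` here only to avoid shadowing builtins):

```python
# Reference sketch of torch-style histc semantics; not the mindnlp implementation.
def histc_ref(data, bins=100, lo=0.0, hi=0.0):
    if lo == 0.0 and hi == 0.0:
        lo, hi = min(data), max(data)
    width = (hi - lo) / bins or 1.0   # avoid division by zero for degenerate ranges
    counts = [0] * bins
    for v in data:
        if lo <= v <= hi:
            counts[bins - 1 if v == hi else int((v - lo) / width)] += 1
    return counts

print(histc_ref([1.0, 2.0, 1.0], bins=4, lo=0.0, hi=3.0))  # [0, 2, 1, 0]
```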

# Tensor.histogram

@@ -1364,8 +1366,8 @@ def isnan(self):
return ops.isnan(self)

# Tensor.is_contiguous
# def is_contiguous(self):
# return self.is_contiguous()
def is_contiguous(self, memory_format=None):
return origin_is_contiguous(self)

# Tensor.is_complex
def is_complex(self):
15 changes: 13 additions & 2 deletions mindnlp/core/cuda/__init__.py
@@ -60,8 +60,19 @@ def __exit__(self, type: Any, value: Any, traceback: Any):
def is_bf16_supported():
return False

def mem_get_info(index):
return (1024, 1024)
def mem_get_info(device=None):
if not isinstance(device, int):
device = mindspore.context.get_context("device_id")

res = mindspore.hal.get_device_properties(device)
return (res.total_memory, res.total_memory)

def get_device_capability(device=None):
if not isinstance(device, int):
device = mindspore.context.get_context("device_id")

res = mindspore.hal.get_device_properties(device)
return (res.major, res.minor)
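Reviewer note: a hedged usage sketch, assuming a CUDA-enabled MindSpore build so `mindspore.hal.get_device_properties` is available; note that this shim reports `(total_memory, total_memory)` rather than torch's `(free, total)`:

```python
# Usage sketch; requires a CUDA-enabled MindSpore build.
from mindnlp.core import cuda

free_or_total, total = cuda.mem_get_info()    # device defaults to the current device_id
major, minor = cuda.get_device_capability(0)
print(f"total memory: {total} bytes, compute capability: {major}.{minor}")
```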

def memory_reserved(device=None):
return ms_memory_reserved()
59 changes: 46 additions & 13 deletions mindnlp/core/nn/functional.py
@@ -274,7 +274,7 @@ def pad(input, pad, mode='constant', value=None):
if isinstance(pad, tuple):
pad = tuple(p if isinstance(p, int) else p.item() for p in pad)

if input.device.type in ['cpu', 'meta'] or ON_A1:
if input.device.type in ['cpu', 'meta', 'cuda'] or ON_A1:
new_pad = ()
for idx, pad_v in enumerate(pad):
if not isinstance(pad_v, int):
@@ -301,6 +301,8 @@
value = bool(value)
elif input.dtype in [core.int32, core.int64]:
value = int(value)
if input.device.type == 'cuda' and len(new_pad) == 8:
return execute('pad_v3', input, new_pad[:-2], mode, value)
return execute('pad_v3', input, new_pad, mode, value)
out = input
if (isinstance(pad, tuple) and not pad):
@@ -324,9 +326,9 @@ def pad(input, pad, mode='constant', value=None):
return out

def nll_loss(input, target, weight=None, ignore_index=-100, reduction='mean'):
# if input.device.type == 'npu':
return _nllloss_nd(input, target, weight, ignore_index, reduction)
# return _inner_nll_loss(input, target, weight, ignore_index, reduction)
if input.device.type in ['npu', 'cpu']:
return _nllloss_nd(input, target, weight, ignore_index, reduction)
return _inner_nll_loss(input, target, weight, ignore_index, reduction)

def _inner_nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
ndim = inputs.ndim
@@ -352,7 +354,7 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, reduction='none', label_smoothing=0.0):
"""nll loss inner function"""
if target.ndim == inputs.ndim - 1:
target = target.expand_dims(target_dim)
target = target.unsqueeze(target_dim)
if ignore_index is not None:
non_pad_mask = core.eq(target, ignore_index)
target = target.masked_fill(non_pad_mask, core.cast(0, target.dtype))
@@ -366,10 +368,10 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
weight = weight.view(weight.shape + (1,))
weighted_inputs = inputs * weight
weighted_inputs = weighted_inputs.view(orig_shape)
loss = core.neg(core.gather_d(weighted_inputs, target_dim, target))
loss = core.neg(core.gather(weighted_inputs, target_dim, target))
smooth_loss = core.neg(weighted_inputs.sum(axis=target_dim, keepdims=True))
else:
loss = core.neg(core.gather_d(inputs, target_dim, target))
loss = core.neg(core.gather(inputs, target_dim, target))
smooth_loss = core.neg(inputs.sum(axis=target_dim, keepdims=True))
loss_weights = core.ones_like(loss)

@@ -427,11 +429,42 @@ def _nllloss_nd(input, target, weight=None, ingore_index=-100, reduction='mean')
ret = execute('nllloss_2d', input, target, weight, reduction, ingore_index)[0]
return ret.view(out_size)


def cross_entropy_gpu(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
class_dim = 0 if input.ndim == 1 else 1
if target.dtype.is_floating_point:
return _cross_entropy(input, target, class_dim, weight, reduction, label_smoothing)
return nll_loss(log_softmax(input, class_dim), target, weight, ignore_index, reduction)

def _cross_entropy(inputs, target, target_dim, weight=None, reduction='mean', label_smoothing=0.0):
"""cross entropy inner function"""
class_dim = 0 if inputs.ndim == 1 else 1
n_classes = inputs.shape[class_dim]
inputs = log_softmax(inputs, class_dim)
if label_smoothing > 0.0:
target = target * (1 - label_smoothing) + label_smoothing / n_classes

if weight is None:
weight = core.ones_like(inputs)
elif inputs.ndim != 1:
broadcast_shape = [1 for _ in range(inputs.ndim)]
broadcast_shape[1] = weight.shape[0]
weight = weight.reshape(broadcast_shape)

if reduction == 'mean':
return -(inputs * target * weight).sum() / (inputs.nel / n_classes)
if reduction == 'sum':
return -(inputs * target * weight).sum()
return -(inputs * target * weight).sum(class_dim)
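Reviewer note: the new GPU path dispatches on the target dtype: integer class indices go through `log_softmax` + `nll_loss`, while floating-point (soft) targets take `_cross_entropy` with optional label smoothing. A pure-Python sketch of the soft-target formula, loss = -sum((target*(1-eps) + eps/C) * log_softmax(logits)), for a single unbatched sample:

```python
# Reference sketch of the soft-target branch with label smoothing;
# one unbatched sample, not the mindnlp implementation.
import math

def log_softmax_ref(logits):
    m = max(logits)
    z = m + math.log(sum(math.exp(v - m) for v in logits))
    return [v - z for v in logits]

def soft_cross_entropy_ref(logits, target, label_smoothing=0.0):
    n_classes = len(logits)
    smoothed = [t * (1 - label_smoothing) + label_smoothing / n_classes for t in target]
    return -sum(t * lp for t, lp in zip(smoothed, log_softmax_ref(logits)))

print(soft_cross_entropy_ref([2.0, 0.5, 0.1], [1.0, 0.0, 0.0], label_smoothing=0.1))
```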


def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
if label_smoothing < 0.0 or label_smoothing > 1.0:
raise ValueError(f"For cross_entropy, label_smoothing must in [0, 1]")
if input.ndim == 0 or input.shape[0] == 0:
raise ValueError(f"For cross_entropy, input don't support 0-dim and shape[0].")
if input.device.type == 'cuda':
return cross_entropy_gpu(input, target, weight, ignore_index, reduction, label_smoothing)
class_dim = 0 if input.ndim == 1 else 1
n_classes = input.shape[class_dim]
input = log_softmax(input, class_dim, dtype=input.dtype)
@@ -675,10 +708,10 @@ def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corne
)
if input.dim() == 4 and mode == "bicubic":
assert align_corners is not None
if antialias:
return torch._C._nn._upsample_bicubic2d_aa(
input, output_size, align_corners, scale_factors
)
# if antialias:
# return torch._C._nn._upsample_bicubic2d_aa(
# input, output_size, align_corners, scale_factors
# )
return execute(
'upsample_bicubic2d', input, output_size, scale_factors, align_corners
)
@@ -1146,8 +1179,8 @@ def scaled_dot_product_attention(query, key, value, attn_mask=None, dropout_p=0.
else:
attn_bias = attn_mask + attn_bias

attn_weight = query.float() @ key.transpose(-2, -1).float() * scale_factor
attn_weight += attn_bias.float()
attn_weight = query @ key.transpose(-2, -1) * scale_factor
attn_weight += attn_bias
attn_weight = softmax(attn_weight, dim=-1, dtype=core.float32).to(query.dtype)
attn_weight = dropout(attn_weight, dropout_p, training=True)
return attn_weight @ value
7 changes: 7 additions & 0 deletions mindnlp/core/ops/_inner.py
@@ -16,7 +16,14 @@ def npu_clear_float_status_v2(status):
def all_finite(inputs):
return execute('all_finite', inputs)

def custom_masked_scatter_vec(input, mask, source):
output = input.clone()
output[mask] = source.flatten() # the key line: vectorized assignment
return output

def masked_scatter(input, mask, source):
if input.device.type == 'cuda':
return custom_masked_scatter_vec(input, mask, source)
return execute('masked_scatter', input, mask, source)
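Reviewer note: the CUDA fallback clones the input and writes the flattened source into the positions where the mask is True, consuming source values in order. A pure-Python sketch of that contract, with lists standing in for tensors:

```python
# Sketch of masked_scatter semantics: source values are consumed in order
# and placed wherever mask is True; lists stand in for tensors.
def masked_scatter_ref(values, mask, source):
    out, it = list(values), iter(source)
    for i, m in enumerate(mask):
        if m:
            out[i] = next(it)
    return out

print(masked_scatter_ref([0, 0, 0, 0], [True, False, True, True], [7, 8, 9]))  # [7, 0, 8, 9]
```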

__all__ = [