Skip to content

API: Replace na_action parameter in Series/DataFrame/Index.map by the standard skipna #61530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,26 @@ and ``na_action="ignore"`` did not work correctly for any :class:`.ExtensionArra

*New behavior*:

.. ipython:: python
.. code-block:: ipython

ser = pd.Series(["a", "b", np.nan], dtype="category")
ser.map(str.upper, na_action="ignore")
df = pd.DataFrame(ser)
df.map(str.upper, na_action="ignore")
idx = pd.Index(ser)
idx.map(str.upper, na_action="ignore")
In [1]: ser = pd.Series(["a", "b", np.nan], dtype="category")
In [2]: ser.map(str.upper, na_action="ignore")
Out[2]:
0 A
1 B
2 NaN
dtype: category
Categories (2, object): ['A', 'B']
In [3]: df = pd.DataFrame(ser)
In [4]: df.map(str.upper, na_action="ignore")
Out[4]:
0
0 A
1 B
2 NaN
In [5]: idx = pd.Index(ser)
In [6]: idx.map(str.upper, na_action="ignore")
Out[6]: CategoricalIndex(['A', 'B', nan], categories=['A', 'B'], ordered=False, dtype='category')

Also, note that :meth:`Categorical.map` implicitly has had its ``na_action`` set to ``"ignore"`` by default.
This has been deprecated and the default for :meth:`Categorical.map` will change
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Other enhancements
^^^^^^^^^^^^^^^^^^
- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
- :meth:`Series.map`, :meth:`DataFrame.map` and :meth:`Index.map` gained a new ``skipna`` boolean parameter that replaces the ``na_action`` parameter, which is now deprecated (:issue:`61128`)
- Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`)
- Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`)
- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
Expand Down
8 changes: 0 additions & 8 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,14 +402,6 @@ def nselect_method(request):
return request.param


@pytest.fixture(params=[None, "ignore"])
def na_action(request):
"""
Fixture for 'na_action' argument in map.
"""
return request.param


Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The skipna fixture already exists.

@pytest.fixture(params=[True, False])
def ascending(request):
"""
Expand Down
32 changes: 24 additions & 8 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1631,7 +1631,8 @@ def union_with_duplicates(
def map_array(
arr: ArrayLike,
mapper,
na_action: Literal["ignore"] | None = None,
na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default,
skipna: bool = False,
) -> np.ndarray | ExtensionArray | Index:
"""
Map values using an input mapping or function.
Expand All @@ -1640,8 +1641,13 @@ def map_array(
----------
mapper : function, dict, or Series
Mapping correspondence.
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NA values, without passing them to the
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NA values, without passing them to ``mapper``.

.. deprecated:: 3.0.0
Use ``skipna`` instead.
skipna : bool, default False
If ``True``, propagate NA values, without passing them to the
mapping correspondence.

Returns
Expand All @@ -1653,9 +1659,19 @@ def map_array(
"""
from pandas import Index

if na_action not in (None, "ignore"):
msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
raise ValueError(msg)
if na_action != lib.no_default:
warnings.warn(
"The ``na_action`` parameter has been deprecated and it will be "
"removed in a future version of pandas. Use ``skipna`` instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if na_action == "ignore":
skipna = True
elif na_action not in (None, "ignore"):
raise ValueError(
f"na_action must either be 'ignore' or None, {na_action!r} was passed"
)

# we can fastpath dict/Series to an efficient map
# as we know that we are not going to have to yield
Expand Down Expand Up @@ -1690,7 +1706,7 @@ def map_array(
mapper = Series(mapper)

if isinstance(mapper, ABCSeries):
if na_action == "ignore":
if skipna:
mapper = mapper[mapper.index.notna()]

# Since values were input this means we came from either
Expand All @@ -1705,7 +1721,7 @@ def map_array(

# we must convert to python types
values = arr.astype(object, copy=False)
if na_action is None:
if not skipna:
return lib.map_infer(values, mapper)
else:
return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8))
3 changes: 1 addition & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ def map(
executor the user wants to use.
skip_na : bool
Whether the function should be called for missing values or not.
This is specified by the pandas user as ``map(na_action=None)``
or ``map(na_action='ignore')``.
This is specified by the pandas user as ``map(skipna=True)``
or ``map(skipna=False)``.
"""

@staticmethod
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1477,11 +1477,18 @@ def to_numpy(
result[~mask] = data[~mask]._pa_array.to_numpy()
return result

def map(self, mapper, na_action: Literal["ignore"] | None = None):
def map(
self,
mapper,
na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default,
skipna: bool = False,
):
if is_numeric_dtype(self.dtype):
return map_array(self.to_numpy(), mapper, na_action=na_action)
return map_array(
self.to_numpy(), mapper, na_action=na_action, skipna=skipna
)
else:
return super().map(mapper, na_action)
return super().map(mapper, na_action=na_action, skipna=skipna)

@doc(ExtensionArray.duplicated)
def duplicated(
Expand Down
32 changes: 28 additions & 4 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2541,7 +2541,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)

def map(self, mapper, na_action: Literal["ignore"] | None = None):
def map(
self,
mapper,
na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default,
skipna: bool = False,
):
"""
Map values using an input mapping or function.

Expand All @@ -2550,8 +2555,13 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
mapper : function, dict, or Series
Mapping correspondence.
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NA values, without passing them to the
mapping correspondence. If 'ignore' is not supported, a
If 'ignore', propagate NA values, without passing them to ``mapper``.

.. deprecated:: 3.0.0
Use ``skipna`` instead.
skipna : bool, default False
If ``True``, propagate NA values, without passing them to the
mapping correspondence. If ``True`` is not supported, a
``NotImplementedError`` should be raised.

Returns
Expand All @@ -2561,7 +2571,21 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
If the function returns a tuple with more than one element
a MultiIndex will be returned.
"""
return map_array(self, mapper, na_action=na_action)
if na_action != lib.no_default:
warnings.warn(
"The ``na_action`` parameter has been deprecated and it will be "
"removed in a future version of pandas. Use ``skipna`` instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if na_action == "ignore":
skipna = True
elif na_action not in (None, "ignore"):
raise ValueError(
"na_action must either be 'ignore' or None, "
f"{na_action!r} was passed"
)
return map_array(self, mapper, skipna=skipna)

# ------------------------------------------------------------------------
# GroupBy Methods
Expand Down
39 changes: 31 additions & 8 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
cast,
overload,
)
import warnings

import numpy as np

Expand All @@ -22,6 +23,7 @@
)
from pandas._libs.arrays import NDArrayBacked
from pandas.compat.numpy import function as nv
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -1497,7 +1499,8 @@ def remove_unused_categories(self) -> Self:
def map(
self,
mapper,
na_action: Literal["ignore"] | None = None,
na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default,
skipna: bool = False,
):
"""
Map categories using an input mapping or function.
Expand All @@ -1516,7 +1519,12 @@ def map(
mapper : function, dict, or Series
Mapping correspondence.
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NaN values, without passing them to the
If 'ignore', propagate NaN values, without passing them to ``mapper``.

.. deprecated:: 3.0.0
Use ``skipna`` instead.
skipna : bool, default False
If ``True``, propagate NA values, without passing them to the
mapping correspondence.

Returns
Expand All @@ -1541,10 +1549,10 @@ def map(
>>> cat
['a', 'b', 'c']
Categories (3, object): ['a', 'b', 'c']
>>> cat.map(lambda x: x.upper(), na_action=None)
>>> cat.map(lambda x: x.upper(), skipna=False)
['A', 'B', 'C']
Categories (3, object): ['A', 'B', 'C']
>>> cat.map({"a": "first", "b": "second", "c": "third"}, na_action=None)
>>> cat.map({"a": "first", "b": "second", "c": "third"}, skipna=False)
['first', 'second', 'third']
Categories (3, object): ['first', 'second', 'third']

Expand All @@ -1555,29 +1563,44 @@ def map(
>>> cat
['a', 'b', 'c']
Categories (3, object): ['a' < 'b' < 'c']
>>> cat.map({"a": 3, "b": 2, "c": 1}, na_action=None)
>>> cat.map({"a": 3, "b": 2, "c": 1}, skipna=False)
[3, 2, 1]
Categories (3, int64): [3 < 2 < 1]

If the mapping is not one-to-one an :class:`~pandas.Index` is returned:

>>> cat.map({"a": "first", "b": "second", "c": "first"}, na_action=None)
>>> cat.map({"a": "first", "b": "second", "c": "first"}, skipna=False)
Index(['first', 'second', 'first'], dtype='object')

If a `dict` is used, all unmapped categories are mapped to `NaN` and
the result is an :class:`~pandas.Index`:

>>> cat.map({"a": "first", "b": "second"}, na_action=None)
>>> cat.map({"a": "first", "b": "second"}, skipna=False)
Index(['first', 'second', nan], dtype='object')
"""
assert callable(mapper) or is_dict_like(mapper)

if na_action != lib.no_default:
warnings.warn(
"The ``na_action`` parameter has been deprecated and it will be "
"removed in a future version of pandas. Use ``skipna`` instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if na_action == "ignore":
skipna = True
elif na_action not in (None, "ignore"):
raise ValueError(
"na_action must either be 'ignore' or None, "
f"{na_action!r} was passed"
)

new_categories = self.categories.map(mapper)

has_nans = np.any(self._codes == -1)

na_val = np.nan
if na_action is None and has_nans:
if not skipna and has_nans:
na_val = mapper(np.nan) if callable(mapper) else mapper.get(np.nan, np.nan)

if new_categories.is_unique and not new_categories.hasnans and na_val is np.nan:
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,10 +743,15 @@ def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarra
# pandas assumes they're there.

@ravel_compat
def map(self, mapper, na_action: Literal["ignore"] | None = None):
def map(
self,
mapper,
na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default,
skipna: bool = False,
):
from pandas import Index

result = map_array(self, mapper, na_action=na_action)
result = map_array(self, mapper, na_action=na_action, skipna=skipna)
result = Index(result)

if isinstance(result, ABCMultiIndex):
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -1324,8 +1324,13 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
)
return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis)

def map(self, mapper, na_action: Literal["ignore"] | None = None):
return map_array(self.to_numpy(), mapper, na_action=na_action)
def map(
self,
mapper,
na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default,
skipna: bool = False,
):
return map_array(self.to_numpy(), mapper, na_action=na_action, skipna=skipna)

@overload
def any(
Expand Down
Loading
Loading