Skip to content

Commit 92b1013

Browse files
authored
Remove more top level cudf imports in core (#18862)
Towards #10820

Authors:
- Matthew Roeschke (https://github.com/mroeschke)
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #18862
1 parent b39c83c commit 92b1013

File tree

6 files changed

+45
-27
lines changed

6 files changed

+45
-27
lines changed

docs/cudf/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ def on_missing_reference(app, env, node, contnode):
596596
("py:obj", "DataFrame.pipe"),
597597
("py:meth", "pyarrow.Table.to_pandas"),
598598
("py:class", "abc.Hashable"),
599+
("py:class", "cp.ndarray"),
599600
("py:class", "pd.DataFrame"),
600601
("py:class", "pandas.core.indexes.frozen.FrozenList"),
601602
("py:class", "pa.Array"),

python/cudf/cudf/core/algorithms.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,20 @@
22
from __future__ import annotations
33

44
import warnings
5+
from typing import TYPE_CHECKING
56

67
import cupy as cp
78
import pyarrow as pa
89

910
import cudf
1011
from cudf.core.column import as_column
11-
from cudf.core.index import Index
12+
from cudf.core.dtypes import CategoricalDtype
1213
from cudf.options import get_option
1314
from cudf.utils.dtypes import can_convert_to_column, cudf_dtype_to_pa_type
1415

16+
if TYPE_CHECKING:
17+
from cudf.core.index import Index
18+
1519

1620
def factorize(
1721
values,
@@ -80,7 +84,6 @@ def factorize(
8084
>>> uniques
8185
Index([<NA>, 1.0, 2.0], dtype='float64')
8286
"""
83-
8487
return_cupy_array = isinstance(values, cp.ndarray)
8588

8689
if not can_convert_to_column(values):
@@ -112,8 +115,10 @@ def factorize(
112115
dtype="int64" if get_option("mode.pandas_compatible") else None,
113116
).values
114117

115-
return labels, cats.values if return_cupy_array else Index._from_column(
116-
cats
118+
# TODO: Avoid accessing Index from the top level namespace
119+
return (
120+
labels,
121+
cats.values if return_cupy_array else cudf.Index._from_column(cats),
117122
)
118123

119124

@@ -218,6 +223,7 @@ def unique(values):
218223
>>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
219224
array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
220225
"""
226+
# TODO: Avoid accessing Index and Series from the top level namespace
221227
if not isinstance(values, (cudf.Series, cudf.Index, cp.ndarray)):
222228
raise ValueError(
223229
"Must pass cudf.Series, cudf.Index, or cupy.ndarray object"
@@ -229,7 +235,7 @@ def unique(values):
229235
return cp.asarray(cudf.Index(values).unique())
230236
if isinstance(values, cudf.Series):
231237
if get_option("mode.pandas_compatible"):
232-
if isinstance(values.dtype, cudf.CategoricalDtype):
238+
if isinstance(values.dtype, CategoricalDtype):
233239
raise NotImplementedError(
234240
"cudf.Categorical is not implemented"
235241
)

python/cudf/cudf/core/reshape.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
from collections.abc import Hashable
3232

3333
from cudf._typing import DtypeObj
34+
from cudf.core.dataframe import DataFrame
35+
from cudf.core.index import Index
36+
from cudf.core.multiindex import MultiIndex
37+
from cudf.core.series import Series
3438

3539
_AXIS_MAP = {0: 0, 1: 1, "index": 0, "columns": 1}
3640

@@ -114,7 +118,7 @@ def _get_combined_index(indexes, intersect: bool = False, sort=None):
114118

115119

116120
def _normalize_series_and_dataframe(
117-
objs: list[cudf.Series | cudf.DataFrame], axis: Literal[0, 1]
121+
objs: list[Series | DataFrame], axis: Literal[0, 1]
118122
) -> None:
119123
"""Convert any cudf.Series objects in objs to DataFrames in place."""
120124
# Default to naming series by a numerical id if they are not named.
@@ -545,14 +549,14 @@ def concat(
545549

546550

547551
def melt(
548-
frame: cudf.DataFrame,
552+
frame: DataFrame,
549553
id_vars=None,
550554
value_vars=None,
551555
var_name=None,
552556
value_name: Hashable = "value",
553557
col_level=None,
554558
ignore_index: bool = True,
555-
) -> cudf.DataFrame:
559+
) -> DataFrame:
556560
"""Unpivots a DataFrame from wide format to long format,
557561
optionally leaving identifier variables set.
558562
@@ -933,7 +937,9 @@ def _merge_sorted(
933937
if len(objs) < 1:
934938
raise ValueError("objs must be non-empty")
935939

936-
if not all(isinstance(table, cudf.core.frame.Frame) for table in objs):
940+
if not all(
941+
isinstance(table, (cudf.DataFrame, cudf.Series)) for table in objs
942+
):
937943
raise TypeError("Elements of objs must be Frame-like")
938944

939945
if len(objs) == 1:
@@ -1003,9 +1009,9 @@ def _merge_sorted(
10031009

10041010
def _pivot(
10051011
col_accessor: ColumnAccessor,
1006-
index: cudf.Index | cudf.MultiIndex,
1007-
columns: cudf.Index | cudf.MultiIndex,
1008-
) -> cudf.DataFrame:
1012+
index: Index | MultiIndex,
1013+
columns: Index | MultiIndex,
1014+
) -> DataFrame:
10091015
"""
10101016
Reorganize the values of the DataFrame according to the given
10111017
index and columns.
@@ -1059,8 +1065,8 @@ def as_tuple(x):
10591065

10601066

10611067
def pivot(
1062-
data: cudf.DataFrame, columns=None, index=no_default, values=no_default
1063-
) -> cudf.DataFrame:
1068+
data: DataFrame, columns=None, index=no_default, values=no_default
1069+
) -> DataFrame:
10641070
"""
10651071
Return reshaped DataFrame organized by the given index and column values.
10661072

python/cudf/cudf/core/single_column_frame.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import pyarrow as pa
3131

3232
from cudf._typing import Dtype, NotImplementedType, ScalarLike
33+
from cudf.core.dataframe import DataFrame
34+
from cudf.core.index import Index
3335

3436

3537
class SingleColumnFrame(Frame, NotIterable):
@@ -217,15 +219,15 @@ def tolist(self) -> None:
217219

218220
to_list = tolist
219221

220-
def _to_frame(
221-
self, name: Hashable, index: cudf.Index | None
222-
) -> cudf.DataFrame:
222+
def _to_frame(self, name: Hashable, index: Index | None) -> DataFrame:
223223
"""Helper function for Series.to_frame, Index.to_frame"""
224+
224225
if name is no_default:
225226
col_name = 0 if self.name is None else self.name
226227
else:
227228
col_name = name
228229
ca = ColumnAccessor({col_name: self._column}, verify=False)
230+
# TODO: Avoid accessing DataFrame from the top level namespace
229231
return cudf.DataFrame._from_data(ca, index=index)
230232

231233
@property # type: ignore
@@ -279,7 +281,7 @@ def __cuda_array_interface__(self):
279281
@_performance_tracking
280282
def factorize(
281283
self, sort: bool = False, use_na_sentinel: bool = True
282-
) -> tuple[cupy.ndarray, cudf.Index]:
284+
) -> tuple[cupy.ndarray, Index]:
283285
"""Encode the input values as integer labels.
284286
285287
Parameters
@@ -309,7 +311,8 @@ def factorize(
309311
>>> uniques
310312
Index(['a', 'c'], dtype='object')
311313
"""
312-
return cudf.core.algorithms.factorize(
314+
# TODO: Avoid accessing factorize from the top level namespace
315+
return cudf.factorize(
313316
self,
314317
sort=sort,
315318
use_na_sentinel=use_na_sentinel,
@@ -344,6 +347,7 @@ def _make_operands_for_binop(
344347
Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]
345348
The operands to be passed to _colwise_binop.
346349
"""
350+
347351
# Get the appropriate name for output operations involving two objects
348352
# that are Series-like objects. The output shares the lhs's name unless
349353
# the rhs is a _differently_ named Series-like object.
@@ -360,6 +364,7 @@ def _make_operands_for_binop(
360364
if not hasattr(
361365
other, "__cuda_array_interface__"
362366
) and not isinstance(other, cudf.RangeIndex):
367+
# TODO: Avoid accessing RangeIndex from the top level namespace
363368
return NotImplemented
364369

365370
# Non-scalar right operands are valid iff they convert to columns.

python/cudf/cudf/core/tokenize_vocabulary.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import pylibcudf as plc
66

7-
import cudf
7+
from cudf.core.series import Series
88

99

1010
class TokenizeVocabulary:
@@ -17,14 +17,14 @@ class TokenizeVocabulary:
1717
Strings column of vocabulary terms
1818
"""
1919

20-
def __init__(self, vocabulary: cudf.Series) -> None:
20+
def __init__(self, vocabulary: Series) -> None:
2121
self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary(
2222
vocabulary._column.to_pylibcudf(mode="read")
2323
)
2424

2525
def tokenize(
2626
self, text, delimiter: str = "", default_id: int = -1
27-
) -> cudf.Series:
27+
) -> Series:
2828
"""
2929
Parameters
3030
----------
@@ -46,4 +46,4 @@ def tokenize(
4646
self.vocabulary, delimiter, default_id
4747
)
4848

49-
return cudf.Series._from_column(result)
49+
return Series._from_column(result)

python/cudf/cudf/core/wordpiece_tokenize.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import pylibcudf as plc
66

7-
import cudf
7+
from cudf.core.series import Series
88

99

1010
class WordPieceVocabulary:
@@ -17,12 +17,12 @@ class WordPieceVocabulary:
1717
Strings column of vocabulary terms
1818
"""
1919

20-
def __init__(self, vocabulary: cudf.Series) -> None:
20+
def __init__(self, vocabulary: Series) -> None:
2121
self.vocabulary = plc.nvtext.wordpiece_tokenize.WordPieceVocabulary(
2222
vocabulary._column.to_pylibcudf(mode="read")
2323
)
2424

25-
def tokenize(self, text, max_words_per_row: int = 0) -> cudf.Series:
25+
def tokenize(self, text, max_words_per_row: int = 0) -> Series:
2626
"""
2727
Parameters
2828
----------
@@ -43,4 +43,4 @@ def tokenize(self, text, max_words_per_row: int = 0) -> cudf.Series:
4343
self.vocabulary, max_words_per_row
4444
)
4545

46-
return cudf.Series._from_column(result)
46+
return Series._from_column(result)

0 commit comments

Comments
 (0)