Commit 25051f8

delay import of sklearn ijson
1 parent 0fda7bf commit 25051f8

7 files changed: +14 -13 lines changed

azure-pipelines.yml

Lines changed: 3 additions & 3 deletions
@@ -24,7 +24,7 @@ jobs:
   - script: pip install -r requirements-dev.txt
     displayName: 'Install Requirements dev'
   - script: |
-      ruff .
+      ruff check .
     displayName: 'Ruff'
   - script: |
       black --diff .
@@ -76,7 +76,7 @@ jobs:
   - script: pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
     displayName: 'Install scikit-learn nightly'
   - script: |
-      ruff .
+      ruff check .
     displayName: 'Ruff'
   - script: |
       rstcheck -r ./_doc ./pandas_streaming
@@ -117,7 +117,7 @@ jobs:
   - script: pip install -r requirements-dev.txt
     displayName: 'Install Requirements dev'
   - script: |
-      ruff .
+      ruff check .
     displayName: 'Ruff'
   - script: |
       rstcheck -r ./_doc ./pandas_streaming

pandas_streaming/df/connex_split.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,6 @@
 from logging import getLogger
 import pandas
 import numpy
-from sklearn.model_selection import train_test_split
 from .dataframe_helpers import dataframe_shuffle
 
 logger = getLogger("pandas-streaming")
@@ -61,6 +60,7 @@ def train_test_split_weights(
         raise ValueError(
             f"test_size={test_size} or train_size={train_size} cannot be null (1)."
         )
+    from sklearn.model_selection import train_test_split
     return train_test_split(
         df, test_size=test_size, train_size=train_size, random_state=random_state
     )
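The import of scikit-learn moves from module level into train_test_split_weights, so importing pandas_streaming.df.connex_split no longer loads scikit-learn up front. A minimal sketch of the same deferred-import pattern, with an illustrative function name that is not part of this repository:

    # Deferred (lazy) import: the heavy dependency is loaded only when the
    # function that needs it runs, not when the module is imported.
    def split_frame(df, test_size=0.25, random_state=None):
        # Keeping the import here means "import my_module" stays fast and even
        # works without scikit-learn installed, as long as this function is
        # never called.
        from sklearn.model_selection import train_test_split

        return train_test_split(df, test_size=test_size, random_state=random_state)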

pandas_streaming/df/dataframe.py

Lines changed: 2 additions & 2 deletions
@@ -640,10 +640,10 @@ def _reservoir_sampling(
         if len(indices) < n:
             indices.append((i, ir))
         else:
-            x = nrandom.random()  # pylint: disable=E1101
+            x = nrandom.random()
             if x * n < (seen - n):
                 k = nrandom.randint(0, len(indices) - 1)
-                indices[k] = (i, ir)  # pylint: disable=E1126
+                indices[k] = (i, ir)
     indices = set(indices)
 
     def reservoir_iterate(sdf, indices, chunksize):
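For context, the touched lines are the replacement step of a reservoir sampler. Below is a sketch of textbook reservoir sampling (Algorithm R) over any iterable; it illustrates the idea but is not the exact acceptance rule used in _reservoir_sampling:

    import random

    def reservoir_sample(iterable, n, rnd=random):
        """Keep a uniform sample of n items from a stream of unknown length."""
        reservoir = []
        for seen, item in enumerate(iterable, start=1):
            if len(reservoir) < n:
                reservoir.append(item)
            else:
                # Replace a random slot with probability n / seen.
                k = rnd.randrange(seen)
                if k < n:
                    reservoir[k] = item
        return reservoir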

pandas_streaming/df/dataframe_helpers.py

Lines changed: 3 additions & 3 deletions
@@ -155,13 +155,13 @@ def hash_floatl(c):
     }  # pylint: disable=R1721
     for c in cols:
         t = coltype[c]
-        if t == int:
+        if t == int:  # noqa: E721
             df[c] = df[c].apply(hash_intl)
         elif t == numpy.int64:
             df[c] = df[c].apply(lambda x: numpy.int64(hash_intl(x)))
-        elif t == float:
+        elif t == float:  # noqa: E721
             df[c] = df[c].apply(hash_floatl)
-        elif t == object:
+        elif t == object:  # noqa: E721
             df[c] = df[c].apply(hash_strl)
         else:
             raise NotImplementedError(  # pragma: no cover
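Ruff's E721 rule flags type comparisons written with == instead of is or isinstance. The values compared here are most likely pandas/NumPy dtype objects (an inference from the surrounding code, not something the diff states), and dtype equality deliberately matches builtin types such as int, float, and object, so the comparisons are kept and silenced with # noqa: E721 rather than rewritten. A small standalone illustration:

    import pandas

    df = pandas.DataFrame({"a": [1, 2], "b": [1.5, 2.5], "c": ["x", "y"]})

    for name, dtype in zip(df.columns, df.dtypes):
        # numpy.dtype.__eq__ accepts builtin types: dtype('float64') == float and
        # dtype('O') == object are True, and dtype('int64') == int is usually True
        # (it depends on the platform's default integer size). "dtype is int" is
        # always False, so rewriting == as is would change behaviour.
        print(name, dtype, dtype == int, dtype == float, dtype == object)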

pandas_streaming/df/dataframe_io_helpers.py

Lines changed: 2 additions & 1 deletion
@@ -5,7 +5,6 @@
     from ujson import dumps
 except ImportError:  # pragma: no cover
     from json import dumps
-import ijson
 
 
 class JsonPerRowsStream:
@@ -257,6 +256,8 @@ def enumerate_json_items(
     else:
         if hasattr(filename, "seek"):
            filename.seek(0)
+        import ijson
+
         parser = ijson.parse(filename)
         current = None
         curkey = None
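Likewise, ijson becomes an import local to enumerate_json_items, so the package can be imported without ijson installed as long as streaming JSON parsing is not used. A common variant of the pattern wraps the local import to give a clearer error message; the sketch below is illustrative, not what the commit does:

    def iterate_json_events(fp):
        """Yield (prefix, event, value) triples from a JSON stream."""
        try:
            import ijson  # imported only when streaming JSON is actually requested
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                "ijson is required for streaming JSON parsing; "
                "install it with 'pip install ijson'."
            ) from exc
        yield from ijson.parse(fp)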

pandas_streaming/df/dataframe_split.py

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ def sklearn_train_test_split(
         )
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=ImportWarning)
-        from sklearn.model_selection import train_test_split  # pylint: disable=C0415
+        from sklearn.model_selection import train_test_split
 
     opts = ["test_size", "train_size", "random_state", "shuffle", "stratify"]
     split_ops = {}
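A quick way to see the effect of deferring these imports: after this change, merely importing the modules should no longer pull scikit-learn into sys.modules (assuming nothing else in the package imports it at module level); it only appears once one of the split helpers runs.

    import sys

    import pandas_streaming.df.connex_split  # noqa: F401
    import pandas_streaming.df.dataframe_split  # noqa: F401

    # With the deferred imports, scikit-learn should not be loaded yet.
    print("sklearn loaded at import time:", "sklearn" in sys.modules)

    # Calling train_test_split_weights or sklearn_train_test_split afterwards
    # triggers the local "from sklearn.model_selection import train_test_split".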

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -25,11 +25,11 @@ exclude = [
 # Same as Black.
 line-length = 88
 
-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "_doc/examples/plot_first_example.py" = ["E402", "F811"]
 "_unittests/ut_df/test_dataframe_io_helpers.py" = ["E501"]
 "pandas_streaming/data/__init__.py" = ["F401"]
