Skip to content

Commit 9753f32

Browse files
authored
Delay import of sklearn and ijson (#40)
* delay import of sklearn and ijson * ruff * remove rstcheck * remove circleci * complex * fix documentation
1 parent 0fda7bf commit 9753f32

File tree

11 files changed

+16
-134
lines changed

11 files changed

+16
-134
lines changed

.circleci/config.yml

Lines changed: 0 additions & 70 deletions
This file was deleted.

.github/workflows/documentation.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ jobs:
7777
grep ERROR doc.txt
7878
exit 1
7979
fi
80-
if [[ $(grep WARNING doc.txt) ]]; then
80+
if [[ $(grep WARNING doc.txt | grep -v 'std:term:y') ]]; then
8181
echo "Documentation produces warnings."
8282
grep WARNING doc.txt
8383
exit 1

.github/workflows/rstcheck.yml

Lines changed: 0 additions & 27 deletions
This file was deleted.

azure-pipelines.yml

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
- script: pip install -r requirements-dev.txt
2525
displayName: 'Install Requirements dev'
2626
- script: |
27-
ruff .
27+
ruff check .
2828
displayName: 'Ruff'
2929
- script: |
3030
black --diff .
@@ -76,11 +76,8 @@ jobs:
7676
- script: pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
7777
displayName: 'Install scikit-learn nightly'
7878
- script: |
79-
ruff .
79+
ruff check .
8080
displayName: 'Ruff'
81-
- script: |
82-
rstcheck -r ./_doc ./pandas_streaming
83-
displayName: 'rstcheck'
8481
- script: |
8582
black --diff .
8683
displayName: 'Black'
@@ -117,11 +114,8 @@ jobs:
117114
- script: pip install -r requirements-dev.txt
118115
displayName: 'Install Requirements dev'
119116
- script: |
120-
ruff .
117+
ruff check .
121118
displayName: 'Ruff'
122-
- script: |
123-
rstcheck -r ./_doc ./pandas_streaming
124-
displayName: 'rstcheck'
125119
- script: |
126120
black --diff .
127121
displayName: 'Black'

pandas_streaming/df/connex_split.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from logging import getLogger
33
import pandas
44
import numpy
5-
from sklearn.model_selection import train_test_split
65
from .dataframe_helpers import dataframe_shuffle
76

87
logger = getLogger("pandas-streaming")
@@ -61,6 +60,8 @@ def train_test_split_weights(
6160
raise ValueError(
6261
f"test_size={test_size} or train_size={train_size} cannot be null (1)."
6362
)
63+
from sklearn.model_selection import train_test_split
64+
6465
return train_test_split(
6566
df, test_size=test_size, train_size=train_size, random_state=random_state
6667
)

pandas_streaming/df/dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -640,10 +640,10 @@ def _reservoir_sampling(
640640
if len(indices) < n:
641641
indices.append((i, ir))
642642
else:
643-
x = nrandom.random() # pylint: disable=E1101
643+
x = nrandom.random()
644644
if x * n < (seen - n):
645645
k = nrandom.randint(0, len(indices) - 1)
646-
indices[k] = (i, ir) # pylint: disable=E1126
646+
indices[k] = (i, ir)
647647
indices = set(indices)
648648

649649
def reservoir_iterate(sdf, indices, chunksize):

pandas_streaming/df/dataframe_helpers.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,9 @@ def numpy_types():
2525
numpy.uint16,
2626
numpy.uint32,
2727
numpy.uint64,
28-
numpy.float_,
2928
numpy.float16,
3029
numpy.float32,
3130
numpy.float64,
32-
numpy.complex_,
3331
numpy.complex64,
3432
numpy.complex128,
3533
]
@@ -155,13 +153,13 @@ def hash_floatl(c):
155153
} # pylint: disable=R1721
156154
for c in cols:
157155
t = coltype[c]
158-
if t == int:
156+
if t == int: # noqa: E721
159157
df[c] = df[c].apply(hash_intl)
160158
elif t == numpy.int64:
161159
df[c] = df[c].apply(lambda x: numpy.int64(hash_intl(x)))
162-
elif t == float:
160+
elif t == float: # noqa: E721
163161
df[c] = df[c].apply(hash_floatl)
164-
elif t == object:
162+
elif t == object: # noqa: E721
165163
df[c] = df[c].apply(hash_strl)
166164
else:
167165
raise NotImplementedError( # pragma: no cover

pandas_streaming/df/dataframe_io_helpers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from ujson import dumps
66
except ImportError: # pragma: no cover
77
from json import dumps
8-
import ijson
98

109

1110
class JsonPerRowsStream:
@@ -257,6 +256,8 @@ def enumerate_json_items(
257256
else:
258257
if hasattr(filename, "seek"):
259258
filename.seek(0)
259+
import ijson
260+
260261
parser = ijson.parse(filename)
261262
current = None
262263
curkey = None

pandas_streaming/df/dataframe_split.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def sklearn_train_test_split(
4545
)
4646
with warnings.catch_warnings():
4747
warnings.filterwarnings("ignore", category=ImportWarning)
48-
from sklearn.model_selection import train_test_split # pylint: disable=C0415
48+
from sklearn.model_selection import train_test_split
4949

5050
opts = ["test_size", "train_size", "random_state", "shuffle", "stratify"]
5151
split_ops = {}

pyproject.toml

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,3 @@
1-
[tool.rstcheck]
2-
report_level = "INFO"
3-
ignore_directives = [
4-
"autoclass",
5-
"autofunction",
6-
"automodule",
7-
"exreflist",
8-
"gdot",
9-
"image-sg",
10-
"pr",
11-
"runpython",
12-
]
13-
ignore_roles = ["epkg"]
14-
151
[tool.ruff]
162

173
# Exclude a variety of commonly ignored directories.
@@ -25,11 +11,11 @@ exclude = [
2511
# Same as Black.
2612
line-length = 88
2713

28-
[tool.ruff.mccabe]
14+
[tool.ruff.lint.mccabe]
2915
# Unlike Flake8, default to a complexity level of 10.
3016
max-complexity = 10
3117

32-
[tool.ruff.per-file-ignores]
18+
[tool.ruff.lint.per-file-ignores]
3319
"_doc/examples/plot_first_example.py" = ["E402", "F811"]
3420
"_unittests/ut_df/test_dataframe_io_helpers.py" = ["E501"]
3521
"pandas_streaming/data/__init__.py" = ["F401"]

requirements-dev.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ pycodestyle
1414
pylint>=2.14.0
1515
pytest
1616
pytest-cov
17-
rstcheck[sphinx,toml]
1817
ruff
1918
scikit-learn
2019
scipy

0 commit comments

Comments
 (0)