Commit ad16d96 ("lint")
1 parent: 9753f32

14 files changed: +86 additions, -79 deletions

_doc/conf.py

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import sys
 import os
 from sphinx_runpython.github_link import make_linkcode_resolve
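A note on the change above: since PEP 3120, Python 3 source files are UTF-8 by default, so the # -*- coding: utf-8 -*- cookie removed here (and in the test files below) carries no information. A minimal illustration, not repository code:

# Python 3 reads source as UTF-8 without any encoding declaration (PEP 3120).
title = "première étape"  # non-ASCII literal, no cookie required
print(title)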

_doc/examples/first_step.py

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 """
 First steps with pandas_streaming
 =================================
-
+
 A few difference between :epkg:`pandas` and *pandas_streaming*.
 
 pandas to pandas_streaming

_unittests/ut_df/test_connex_split.py

Lines changed: 2 additions & 2 deletions
@@ -176,7 +176,7 @@ def test_split_connex2(self):
         for k, v in sorted(stats[0].items()):
             rows.append(f"{k}={v}")
         raise AssertionError(
-            "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
+            "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(  # noqa: UP030
                 s1, s2, train, test, "\n".join(rows)
             )
         )
@@ -212,7 +212,7 @@ def test_split_connex_missing(self):
         for k, v in sorted(stats[0].items()):
             rows.append(f"{k}={v}")
         raise AssertionError(
-            "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
+            "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(  # noqa: UP030
                 s1, s2, train, test, "\n".join(rows)
             )
         )
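Most of the # noqa: UP030 markers in this commit follow this pattern: the linter (ruff's UP030, judging by the codes) flags explicit positional indices in str.format, and rather than reword long assertion messages the commit silences the rule. For comparison, an illustrative sketch of the spellings involved, not repository code:

# Three equivalent messages; UP030 flags only the first form.
s1, s2 = {1, 2}, {2, 3}
a = "Non empty intersection {0} & {1}".format(s1, s2)  # explicit indices
b = "Non empty intersection {} & {}".format(s1, s2)    # implicit indices
c = f"Non empty intersection {s1} & {s2}"              # f-string
assert a == b == c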

_unittests/ut_df/test_connex_split_big.py

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 import unittest
 from collections import Counter

_unittests/ut_df/test_connex_split_cat.py

Lines changed: 0 additions & 2 deletions
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 import unittest
 from collections import Counter
 import pandas

_unittests/ut_df/test_streaming_dataframe.py

Lines changed: 4 additions & 4 deletions
@@ -223,7 +223,7 @@ def test_train_test_split_streaming_tiny(self):
 
     def test_train_test_split_streaming_strat(self):
         sdf = dummy_streaming_dataframe(
-            100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(0, 100)]
+            100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(100)]
         )
         trsdf, tesdf = sdf.train_test_split(
             streaming=True, unique_rows=True, stratify="tify"
@@ -324,9 +324,9 @@ def test_concatv(self):
         self.assertEqualDataFrame(m1.to_dataframe(), df)
         m1 = sdf20.concat(df30, axis=0)
         self.assertEqualDataFrame(m1.to_dataframe(), df)
-        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
+        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)  # noqa: C417
         self.assertEqualDataFrame(m1.to_dataframe(), df)
-        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
+        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)  # noqa: C417
         self.assertEqualDataFrame(m1.to_dataframe(), df)
 
         df20["cint"] = df20["cint"].astype(float)
@@ -490,7 +490,7 @@ def test_read_csv_names(self):
     def test_add_column(self):
         df = pandas.DataFrame(data=dict(X=[4.5, 6, 7], Y=["a", "b", "c"]))
         sdf = StreamingDataFrame.read_df(df)
-        sdf2 = sdf.add_column("d", lambda row: int(1))
+        sdf2 = sdf.add_column("d", lambda _row: 1)
         df2 = sdf2.to_dataframe()
         df["d"] = 1
         self.assertEqualDataFrame(df, df2)
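The map(lambda x: x, [df30]) calls look pointless, which is exactly what C417 (unnecessary map) flags, but the test is deliberately feeding concat a lazy iterable rather than a list, so the commit keeps them and adds a noqa. A sketch of the rewrite the rule would otherwise suggest, with made-up names:

# C417 prefers a generator expression over map with a lambda.
frames = ["df30"]
lazy_map = map(lambda x: x, frames)  # flagged form, kept here on purpose
lazy_gen = (x for x in frames)       # the suggested equivalent
assert list(lazy_map) == list(lazy_gen) == frames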

pandas_streaming/data/dummy.py

Lines changed: 3 additions & 5 deletions
@@ -16,14 +16,12 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
     if asfloat:
         df = DataFrame(
             dict(
-                cfloat=[_ + 0.1 for _ in range(0, n)],
-                cstr=[f"s{i}" for i in range(0, n)],
+                cfloat=[_ + 0.1 for _ in range(n)],
+                cstr=[f"s{i}" for i in range(n)],
             )
         )
     else:
-        df = DataFrame(
-            dict(cint=list(range(0, n)), cstr=[f"s{i}" for i in range(0, n)])
-        )
+        df = DataFrame(dict(cint=list(range(n)), cstr=[f"s{i}" for i in range(n)]))
     for k, v in cols.items():
         df[k] = v
     return StreamingDataFrame.read_df(df, chunksize=chunksize)
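The recurring range(0, n) to range(n) rewrite relies on 0 being range's default start (the cleanup rules such as ruff's PIE808 automate); the generated sequence is unchanged:

# An explicit start of 0 is redundant; both spellings are identical.
n = 4
assert list(range(0, n)) == list(range(n)) == [0, 1, 2, 3]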

pandas_streaming/df/connex_split.py

Lines changed: 3 additions & 5 deletions
@@ -12,8 +12,6 @@ class ImbalancedSplitException(Exception):
     Raised when an imbalanced split is detected.
     """
 
-    pass
-
 
 def train_test_split_weights(
     df,
@@ -72,7 +70,7 @@ def train_test_split_weights(
         weights = list(df[weights])
         if len(weights) != df.shape[0]:
             raise ValueError(
-                "Dimension mismatch between weights and dataframe "
+                "Dimension mismatch between weights and dataframe "  # noqa: UP030
                 "{0} != {1}".format(df.shape[0], len(weights))
            )
 
@@ -97,7 +95,7 @@ def train_test_split_weights(
     test_ids = []
     test_weights = 0
     train_weights = 0
-    for i in range(0, df.shape[0]):
+    for i in range(df.shape[0]):
         w = weights[i]
         if balance == 0:
             h = randint(0, 1)
@@ -116,7 +114,7 @@ def train_test_split_weights(
     r = abs(train_weights - test_weights) / (1.0 * (train_weights + test_weights))
     if r >= fail_imbalanced:
         raise ImbalancedSplitException(  # pragma: no cover
-            "Split is imbalanced: train_weights={0} test_weights={1} r={2}."
+            "Split is imbalanced: train_weights={0} test_weights={1} r={2}."  # noqa: UP030
             "".format(train_weights, test_weights, r)
         )
 
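Dropping pass from ImbalancedSplitException is safe because the docstring already forms the class body (the cleanup rules like ruff's PIE790 perform). A minimal sketch with a hypothetical exception name:

# A docstring alone is a valid class body; pass added nothing.
class MySplitError(Exception):
    """Raised when a split is too imbalanced."""

try:
    raise MySplitError("train/test weights diverged")
except MySplitError as exc:
    print(exc)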

pandas_streaming/df/dataframe.py

Lines changed: 23 additions & 22 deletions
@@ -23,8 +23,6 @@ class StreamingDataFrameSchemaError(Exception):
     Reveals an issue with inconsistant schemas.
     """
 
-    pass
-
 
 class StreamingDataFrame:
     """
@@ -273,9 +271,11 @@ def localf(a0=args[0]):
                 **kwargs_create,
             )
 
-        def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
+        def fct1(
+            st=st, args=args, chunksize=chunksize, kw=kwargs.copy()  # noqa: B008
+        ):
             st.seek(0)
-            for r in pandas.read_json(
+            for r in pandas.read_json(  # noqa: UP028
                 st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
             ):
                 yield r
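The B008 suppressions deserve a note: kwargs.copy() in a default argument is evaluated once, at definition time, which bugbear normally flags as a bug; here it is the point, as each closure snapshots the current kwargs. An illustrative sketch, names are mine:

# A call in a default argument runs at def time, freezing the value.
# Usually a bug (hence B008); used deliberately here as a snapshot.
options = {"sep": ","}

def make_reader(kw=options.copy()):  # noqa: B008 -- intentional snapshot
    return kw

options["sep"] = ";"  # later mutation does not affect the snapshot
assert make_reader() == {"sep": ","}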
@@ -293,8 +293,8 @@ def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
                 **kwargs_create,
             )
 
-        def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
-            for r in pandas.read_json(
+        def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):  # noqa: B008
+            for r in pandas.read_json(  # noqa: UP028
                 *args, chunksize=chunksize, nrows=chunksize, **kw
             ):
                 yield r
@@ -318,10 +318,10 @@ def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
                 **kwargs_create,
             )
 
-        def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
+        def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):  # noqa: B008
             if hasattr(st, "seek"):
                 st.seek(0)
-            for r in pandas.read_json(
+            for r in pandas.read_json(  # noqa: UP028
                 st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
             ):
                 yield r
@@ -438,7 +438,7 @@ def __iter__(self):
             elif self.check_schema:
                 if list(it.columns) != sch[0]:  # pylint: disable=E1136
                     raise StreamingDataFrameSchemaError(  # pragma: no cover
-                        "Column names are different after row {0}\nFirst chunk: {1}"
+                        "Column names are different after row {0}\nFirst chunk: {1}"  # noqa: UP030
                         "\nCurrent chunk: {2}".format(rows, sch[0], list(it.columns))
                     )  # pylint: disable=E1136
                 if list(it.dtypes) != sch[1]:  # pylint: disable=E1136
@@ -454,7 +454,7 @@ def __iter__(self):
                     errdf = errdf[errdf["diff"]]
                     errdf.to_csv(tdf, sep=",", index=False)
                     raise StreamingDataFrameSchemaError(
-                        "Column types are different after row {0}. You may use option "
+                        "Column types are different after row {0}. You may use option "  # noqa: UP030
                         'dtype={{"column_name": str}} to force the type on this column.'
                         "\n---\n{1}".format(rows, tdf.getvalue())
                     )
@@ -502,9 +502,7 @@ def to_csv(self, path_or_buf=None, **kwargs) -> "StreamingDataFrame":
             st = StringIO()
             close = False
         elif isinstance(path_or_buf, str):
-            st = open(  # pylint: disable=R1732
-                path_or_buf, "w", encoding=kwargs.get("encoding")
-            )
+            st = open(path_or_buf, "w", encoding=kwargs.get("encoding"))  # noqa: SIM115
             close = True
         else:
             st = path_or_buf
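SIM115 normally asks for a with-block around open(); to_csv cannot use one because the handle has to outlive the call (it is closed later via the close flag), so the commit collapses the call onto one line and silences the rule instead. An illustrative comparison, not repository code:

# Preferred when the handle's lifetime is local:
with open("out.csv", "w", encoding="utf-8") as fh:
    fh.write("a,b\n")

# The streaming case: the handle must survive the function, so no with-block.
def open_target(path):
    fh = open(path, "w", encoding="utf-8")  # noqa: SIM115
    return fh  # caller owns the handle and closes it later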
@@ -537,7 +535,7 @@ def iterrows(self):
         See :epkg:`pandas:DataFrame:iterrows`.
         """
         for df in self:
-            for it in df.iterrows():
+            for it in df.iterrows():  # noqa: UP028
                 yield it
 
     def head(self, n=5) -> pandas.DataFrame:
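UP028 suggests yield from for a loop that only re-yields; iterrows keeps the explicit loop under a noqa. The rewrite the rule has in mind would be, sketched:

# Equivalent generators; UP028 prefers the second form.
def re_yield(chunks):
    for row in chunks:
        yield row

def re_yield_from(chunks):
    yield from chunks

assert list(re_yield([1, 2])) == list(re_yield_from([1, 2]))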
@@ -579,7 +577,8 @@ def where(self, *args, **kwargs) -> "StreamingDataFrame":
         """
         kwargs["inplace"] = False
         return StreamingDataFrame(
-            lambda: map(lambda df: df.where(*args, **kwargs), self), **self.get_kwargs()
+            lambda: map(lambda df: df.where(*args, **kwargs), self),  # noqa: C417
+            **self.get_kwargs(),
         )
 
     def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame":
@@ -608,7 +607,7 @@ def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame":
                 df = sdf.to_df()
                 return StreamingDataFrame.read_df(df, chunksize=df.shape[0])
         return StreamingDataFrame(
-            lambda: map(lambda df: df.sample(**kwargs), self),
+            lambda: map(lambda df: df.sample(**kwargs), self),  # noqa: C417
             **self.get_kwargs(),
             stable=False,
         )
@@ -684,7 +683,7 @@ def drop(
         if inplace:
             raise NotImplementedError(f"drop is not implemented for inplace={inplace}.")
         return StreamingDataFrame(
-            lambda: map(
+            lambda: map(  # noqa: C417
                 lambda df: df.drop(
                     labels,
                     axis=axis,
@@ -706,7 +705,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
         <pandas_streaming.df.dataframe.StreamingDataFrame>`.
         """
         return StreamingDataFrame(
-            lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
+            lambda: map(lambda df: df.apply(*args, **kwargs), self),  # noqa: C417
+            **self.get_kwargs(),
         )
 
     def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
@@ -716,7 +716,7 @@ def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
         <pandas_streaming.df.dataframe.StreamingDataFrame>`.
         """
         return StreamingDataFrame(
-            lambda: map(lambda df: df.applymap(*args, **kwargs), self),
+            lambda: map(lambda df: df.applymap(*args, **kwargs), self),  # noqa: C417
             **self.get_kwargs(),
         )
 
@@ -773,7 +773,7 @@ def _concath(self, others):
             others = [others]
 
         def iterateh(self, others):
-            cols = tuple([self] + others)
+            cols = (self, *others)
             for dfs in zip(*cols):
                 nrows = [_.shape[0] for _ in dfs]
                 if min(nrows) != max(nrows):
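tuple([self] + others) built an intermediate list only to convert it; the starred form (self, *others) expresses the same tuple directly, the kind of rewrite ruff's RUF005 suggests. A quick check with placeholder values:

# Unpacking avoids the intermediate list and the tuple() conversion.
first, rest = "df0", ["df1", "df2"]
assert tuple([first] + rest) == (first, *rest) == ("df0", "df1", "df2")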
@@ -1382,7 +1382,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
             )
         if len(self.columns) != 1:
             raise RuntimeError(  # pragma: no cover
-                f"A series can contain only one column not " f"{len(self.columns)!r}."
+                f"A series can contain only one column not {len(self.columns)!r}."
             )
 
     def apply(self, *args, **kwargs) -> "StreamingDataFrame":
def apply(self, *args, **kwargs) -> "StreamingDataFrame":
@@ -1391,7 +1391,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
13911391
This function returns a @see cl StreamingSeries.
13921392
"""
13931393
return StreamingSeries(
1394-
lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
1394+
lambda: map(lambda df: df.apply(*args, **kwargs), self), # noqa: C417
1395+
**self.get_kwargs(),
13951396
)
13961397

13971398
def __add__(self, value):

pandas_streaming/df/dataframe_helpers.py

Lines changed: 12 additions & 20 deletions
@@ -148,9 +148,7 @@ def hash_floatl(c):
             "hash float"
             return hash_float(c, hash_length)
 
-    coltype = {
-        n: t for n, t in zip(df.columns, df.dtypes)  # pylint: disable=R1721
-    }  # pylint: disable=R1721
+    coltype = dict(zip(df.columns, df.dtypes))
     for c in cols:
         t = coltype[c]
         if t == int:  # noqa: E721
@@ -303,7 +301,7 @@ def pandas_fillna(df, by, hasna=None, suffix=None):
                 cst = b"_"
             else:
                 raise TypeError(  # pragma: no cover
-                    "Unable to determine a constant for type='{0}' dtype='{1}'".format(
+                    "Unable to determine a constant for type='{0}' dtype='{1}'".format(  # noqa: UP030
                         val, df[c].dtype
                     )
                 )
@@ -422,22 +420,20 @@ def pandas_groupby_nan(
         if not nanback:
             dummy = DataFrame([{"a": "a"}])
             do = dummy.dtypes[0]
-            typ = {
-                c: t for c, t in zip(df.columns, df.dtypes)  # pylint: disable=R1721
-            }  # pylint: disable=R1721
+            typ = dict(zip(df.columns, df.dtypes))
             if typ[by[0]] != do:
                 warnings.warn(  # pragma: no cover
-                    f"[pandas_groupby_nan] NaN value: {rep}"
+                    f"[pandas_groupby_nan] NaN value: {rep}", stacklevel=0
                 )
             return res
         for b in by:
             fnan = rep[b]
             if fnan in res.grouper.groups:
                 res.grouper.groups[numpy.nan] = res.grouper.groups[fnan]
                 del res.grouper.groups[fnan]
-            new_val = list(
+            new_val = [
                 (numpy.nan if b == fnan else b) for b in res.grouper.result_index
-            )
+            ]
             res.grouper.groupings[0]._group_index = Index(new_val)
             res.grouper.groupings[0].obj[b].replace(fnan, numpy.nan, inplace=True)
             if hasattr(res.grouper, "grouping"):
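The stacklevel argument added to warnings.warn satisfies the lint rule asking for an explicit stack level (bugbear's B028, if that is the configured rule set). stacklevel=0 keeps the report essentially at the warn call itself; library code more often uses 2 to attribute the warning to the caller, as sketched here with a hypothetical helper:

import warnings

def helper():
    # stacklevel=2 points the warning at helper()'s caller.
    warnings.warn("[pandas_groupby_nan] NaN value encountered", stacklevel=2)

helper()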
@@ -451,7 +447,7 @@ def pandas_groupby_nan(
                     del res.grouper.groupings[0]._cache["result_index"]
                 else:
                     raise NotImplementedError(
-                        "Not implemented for type: {0}".format(
+                        "Not implemented for type: {0}".format(  # noqa: UP030
                             type(res.grouper.groupings[0].grouper)
                         )
                     )
@@ -466,11 +462,9 @@ def pandas_groupby_nan(
             ):
                 index = res.grouper.groupings[0]._cache["result_index"]
                 if len(rep) == 1:
-                    key = list(rep.values())[0]
+                    key = list(rep.values())[0]  # noqa: RUF015
                     new_index = numpy.array(index)
-                    for i in range(
-                        0, len(new_index)
-                    ):  # pylint: disable=C0200
+                    for i in range(len(new_index)):
                         if new_index[i] == key:
                             new_index[i] = numpy.nan
                     res.grouper.groupings[0]._cache["result_index"] = (
@@ -482,7 +476,7 @@
                     )
                 else:
                     raise NotImplementedError(  # pragma: no cover
-                        "Not implemented for type: {0}".format(
+                        "Not implemented for type: {0}".format(  # noqa: UP030
                             type(res.grouper.groupings[0].grouper)
                         )
                     )
@@ -493,13 +487,11 @@
         if not nanback:
             dummy = DataFrame([{"a": "a"}])
             do = dummy.dtypes[0]
-            typ = {
-                c: t for c, t in zip(df.columns, df.dtypes)  # pylint: disable=R1721
-            }  # pylint: disable=R1721
+            typ = dict(zip(df.columns, df.dtypes))
             for b in by:
                 if typ[b] != do:
                     warnings.warn(  # pragma: no cover
-                        f"[pandas_groupby_nan] NaN values: {rep}"
+                        f"[pandas_groupby_nan] NaN values: {rep}", stacklevel=0
                     )
                     break
         return res
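The repeated {c: t for c, t in zip(...)} to dict(zip(...)) change removes comprehensions that merely rebuilt their input pairs, which is what the now-deleted pylint: disable=R1721 (unnecessary-comprehension) comments were suppressing. The equivalence, with placeholder data:

# When the comprehension does no transformation, dict(zip(...)) is identical.
columns = ["cint", "cstr"]
dtypes = ["int64", "object"]
assert {n: t for n, t in zip(columns, dtypes)} == dict(zip(columns, dtypes))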
