Skip to content

Commit 73e5e64

Browse files
authored
Better lint (#42)
* lint * doc
1 parent 9753f32 commit 73e5e64

16 files changed

+86
-85
lines changed

README.rst

-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@ pandas-streaming: streaming API over pandas
55
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
66
:alt: Build Status Windows
77

8-
.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
9-
:target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
10-
118
.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
129
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/
1310

_doc/conf.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# -*- coding: utf-8 -*-
21
import sys
32
import os
43
from sphinx_runpython.github_link import make_linkcode_resolve

_doc/examples/first_step.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
First steps with pandas_streaming
33
=================================
4-
4+
55
A few differences between :epkg:`pandas` and *pandas_streaming*.
66
77
pandas to pandas_streaming

_doc/index.rst

-3
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@ pandas-streaming: streaming API over pandas
99
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
1010
:alt: Build Status Windows
1111

12-
.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
13-
:target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
14-
1512
.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
1613
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/
1714

_unittests/ut_df/test_connex_split.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def test_split_connex2(self):
176176
for k, v in sorted(stats[0].items()):
177177
rows.append(f"{k}={v}")
178178
raise AssertionError(
179-
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
179+
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030
180180
s1, s2, train, test, "\n".join(rows)
181181
)
182182
)
@@ -212,7 +212,7 @@ def test_split_connex_missing(self):
212212
for k, v in sorted(stats[0].items()):
213213
rows.append(f"{k}={v}")
214214
raise AssertionError(
215-
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
215+
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030
216216
s1, s2, train, test, "\n".join(rows)
217217
)
218218
)

_unittests/ut_df/test_connex_split_big.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# -*- coding: utf-8 -*-
21
import os
32
import unittest
43
from collections import Counter

_unittests/ut_df/test_connex_split_cat.py

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
31
import unittest
42
from collections import Counter
53
import pandas

_unittests/ut_df/test_streaming_dataframe.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def test_train_test_split_streaming_tiny(self):
223223

224224
def test_train_test_split_streaming_strat(self):
225225
sdf = dummy_streaming_dataframe(
226-
100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(0, 100)]
226+
100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(100)]
227227
)
228228
trsdf, tesdf = sdf.train_test_split(
229229
streaming=True, unique_rows=True, stratify="tify"
@@ -324,9 +324,9 @@ def test_concatv(self):
324324
self.assertEqualDataFrame(m1.to_dataframe(), df)
325325
m1 = sdf20.concat(df30, axis=0)
326326
self.assertEqualDataFrame(m1.to_dataframe(), df)
327-
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
327+
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0) # noqa: C417
328328
self.assertEqualDataFrame(m1.to_dataframe(), df)
329-
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
329+
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0) # noqa: C417
330330
self.assertEqualDataFrame(m1.to_dataframe(), df)
331331

332332
df20["cint"] = df20["cint"].astype(float)
@@ -490,7 +490,7 @@ def test_read_csv_names(self):
490490
def test_add_column(self):
491491
df = pandas.DataFrame(data=dict(X=[4.5, 6, 7], Y=["a", "b", "c"]))
492492
sdf = StreamingDataFrame.read_df(df)
493-
sdf2 = sdf.add_column("d", lambda row: int(1))
493+
sdf2 = sdf.add_column("d", lambda _row: 1)
494494
df2 = sdf2.to_dataframe()
495495
df["d"] = 1
496496
self.assertEqualDataFrame(df, df2)

pandas_streaming/data/dummy.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,12 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
1616
if asfloat:
1717
df = DataFrame(
1818
dict(
19-
cfloat=[_ + 0.1 for _ in range(0, n)],
20-
cstr=[f"s{i}" for i in range(0, n)],
19+
cfloat=[_ + 0.1 for _ in range(n)],
20+
cstr=[f"s{i}" for i in range(n)],
2121
)
2222
)
2323
else:
24-
df = DataFrame(
25-
dict(cint=list(range(0, n)), cstr=[f"s{i}" for i in range(0, n)])
26-
)
24+
df = DataFrame(dict(cint=list(range(n)), cstr=[f"s{i}" for i in range(n)]))
2725
for k, v in cols.items():
2826
df[k] = v
2927
return StreamingDataFrame.read_df(df, chunksize=chunksize)

pandas_streaming/df/connex_split.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ class ImbalancedSplitException(Exception):
1212
Raised when an imbalanced split is detected.
1313
"""
1414

15-
pass
16-
1715

1816
def train_test_split_weights(
1917
df,
@@ -72,7 +70,7 @@ def train_test_split_weights(
7270
weights = list(df[weights])
7371
if len(weights) != df.shape[0]:
7472
raise ValueError(
75-
"Dimension mismatch between weights and dataframe "
73+
"Dimension mismatch between weights and dataframe " # noqa: UP030
7674
"{0} != {1}".format(df.shape[0], len(weights))
7775
)
7876

@@ -97,7 +95,7 @@ def train_test_split_weights(
9795
test_ids = []
9896
test_weights = 0
9997
train_weights = 0
100-
for i in range(0, df.shape[0]):
98+
for i in range(df.shape[0]):
10199
w = weights[i]
102100
if balance == 0:
103101
h = randint(0, 1)
@@ -116,7 +114,7 @@ def train_test_split_weights(
116114
r = abs(train_weights - test_weights) / (1.0 * (train_weights + test_weights))
117115
if r >= fail_imbalanced:
118116
raise ImbalancedSplitException( # pragma: no cover
119-
"Split is imbalanced: train_weights={0} test_weights={1} r={2}."
117+
"Split is imbalanced: train_weights={0} test_weights={1} r={2}." # noqa: UP030
120118
"".format(train_weights, test_weights, r)
121119
)
122120

pandas_streaming/df/dataframe.py

+23-22
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ class StreamingDataFrameSchemaError(Exception):
2323
Reveals an issue with inconsistent schemas.
2424
"""
2525

26-
pass
27-
2826

2927
class StreamingDataFrame:
3028
"""
@@ -273,9 +271,11 @@ def localf(a0=args[0]):
273271
**kwargs_create,
274272
)
275273

276-
def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
274+
def fct1(
275+
st=st, args=args, chunksize=chunksize, kw=kwargs.copy() # noqa: B008
276+
):
277277
st.seek(0)
278-
for r in pandas.read_json(
278+
for r in pandas.read_json( # noqa: UP028
279279
st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
280280
):
281281
yield r
@@ -293,8 +293,8 @@ def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
293293
**kwargs_create,
294294
)
295295

296-
def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
297-
for r in pandas.read_json(
296+
def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()): # noqa: B008
297+
for r in pandas.read_json( # noqa: UP028
298298
*args, chunksize=chunksize, nrows=chunksize, **kw
299299
):
300300
yield r
@@ -318,10 +318,10 @@ def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
318318
**kwargs_create,
319319
)
320320

321-
def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
321+
def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()): # noqa: B008
322322
if hasattr(st, "seek"):
323323
st.seek(0)
324-
for r in pandas.read_json(
324+
for r in pandas.read_json( # noqa: UP028
325325
st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
326326
):
327327
yield r
@@ -438,7 +438,7 @@ def __iter__(self):
438438
elif self.check_schema:
439439
if list(it.columns) != sch[0]: # pylint: disable=E1136
440440
raise StreamingDataFrameSchemaError( # pragma: no cover
441-
"Column names are different after row {0}\nFirst chunk: {1}"
441+
"Column names are different after row {0}\nFirst chunk: {1}" # noqa: UP030
442442
"\nCurrent chunk: {2}".format(rows, sch[0], list(it.columns))
443443
) # pylint: disable=E1136
444444
if list(it.dtypes) != sch[1]: # pylint: disable=E1136
@@ -454,7 +454,7 @@ def __iter__(self):
454454
errdf = errdf[errdf["diff"]]
455455
errdf.to_csv(tdf, sep=",", index=False)
456456
raise StreamingDataFrameSchemaError(
457-
"Column types are different after row {0}. You may use option "
457+
"Column types are different after row {0}. You may use option " # noqa: UP030
458458
'dtype={{"column_name": str}} to force the type on this column.'
459459
"\n---\n{1}".format(rows, tdf.getvalue())
460460
)
@@ -502,9 +502,7 @@ def to_csv(self, path_or_buf=None, **kwargs) -> "StreamingDataFrame":
502502
st = StringIO()
503503
close = False
504504
elif isinstance(path_or_buf, str):
505-
st = open( # pylint: disable=R1732
506-
path_or_buf, "w", encoding=kwargs.get("encoding")
507-
)
505+
st = open(path_or_buf, "w", encoding=kwargs.get("encoding")) # noqa: SIM115
508506
close = True
509507
else:
510508
st = path_or_buf
@@ -537,7 +535,7 @@ def iterrows(self):
537535
See :epkg:`pandas:DataFrame:iterrows`.
538536
"""
539537
for df in self:
540-
for it in df.iterrows():
538+
for it in df.iterrows(): # noqa: UP028
541539
yield it
542540

543541
def head(self, n=5) -> pandas.DataFrame:
@@ -579,7 +577,8 @@ def where(self, *args, **kwargs) -> "StreamingDataFrame":
579577
"""
580578
kwargs["inplace"] = False
581579
return StreamingDataFrame(
582-
lambda: map(lambda df: df.where(*args, **kwargs), self), **self.get_kwargs()
580+
lambda: map(lambda df: df.where(*args, **kwargs), self), # noqa: C417
581+
**self.get_kwargs(),
583582
)
584583

585584
def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame":
@@ -608,7 +607,7 @@ def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame"
608607
df = sdf.to_df()
609608
return StreamingDataFrame.read_df(df, chunksize=df.shape[0])
610609
return StreamingDataFrame(
611-
lambda: map(lambda df: df.sample(**kwargs), self),
610+
lambda: map(lambda df: df.sample(**kwargs), self), # noqa: C417
612611
**self.get_kwargs(),
613612
stable=False,
614613
)
@@ -684,7 +683,7 @@ def drop(
684683
if inplace:
685684
raise NotImplementedError(f"drop is not implemented for inplace={inplace}.")
686685
return StreamingDataFrame(
687-
lambda: map(
686+
lambda: map( # noqa: C417
688687
lambda df: df.drop(
689688
labels,
690689
axis=axis,
@@ -706,7 +705,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
706705
<pandas_streaming.df.dataframe.StreamingDataFrame>`.
707706
"""
708707
return StreamingDataFrame(
709-
lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
708+
lambda: map(lambda df: df.apply(*args, **kwargs), self), # noqa: C417
709+
**self.get_kwargs(),
710710
)
711711

712712
def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
@@ -716,7 +716,7 @@ def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
716716
<pandas_streaming.df.dataframe.StreamingDataFrame>`.
717717
"""
718718
return StreamingDataFrame(
719-
lambda: map(lambda df: df.applymap(*args, **kwargs), self),
719+
lambda: map(lambda df: df.applymap(*args, **kwargs), self), # noqa: C417
720720
**self.get_kwargs(),
721721
)
722722

@@ -773,7 +773,7 @@ def _concath(self, others):
773773
others = [others]
774774

775775
def iterateh(self, others):
776-
cols = tuple([self] + others)
776+
cols = (self, *others)
777777
for dfs in zip(*cols):
778778
nrows = [_.shape[0] for _ in dfs]
779779
if min(nrows) != max(nrows):
@@ -1382,7 +1382,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
13821382
)
13831383
if len(self.columns) != 1:
13841384
raise RuntimeError( # pragma: no cover
1385-
f"A series can contain only one column not " f"{len(self.columns)!r}."
1385+
f"A series can contain only one column not {len(self.columns)!r}."
13861386
)
13871387

13881388
def apply(self, *args, **kwargs) -> "StreamingDataFrame":
@@ -1391,7 +1391,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
13911391
This function returns a @see cl StreamingSeries.
13921392
"""
13931393
return StreamingSeries(
1394-
lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
1394+
lambda: map(lambda df: df.apply(*args, **kwargs), self), # noqa: C417
1395+
**self.get_kwargs(),
13951396
)
13961397

13971398
def __add__(self, value):

0 commit comments

Comments
 (0)