Better lint (#42)

xadupre · web-flow · commit 73e5e6404a52 · 2024-09-07T16:49:51.000+02:00
* lint

* doc
diff --git a/README.rst b/README.rst
@@ -5,9 +5,6 @@ pandas-streaming: streaming API over pandas
     :target: https://ci.appveyor.com/project/sdpython/pandas-streaming
     :alt: Build Status Windows
 
-.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
-    :target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
-
 .. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
     :target: https://dev.azure.com/xavierdupre3/pandas_streaming/
 
diff --git a/_doc/conf.py b/_doc/conf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import sys
 import os
 from sphinx_runpython.github_link import make_linkcode_resolve
diff --git a/_doc/examples/first_step.py b/_doc/examples/first_step.py
@@ -1,7 +1,7 @@
 """
 First steps with pandas_streaming
 =================================
- 
+
 A few difference between :epkg:`pandas` and *pandas_streaming*.
 
 pandas to pandas_streaming
diff --git a/_doc/index.rst b/_doc/index.rst
@@ -9,9 +9,6 @@ pandas-streaming: streaming API over pandas
     :target: https://ci.appveyor.com/project/sdpython/pandas-streaming
     :alt: Build Status Windows
 
-.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
-    :target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
-
 .. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
     :target: https://dev.azure.com/xavierdupre3/pandas_streaming/
 
diff --git a/_unittests/ut_df/test_connex_split.py b/_unittests/ut_df/test_connex_split.py
@@ -176,7 +176,7 @@ def test_split_connex2(self):
                 for k, v in sorted(stats[0].items()):
                     rows.append(f"{k}={v}")
                 raise AssertionError(
-                    "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
+                    "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(  # noqa: UP030
                         s1, s2, train, test, "\n".join(rows)
                     )
                 )
@@ -212,7 +212,7 @@ def test_split_connex_missing(self):
                 for k, v in sorted(stats[0].items()):
                     rows.append(f"{k}={v}")
                 raise AssertionError(
-                    "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
+                    "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(  # noqa: UP030
                         s1, s2, train, test, "\n".join(rows)
                     )
                 )
diff --git a/_unittests/ut_df/test_connex_split_big.py b/_unittests/ut_df/test_connex_split_big.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 import unittest
 from collections import Counter
diff --git a/_unittests/ut_df/test_connex_split_cat.py b/_unittests/ut_df/test_connex_split_cat.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 import unittest
 from collections import Counter
 import pandas
diff --git a/_unittests/ut_df/test_streaming_dataframe.py b/_unittests/ut_df/test_streaming_dataframe.py
@@ -223,7 +223,7 @@ def test_train_test_split_streaming_tiny(self):
 
     def test_train_test_split_streaming_strat(self):
         sdf = dummy_streaming_dataframe(
-            100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(0, 100)]
+            100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(100)]
         )
         trsdf, tesdf = sdf.train_test_split(
             streaming=True, unique_rows=True, stratify="tify"
@@ -324,9 +324,9 @@ def test_concatv(self):
         self.assertEqualDataFrame(m1.to_dataframe(), df)
         m1 = sdf20.concat(df30, axis=0)
         self.assertEqualDataFrame(m1.to_dataframe(), df)
-        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
+        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)  # noqa: C417
         self.assertEqualDataFrame(m1.to_dataframe(), df)
-        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
+        m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)  # noqa: C417
         self.assertEqualDataFrame(m1.to_dataframe(), df)
 
         df20["cint"] = df20["cint"].astype(float)
@@ -490,7 +490,7 @@ def test_read_csv_names(self):
     def test_add_column(self):
         df = pandas.DataFrame(data=dict(X=[4.5, 6, 7], Y=["a", "b", "c"]))
         sdf = StreamingDataFrame.read_df(df)
-        sdf2 = sdf.add_column("d", lambda row: int(1))
+        sdf2 = sdf.add_column("d", lambda _row: 1)
         df2 = sdf2.to_dataframe()
         df["d"] = 1
         self.assertEqualDataFrame(df, df2)
diff --git a/pandas_streaming/data/dummy.py b/pandas_streaming/data/dummy.py
@@ -16,14 +16,12 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
     if asfloat:
         df = DataFrame(
             dict(
-                cfloat=[_ + 0.1 for _ in range(0, n)],
-                cstr=[f"s{i}" for i in range(0, n)],
+                cfloat=[_ + 0.1 for _ in range(n)],
+                cstr=[f"s{i}" for i in range(n)],
             )
         )
     else:
-        df = DataFrame(
-            dict(cint=list(range(0, n)), cstr=[f"s{i}" for i in range(0, n)])
-        )
+        df = DataFrame(dict(cint=list(range(n)), cstr=[f"s{i}" for i in range(n)]))
     for k, v in cols.items():
         df[k] = v
     return StreamingDataFrame.read_df(df, chunksize=chunksize)
diff --git a/pandas_streaming/df/connex_split.py b/pandas_streaming/df/connex_split.py
@@ -12,8 +12,6 @@ class ImbalancedSplitException(Exception):
     Raised when an imbalanced split is detected.
     """
 
-    pass
-
 
 def train_test_split_weights(
     df,
@@ -72,7 +70,7 @@ def train_test_split_weights(
         weights = list(df[weights])
     if len(weights) != df.shape[0]:
         raise ValueError(
-            "Dimension mismatch between weights and dataframe "
+            "Dimension mismatch between weights and dataframe "  # noqa: UP030
             "{0} != {1}".format(df.shape[0], len(weights))
         )
 
@@ -97,7 +95,7 @@ def train_test_split_weights(
     test_ids = []
     test_weights = 0
     train_weights = 0
-    for i in range(0, df.shape[0]):
+    for i in range(df.shape[0]):
         w = weights[i]
         if balance == 0:
             h = randint(0, 1)
@@ -116,7 +114,7 @@ def train_test_split_weights(
     r = abs(train_weights - test_weights) / (1.0 * (train_weights + test_weights))
     if r >= fail_imbalanced:
         raise ImbalancedSplitException(  # pragma: no cover
-            "Split is imbalanced: train_weights={0} test_weights={1} r={2}."
+            "Split is imbalanced: train_weights={0} test_weights={1} r={2}."  # noqa: UP030
             "".format(train_weights, test_weights, r)
         )
 
diff --git a/pandas_streaming/df/dataframe.py b/pandas_streaming/df/dataframe.py
@@ -23,8 +23,6 @@ class StreamingDataFrameSchemaError(Exception):
     Reveals an issue with inconsistant schemas.
     """
 
-    pass
-
 
 class StreamingDataFrame:
     """
@@ -273,9 +271,11 @@ def localf(a0=args[0]):
                     **kwargs_create,
                 )
 
-            def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
+            def fct1(
+                st=st, args=args, chunksize=chunksize, kw=kwargs.copy()  # noqa: B008
+            ):
                 st.seek(0)
-                for r in pandas.read_json(
+                for r in pandas.read_json(  # noqa: UP028
                     st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
                 ):
                     yield r
@@ -293,8 +293,8 @@ def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
                     **kwargs_create,
                 )
 
-            def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
-                for r in pandas.read_json(
+            def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):  # noqa: B008
+                for r in pandas.read_json(  # noqa: UP028
                     *args, chunksize=chunksize, nrows=chunksize, **kw
                 ):
                     yield r
@@ -318,10 +318,10 @@ def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
                 **kwargs_create,
             )
 
-        def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
+        def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):  # noqa: B008
             if hasattr(st, "seek"):
                 st.seek(0)
-            for r in pandas.read_json(
+            for r in pandas.read_json(  # noqa: UP028
                 st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
             ):
                 yield r
@@ -438,7 +438,7 @@ def __iter__(self):
             elif self.check_schema:
                 if list(it.columns) != sch[0]:  # pylint: disable=E1136
                     raise StreamingDataFrameSchemaError(  # pragma: no cover
-                        "Column names are different after row {0}\nFirst   chunk: {1}"
+                        "Column names are different after row {0}\nFirst   chunk: {1}"  # noqa: UP030
                         "\nCurrent chunk: {2}".format(rows, sch[0], list(it.columns))
                     )  # pylint: disable=E1136
                 if list(it.dtypes) != sch[1]:  # pylint: disable=E1136
@@ -454,7 +454,7 @@ def __iter__(self):
                     errdf = errdf[errdf["diff"]]
                     errdf.to_csv(tdf, sep=",", index=False)
                     raise StreamingDataFrameSchemaError(
-                        "Column types are different after row {0}. You may use option "
+                        "Column types are different after row {0}. You may use option "  # noqa: UP030
                         'dtype={{"column_name": str}} to force the type on this column.'
                         "\n---\n{1}".format(rows, tdf.getvalue())
                     )
@@ -502,9 +502,7 @@ def to_csv(self, path_or_buf=None, **kwargs) -> "StreamingDataFrame":
             st = StringIO()
             close = False
         elif isinstance(path_or_buf, str):
-            st = open(  # pylint: disable=R1732
-                path_or_buf, "w", encoding=kwargs.get("encoding")
-            )
+            st = open(path_or_buf, "w", encoding=kwargs.get("encoding"))  # noqa: SIM115
             close = True
         else:
             st = path_or_buf
@@ -537,7 +535,7 @@ def iterrows(self):
         See :epkg:`pandas:DataFrame:iterrows`.
         """
         for df in self:
-            for it in df.iterrows():
+            for it in df.iterrows():  # noqa: UP028
                 yield it
 
     def head(self, n=5) -> pandas.DataFrame:
@@ -579,7 +577,8 @@ def where(self, *args, **kwargs) -> "StreamingDataFrame":
         """
         kwargs["inplace"] = False
         return StreamingDataFrame(
-            lambda: map(lambda df: df.where(*args, **kwargs), self), **self.get_kwargs()
+            lambda: map(lambda df: df.where(*args, **kwargs), self),  # noqa: C417
+            **self.get_kwargs(),
         )
 
     def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame":
@@ -608,7 +607,7 @@ def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame"
             df = sdf.to_df()
             return StreamingDataFrame.read_df(df, chunksize=df.shape[0])
         return StreamingDataFrame(
-            lambda: map(lambda df: df.sample(**kwargs), self),
+            lambda: map(lambda df: df.sample(**kwargs), self),  # noqa: C417
             **self.get_kwargs(),
             stable=False,
         )
@@ -684,7 +683,7 @@ def drop(
         if inplace:
             raise NotImplementedError(f"drop is not implemented for inplace={inplace}.")
         return StreamingDataFrame(
-            lambda: map(
+            lambda: map(  # noqa: C417
                 lambda df: df.drop(
                     labels,
                     axis=axis,
@@ -706,7 +705,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
         <pandas_streaming.df.dataframe.StreamingDataFrame>`.
         """
         return StreamingDataFrame(
-            lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
+            lambda: map(lambda df: df.apply(*args, **kwargs), self),  # noqa: C417
+            **self.get_kwargs(),
         )
 
     def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
@@ -716,7 +716,7 @@ def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
         <pandas_streaming.df.dataframe.StreamingDataFrame>`.
         """
         return StreamingDataFrame(
-            lambda: map(lambda df: df.applymap(*args, **kwargs), self),
+            lambda: map(lambda df: df.applymap(*args, **kwargs), self),  # noqa: C417
             **self.get_kwargs(),
         )
 
@@ -773,7 +773,7 @@ def _concath(self, others):
             others = [others]
 
         def iterateh(self, others):
-            cols = tuple([self] + others)
+            cols = (self, *others)
             for dfs in zip(*cols):
                 nrows = [_.shape[0] for _ in dfs]
                 if min(nrows) != max(nrows):
@@ -1382,7 +1382,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
         )
         if len(self.columns) != 1:
             raise RuntimeError(  # pragma: no cover
-                f"A series can contain only one column not " f"{len(self.columns)!r}."
+                f"A series can contain only one column not {len(self.columns)!r}."
             )
 
     def apply(self, *args, **kwargs) -> "StreamingDataFrame":
@@ -1391,7 +1391,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
         This function returns a @see cl StreamingSeries.
         """
         return StreamingSeries(
-            lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
+            lambda: map(lambda df: df.apply(*args, **kwargs), self),  # noqa: C417
+            **self.get_kwargs(),
         )
 
     def __add__(self, value):
diff --git a/pandas_streaming/df/dataframe_helpers.py b/pandas_streaming/df/dataframe_helpers.py
diff --git a/pandas_streaming/df/dataframe_split.py b/pandas_streaming/df/dataframe_split.py
diff --git a/pandas_streaming/ext_test_case.py b/pandas_streaming/ext_test_case.py
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/setup.py b/setup.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-# -*- coding: utf-8 -*-`
`2`	`1`	`import sys`
`3`	`2`	`import os`
`4`	`3`	`from sphinx_runpython.github_link import make_linkcode_resolve`
Original file line number	Diff line number	Diff line change
`@@ -176,7 +176,7 @@ def test_split_connex2(self):`
`176`	`176`	`for k, v in sorted(stats[0].items()):`
`177`	`177`	`rows.append(f"{k}={v}")`
`178`	`178`	`raise AssertionError(`
`179`		`- "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(`
	`179`	`+ "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030`
`180`	`180`	`s1, s2, train, test, "\n".join(rows)`
`181`	`181`	`)`
`182`	`182`	`)`
`@@ -212,7 +212,7 @@ def test_split_connex_missing(self):`
`212`	`212`	`for k, v in sorted(stats[0].items()):`
`213`	`213`	`rows.append(f"{k}={v}")`
`214`	`214`	`raise AssertionError(`
`215`		`- "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(`
	`215`	`+ "Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030`
`216`	`216`	`s1, s2, train, test, "\n".join(rows)`
`217`	`217`	`)`
`218`	`218`	`)`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,3 @@`
`1`		`-# -*- coding: utf-8 -*-`
`2`		`-`
`3`	`1`	`import unittest`
`4`	`2`	`from collections import Counter`
`5`	`3`	`import pandas`