Skip to content

Better lint #42

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ pandas-streaming: streaming API over pandas
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
:alt: Build Status Windows

.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
:target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main

.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/

Expand Down
1 change: 0 additions & 1 deletion _doc/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
import sys
import os
from sphinx_runpython.github_link import make_linkcode_resolve
Expand Down
2 changes: 1 addition & 1 deletion _doc/examples/first_step.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
First steps with pandas_streaming
=================================

A few differences between :epkg:`pandas` and *pandas_streaming*.

pandas to pandas_streaming
Expand Down
3 changes: 0 additions & 3 deletions _doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ pandas-streaming: streaming API over pandas
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
:alt: Build Status Windows

.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
:target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main

.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/

Expand Down
4 changes: 2 additions & 2 deletions _unittests/ut_df/test_connex_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def test_split_connex2(self):
for k, v in sorted(stats[0].items()):
rows.append(f"{k}={v}")
raise AssertionError(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030
s1, s2, train, test, "\n".join(rows)
)
)
Expand Down Expand Up @@ -212,7 +212,7 @@ def test_split_connex_missing(self):
for k, v in sorted(stats[0].items()):
rows.append(f"{k}={v}")
raise AssertionError(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format(
"Non empty intersection {0} & {1}\n{2}\n{3}\n{4}".format( # noqa: UP030
s1, s2, train, test, "\n".join(rows)
)
)
Expand Down
1 change: 0 additions & 1 deletion _unittests/ut_df/test_connex_split_big.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
import os
import unittest
from collections import Counter
Expand Down
2 changes: 0 additions & 2 deletions _unittests/ut_df/test_connex_split_cat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-

import unittest
from collections import Counter
import pandas
Expand Down
8 changes: 4 additions & 4 deletions _unittests/ut_df/test_streaming_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_train_test_split_streaming_tiny(self):

def test_train_test_split_streaming_strat(self):
sdf = dummy_streaming_dataframe(
100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(0, 100)]
100, asfloat=True, tify=["t1" if i % 3 else "t0" for i in range(100)]
)
trsdf, tesdf = sdf.train_test_split(
streaming=True, unique_rows=True, stratify="tify"
Expand Down Expand Up @@ -324,9 +324,9 @@ def test_concatv(self):
self.assertEqualDataFrame(m1.to_dataframe(), df)
m1 = sdf20.concat(df30, axis=0)
self.assertEqualDataFrame(m1.to_dataframe(), df)
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0) # noqa: C417
self.assertEqualDataFrame(m1.to_dataframe(), df)
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0)
m1 = sdf20.concat(map(lambda x: x, [df30]), axis=0) # noqa: C417
self.assertEqualDataFrame(m1.to_dataframe(), df)

df20["cint"] = df20["cint"].astype(float)
Expand Down Expand Up @@ -490,7 +490,7 @@ def test_read_csv_names(self):
def test_add_column(self):
df = pandas.DataFrame(data=dict(X=[4.5, 6, 7], Y=["a", "b", "c"]))
sdf = StreamingDataFrame.read_df(df)
sdf2 = sdf.add_column("d", lambda row: int(1))
sdf2 = sdf.add_column("d", lambda _row: 1)
df2 = sdf2.to_dataframe()
df["d"] = 1
self.assertEqualDataFrame(df, df2)
Expand Down
8 changes: 3 additions & 5 deletions pandas_streaming/data/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
if asfloat:
df = DataFrame(
dict(
cfloat=[_ + 0.1 for _ in range(0, n)],
cstr=[f"s{i}" for i in range(0, n)],
cfloat=[_ + 0.1 for _ in range(n)],
cstr=[f"s{i}" for i in range(n)],
)
)
else:
df = DataFrame(
dict(cint=list(range(0, n)), cstr=[f"s{i}" for i in range(0, n)])
)
df = DataFrame(dict(cint=list(range(n)), cstr=[f"s{i}" for i in range(n)]))
for k, v in cols.items():
df[k] = v
return StreamingDataFrame.read_df(df, chunksize=chunksize)
8 changes: 3 additions & 5 deletions pandas_streaming/df/connex_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ class ImbalancedSplitException(Exception):
Raised when an imbalanced split is detected.
"""

pass


def train_test_split_weights(
df,
Expand Down Expand Up @@ -72,7 +70,7 @@ def train_test_split_weights(
weights = list(df[weights])
if len(weights) != df.shape[0]:
raise ValueError(
"Dimension mismatch between weights and dataframe "
"Dimension mismatch between weights and dataframe " # noqa: UP030
"{0} != {1}".format(df.shape[0], len(weights))
)

Expand All @@ -97,7 +95,7 @@ def train_test_split_weights(
test_ids = []
test_weights = 0
train_weights = 0
for i in range(0, df.shape[0]):
for i in range(df.shape[0]):
w = weights[i]
if balance == 0:
h = randint(0, 1)
Expand All @@ -116,7 +114,7 @@ def train_test_split_weights(
r = abs(train_weights - test_weights) / (1.0 * (train_weights + test_weights))
if r >= fail_imbalanced:
raise ImbalancedSplitException( # pragma: no cover
"Split is imbalanced: train_weights={0} test_weights={1} r={2}."
"Split is imbalanced: train_weights={0} test_weights={1} r={2}." # noqa: UP030
"".format(train_weights, test_weights, r)
)

Expand Down
45 changes: 23 additions & 22 deletions pandas_streaming/df/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ class StreamingDataFrameSchemaError(Exception):
Reveals an issue with inconsistent schemas.
"""

pass


class StreamingDataFrame:
"""
Expand Down Expand Up @@ -273,9 +271,11 @@ def localf(a0=args[0]):
**kwargs_create,
)

def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
def fct1(
st=st, args=args, chunksize=chunksize, kw=kwargs.copy() # noqa: B008
):
st.seek(0)
for r in pandas.read_json(
for r in pandas.read_json( # noqa: UP028
st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
):
yield r
Expand All @@ -293,8 +293,8 @@ def fct1(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
**kwargs_create,
)

def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
for r in pandas.read_json(
def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()): # noqa: B008
for r in pandas.read_json( # noqa: UP028
*args, chunksize=chunksize, nrows=chunksize, **kw
):
yield r
Expand All @@ -318,10 +318,10 @@ def fct2(args=args, chunksize=chunksize, kw=kwargs.copy()):
**kwargs_create,
)

def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()):
def fct3(st=st, args=args, chunksize=chunksize, kw=kwargs.copy()): # noqa: B008
if hasattr(st, "seek"):
st.seek(0)
for r in pandas.read_json(
for r in pandas.read_json( # noqa: UP028
st, *args, chunksize=chunksize, nrows=chunksize, lines=True, **kw
):
yield r
Expand Down Expand Up @@ -438,7 +438,7 @@ def __iter__(self):
elif self.check_schema:
if list(it.columns) != sch[0]: # pylint: disable=E1136
raise StreamingDataFrameSchemaError( # pragma: no cover
"Column names are different after row {0}\nFirst chunk: {1}"
"Column names are different after row {0}\nFirst chunk: {1}" # noqa: UP030
"\nCurrent chunk: {2}".format(rows, sch[0], list(it.columns))
) # pylint: disable=E1136
if list(it.dtypes) != sch[1]: # pylint: disable=E1136
Expand All @@ -454,7 +454,7 @@ def __iter__(self):
errdf = errdf[errdf["diff"]]
errdf.to_csv(tdf, sep=",", index=False)
raise StreamingDataFrameSchemaError(
"Column types are different after row {0}. You may use option "
"Column types are different after row {0}. You may use option " # noqa: UP030
'dtype={{"column_name": str}} to force the type on this column.'
"\n---\n{1}".format(rows, tdf.getvalue())
)
Expand Down Expand Up @@ -502,9 +502,7 @@ def to_csv(self, path_or_buf=None, **kwargs) -> "StreamingDataFrame":
st = StringIO()
close = False
elif isinstance(path_or_buf, str):
st = open( # pylint: disable=R1732
path_or_buf, "w", encoding=kwargs.get("encoding")
)
st = open(path_or_buf, "w", encoding=kwargs.get("encoding")) # noqa: SIM115
close = True
else:
st = path_or_buf
Expand Down Expand Up @@ -537,7 +535,7 @@ def iterrows(self):
See :epkg:`pandas:DataFrame:iterrows`.
"""
for df in self:
for it in df.iterrows():
for it in df.iterrows(): # noqa: UP028
yield it

def head(self, n=5) -> pandas.DataFrame:
Expand Down Expand Up @@ -579,7 +577,8 @@ def where(self, *args, **kwargs) -> "StreamingDataFrame":
"""
kwargs["inplace"] = False
return StreamingDataFrame(
lambda: map(lambda df: df.where(*args, **kwargs), self), **self.get_kwargs()
lambda: map(lambda df: df.where(*args, **kwargs), self), # noqa: C417
**self.get_kwargs(),
)

def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame":
Expand Down Expand Up @@ -608,7 +607,7 @@ def sample(self, reservoir=False, cache=False, **kwargs) -> "StreamingDataFrame"
df = sdf.to_df()
return StreamingDataFrame.read_df(df, chunksize=df.shape[0])
return StreamingDataFrame(
lambda: map(lambda df: df.sample(**kwargs), self),
lambda: map(lambda df: df.sample(**kwargs), self), # noqa: C417
**self.get_kwargs(),
stable=False,
)
Expand Down Expand Up @@ -684,7 +683,7 @@ def drop(
if inplace:
raise NotImplementedError(f"drop is not implemented for inplace={inplace}.")
return StreamingDataFrame(
lambda: map(
lambda: map( # noqa: C417
lambda df: df.drop(
labels,
axis=axis,
Expand All @@ -706,7 +705,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
<pandas_streaming.df.dataframe.StreamingDataFrame>`.
"""
return StreamingDataFrame(
lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
lambda: map(lambda df: df.apply(*args, **kwargs), self), # noqa: C417
**self.get_kwargs(),
)

def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
Expand All @@ -716,7 +716,7 @@ def applymap(self, *args, **kwargs) -> "StreamingDataFrame":
<pandas_streaming.df.dataframe.StreamingDataFrame>`.
"""
return StreamingDataFrame(
lambda: map(lambda df: df.applymap(*args, **kwargs), self),
lambda: map(lambda df: df.applymap(*args, **kwargs), self), # noqa: C417
**self.get_kwargs(),
)

Expand Down Expand Up @@ -773,7 +773,7 @@ def _concath(self, others):
others = [others]

def iterateh(self, others):
cols = tuple([self] + others)
cols = (self, *others)
for dfs in zip(*cols):
nrows = [_.shape[0] for _ in dfs]
if min(nrows) != max(nrows):
Expand Down Expand Up @@ -1382,7 +1382,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
)
if len(self.columns) != 1:
raise RuntimeError( # pragma: no cover
f"A series can contain only one column not " f"{len(self.columns)!r}."
f"A series can contain only one column not {len(self.columns)!r}."
)

def apply(self, *args, **kwargs) -> "StreamingDataFrame":
Expand All @@ -1391,7 +1391,8 @@ def apply(self, *args, **kwargs) -> "StreamingDataFrame":
This function returns a @see cl StreamingSeries.
"""
return StreamingSeries(
lambda: map(lambda df: df.apply(*args, **kwargs), self), **self.get_kwargs()
lambda: map(lambda df: df.apply(*args, **kwargs), self), # noqa: C417
**self.get_kwargs(),
)

def __add__(self, value):
Expand Down
Loading
Loading