Skip to content

Commit 21415a0

Browse files
authored
add drop feature (#393)
1 parent 000d795 commit 21415a0

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

quixstreams/dataframe/dataframe.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,40 @@ def hopping_window(
966966
name=name,
967967
)
968968

969+
def drop(self, columns: Union[str, List[str]]) -> Self:
    """
    Drop column(s) from the message value (value must support `del`, like a dict).

    This operation occurs in-place, meaning reassignment is entirely OPTIONAL: the
    original `StreamingDataFrame` is returned for chaining
    (`sdf.drop("x").print()`).


    Example Snippet:

    ```python
    # Remove columns "x" and "y" from the value.
    # This would transform {"x": 1, "y": 2, "z": 3} to {"z": 3}

    sdf = StreamingDataFrame()
    sdf.drop(["x", "y"])
    ```

    :param columns: a single column name or a list of names, where names are `str`.
        An empty list is a no-op. A list is copied at call time, so mutating it
        afterwards does not affect which columns are dropped.
    :return: this StreamingDataFrame (the update is registered in place),
        so calls can be chained
    :raises TypeError: if `columns` is neither a `str` nor a `list`, or if the
        list contains anything other than `str`
    """
    if isinstance(columns, list):
        if not columns:
            # Nothing to drop: do not register an update step at all.
            return self
        if not all(isinstance(s, str) for s in columns):
            raise TypeError("column list must contain strings only")
        # Snapshot the validated names: the lambda below runs later, per message,
        # and must not observe changes the caller makes to their own list.
        columns = list(columns)
    elif isinstance(columns, str):
        columns = [columns]
    else:
        raise TypeError(
            f"Expected a string or a list of strings, not {type(columns)}"
        )
    return self._add_update(lambda value: _drop(value, columns), metadata=False)
1002+
9691003
def _produce(
9701004
self,
9711005
topic: Topic,
@@ -1111,6 +1145,16 @@ def __bool__(self):
11111145
)
11121146

11131147

1148+
def _drop(value: Dict, columns: List[str]):
1149+
"""
1150+
remove columns from the value, inplace
1151+
:param value: a dict or something that supports `del`
1152+
:param columns: a list of column names
1153+
"""
1154+
for column in columns:
1155+
del value[column]
1156+
1157+
11141158
def _as_metadata_func(
11151159
func: Union[ApplyCallbackStateful, FilterCallbackStateful, UpdateCallbackStateful]
11161160
) -> Union[

tests/test_quixstreams/test_dataframe/test_dataframe.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,39 @@ def test_print(self, dataframe_factory, metadata, expected, capsys):
390390

391391
assert expected in capsys.readouterr().out
392392

393+
@pytest.mark.parametrize(
    "columns, expected",
    [
        ("col_a", {"col_b": 2, "col_c": 3}),
        (["col_a"], {"col_b": 2, "col_c": 3}),
        (["col_a", "col_b"], {"col_c": 3}),
    ],
)
def test_drop(self, dataframe_factory, columns, expected):
    """
    Dropping a single name or a list of names removes exactly those keys
    from the value and leaves key, timestamp and headers untouched.
    """
    key, timestamp, headers = b"key", 0, []
    sdf = dataframe_factory()
    # Intentionally not reassigned: `drop` is applied to `sdf` in place.
    sdf.drop(columns)
    results = sdf.test(
        value={"col_a": 1, "col_b": 2, "col_c": 3},
        key=key,
        timestamp=timestamp,
        headers=headers,
    )
    assert results[0] == (expected, key, timestamp, headers)
409+
410+
@pytest.mark.parametrize("columns", [["col_a", 3], b"col_d", {"col_a"}])
def test_drop_invalid_columns(self, dataframe_factory, columns):
    """
    A list with a non-string item, a bytes object and a set are all
    rejected with `TypeError`.
    """
    frame = dataframe_factory()
    with pytest.raises(TypeError):
        frame.drop(columns)
415+
416+
def test_drop_empty_list(self, dataframe_factory):
    """
    Dropping an empty list is ignored entirely: the stream tree is the same
    before and after the call.
    """
    sdf = dataframe_factory()
    tree_before = sdf.stream.tree()
    tree_after = sdf.drop([]).stream.tree()
    assert tree_before == tree_after
425+
393426

394427
class TestStreamingDataFrameApplyExpand:
395428
def test_apply_expand(self, dataframe_factory):

0 commit comments

Comments
 (0)