3
3
import contextvars
4
4
import functools
5
5
import operator
6
+ import pprint
6
7
from copy import deepcopy
7
8
from datetime import timedelta
8
9
from typing import (
@@ -282,6 +283,9 @@ def update(
282
283
The result of the function will be ignored, and the original value will be
283
284
passed downstream.
284
285
286
+ This operation occurs in-place, meaning reassignment is entirely OPTIONAL: the
287
+ original `StreamingDataFrame` is returned for chaining (`sdf.update().print()`).
288
+
285
289
286
290
Example Snippet:
287
291
@@ -297,7 +301,8 @@ def func(values: list, state: State):
297
301
298
302
sdf = StreamingDataframe()
299
303
sdf = sdf.update(func, stateful=True)
300
- sdf = sdf.update(lambda value: print("Received value: ", value))
304
+ # does not require reassigning
305
+ sdf.update(lambda v: v.append(1))
301
306
```
302
307
303
308
:param func: function to update value
@@ -306,6 +311,7 @@ def func(values: list, state: State):
306
311
:param metadata: if True, the callback will receive key, timestamp and headers
307
312
along with the value.
308
313
Default - `False`.
314
+ :return: the updated StreamingDataFrame instance (reassignment NOT required).
309
315
"""
310
316
if stateful :
311
317
self ._register_store ()
@@ -319,15 +325,14 @@ def func(values: list, state: State):
319
325
func = cast (UpdateWithMetadataCallbackStateful , with_metadata_func ),
320
326
processing_context = self ._processing_context ,
321
327
)
322
- stream = self .stream . add_update (
328
+ return self ._add_update (
323
329
cast (UpdateWithMetadataCallback , stateful_func ), metadata = True
324
330
)
325
331
else :
326
- stream = self .stream . add_update (
332
+ return self ._add_update (
327
333
cast (Union [UpdateCallback , UpdateWithMetadataCallback ], func ),
328
334
metadata = metadata ,
329
335
)
330
- return self .__dataframe_clone__ (stream = stream )
331
336
332
337
@overload
333
338
def filter (self , func : FilterCallback ) -> Self : ...
@@ -546,6 +551,9 @@ def to_topic(
546
551
"""
547
552
Produce current value to a topic. You can optionally specify a new key.
548
553
554
+ This operation occurs in-place, meaning reassignment is entirely OPTIONAL: the
555
+ original `StreamingDataFrame` is returned for chaining (`sdf.update().print()`).
556
+
549
557
Example Snippet:
550
558
551
559
```python
@@ -560,17 +568,18 @@ def to_topic(
560
568
561
569
sdf = app.dataframe(input_topic)
562
570
sdf = sdf.to_topic(output_topic_0)
563
- sdf = sdf.to_topic(output_topic_1, key=lambda data: data["a_field"])
571
+ # does not require reassigning
572
+ sdf.to_topic(output_topic_1, key=lambda data: data["a_field"])
564
573
```
565
574
566
575
:param topic: instance of `Topic`
567
576
:param key: a callable to generate a new message key, optional.
568
577
If passed, the return type of this callable must be serializable
569
578
by `key_serializer` defined for this Topic object.
570
579
By default, the current message key will be used.
571
-
580
+ :return: the updated StreamingDataFrame instance (reassignment NOT required).
572
581
"""
573
- return self .update (
582
+ return self ._add_update (
574
583
lambda value , orig_key , timestamp , headers : self ._produce (
575
584
topic = topic ,
576
585
value = value ,
@@ -673,6 +682,48 @@ def _set_headers_callback(
673
682
stream = self .stream .add_transform (func = _set_headers_callback )
674
683
return self .__dataframe_clone__ (stream = stream )
675
684
685
+ def print (self , pretty : bool = True , metadata : bool = False ) -> Self :
686
+ """
687
+ Print out the current message value (and optionally, the message metadata) to
688
+ stdout (console) (like the built-in `print` function).
689
+
690
+ Can also output a more dict-friendly format with `pretty=True`.
691
+
692
+ This operation occurs in-place, meaning reassignment is entirely OPTIONAL: the
693
+ original `StreamingDataFrame` is returned for chaining (`sdf.update().print()`).
694
+
695
+ > NOTE: prints the current (edited) values, not the original values.
696
+
697
+ Example Snippet:
698
+
699
+ ```python
700
+ from quixstreams import Application
701
+
702
+
703
+ app = Application()
704
+ input_topic = app.topic("data")
705
+
706
+ sdf = app.dataframe(input_topic)
707
+ sdf["edited_col"] = sdf["orig_col"] + "edited"
708
+ # print the updated message value with the newly added column
709
+ sdf.print()
710
+ ```
711
+
712
+ :param pretty: Whether to use "pprint" formatting, which uses new-lines and
713
+ indents for easier console reading (but might be worse for log parsing).
714
+ :param metadata: Whether to additionally print the key, timestamp, and headers
715
+ :return: the updated StreamingDataFrame instance (reassignment NOT required).
716
+ """
717
+ print_args = ["value" , "key" , "timestamp" , "headers" ]
718
+ if pretty :
719
+ printer = functools .partial (pprint .pprint , indent = 2 , sort_dicts = False )
720
+ else :
721
+ printer = print
722
+ return self ._add_update (
723
+ lambda * args : printer ({print_args [i ]: args [i ] for i in range (len (args ))}),
724
+ metadata = metadata ,
725
+ )
726
+
676
727
def compose (
677
728
self ,
678
729
sink : Optional [Callable [[Any , Any , int , Any ], None ]] = None ,
@@ -929,6 +980,14 @@ def _produce(
929
980
)
930
981
self ._producer .produce_row (row = row , topic = topic , key = key , timestamp = timestamp )
931
982
983
+ def _add_update (
984
+ self ,
985
+ func : Union [UpdateCallback , UpdateWithMetadataCallback ],
986
+ metadata : bool = False ,
987
+ ):
988
+ self ._stream = self ._stream .add_update (func , metadata = metadata )
989
+ return self
990
+
932
991
def _register_store (self ):
933
992
"""
934
993
Register the default store for input topic in StateStoreManager
@@ -986,7 +1045,7 @@ def __setitem__(self, item_key: Any, item: Union[Self, object]):
986
1045
# Update an item key with a result of another sdf.apply()
987
1046
diff = self .stream .diff (item .stream )
988
1047
other_sdf_composed = diff .compose_returning ()
989
- stream = self .stream . add_update (
1048
+ self ._add_update (
990
1049
lambda value , key , timestamp , headers : operator .setitem (
991
1050
value ,
992
1051
item_key ,
@@ -997,18 +1056,15 @@ def __setitem__(self, item_key: Any, item: Union[Self, object]):
997
1056
elif isinstance (item , StreamingSeries ):
998
1057
# Update an item key with a result of another series
999
1058
series_composed = item .compose_returning ()
1000
- stream = self .stream . add_update (
1059
+ self ._add_update (
1001
1060
lambda value , key , timestamp , headers : operator .setitem (
1002
1061
value , item_key , series_composed (value , key , timestamp , headers )[0 ]
1003
1062
),
1004
1063
metadata = True ,
1005
1064
)
1006
1065
else :
1007
1066
# Update an item key with a constant
1008
- stream = self .stream .add_update (
1009
- lambda value : operator .setitem (value , item_key , item )
1010
- )
1011
- self ._stream = stream
1067
+ self ._add_update (lambda value : operator .setitem (value , item_key , item ))
1012
1068
1013
1069
@overload
1014
1070
def __getitem__ (self , item : str ) -> StreamingSeries : ...
0 commit comments