@@ -38,6 +38,50 @@ def _(mo):
38
38
def _ ():
# Notebook cell (pandas baseline): builds two small frames, left-joins them on
# "store", adds a per-store cumulative sum of "sales", and prints the result
# five times in a row.
39
39
from datetime import datetime
40
40
41
+ import pandas as pd
42
+ import polars as pl
43
+
44
# Store 1 appears twice in data1 but only once in data2, so the left join
# below yields two rows for store 1 that both carry sales == 7.
+ data1 = {"store" : [1 , 1 , 2 ], "date_id" : [4 , 5 , 6 ]}
45
+ data2 = {"store" : [1 , 2 ], "sales" : [7 , 8 ]}
46
+
47
+ pandas_df1 = pd .DataFrame (data1 )
48
+ pandas_df2 = pd .DataFrame (data2 )
49
+
50
+ # The outputs are the same
51
# The identical computation is repeated so the printed frames can be compared
# from one iteration to the next.
+ for _ in range (5 ):
52
+ # Left join
53
+ pandas_df = pd .merge (pandas_df1 , pandas_df2 , on = "store" , how = "left" )
54
+
55
+ # Cumulative sum of sales within each store
56
+ pandas_df ["cumulative_sales" ] = pandas_df .groupby ("store" )["sales" ].cumsum ()
57
+
58
+ print (pandas_df )
59
# Hands the raw dicts and the imported modules to the cells that list them as
# parameters (e.g. the cells taking data1/data2/pl and datetime/pd).
+ return data1 , data2 , datetime , pd , pl
60
+
61
+
62
+ @app .cell
63
+ def _ (data1 , data2 , pl ):
# Notebook cell (polars lazy counterpart): the same left join on "store" and
# per-store cumulative sum of "sales", collected with engine="streaming" and
# printed five times in a row.
64
+ polars_df1 = pl .DataFrame (data1 ).lazy ()
65
+ polars_df2 = pl .DataFrame (data2 ).lazy ()
66
+
67
+ # The outputs are not the same
68
# NOTE(review): the window below specifies no ordering; presumably the
# run-to-run difference is which of the two store-1 rows receives the first
# cumulative value -- confirm against the printed output.
+ for _ in range (5 ):
69
+ print (
70
+ polars_df1 .join (polars_df2 , on = "store" , how = "left" )
71
+ .with_columns (cumulative_sales = pl .col ("sales" ).cum_sum ().over ("store" ))
72
+ .collect (engine = "streaming" )
73
+ )
74
# Bare return: this cell hands no names to other cells.
+ return
75
+
76
+
77
+ @app .cell (hide_code = True )
78
+ def _ (mo ):
# Markdown-only cell (declared with hide_code=True): passes the level-2
# heading "Eager-only solution" to mo.md and returns nothing.
79
+ mo .md (r"""## Eager-only solution""" )
80
+ return
81
+
82
+
83
+ @app .cell
84
+ def _ (datetime , pd ):
41
85
data = {
42
86
"sale_date" : [
43
87
datetime (2025 , 5 , 22 ),
@@ -57,31 +101,10 @@ def _():
57
101
],
58
102
"sales" : [1100 , None , 1450 , 501 , 500 , None ],
59
103
}
60
- return (data ,)
61
104
62
-
63
- @app .cell
64
- def _ (data , pd ):
65
105
pdf = pd .DataFrame (data )
66
- pdf ["sales" ] = pdf .groupby ("store" )["sales" ].ffill ()
67
- pdf
68
- return
69
-
70
-
71
- @app .cell
72
- def _ (data , pl ):
73
- lazy_df = pl .DataFrame (data ).lazy ()
74
- lazy_df .with_columns (
75
- pl .col ("sales" ).fill_null (strategy = "forward" ).over ("store" )
76
- ).collect ()
77
- # ⚠️ This may not work as expected unless you specify order_by="sale_date"
78
- return
79
-
80
-
81
- @app .cell (hide_code = True )
82
- def _ (mo ):
83
- mo .md (r"""## Eager-only solution""" )
84
- return
106
+ print (pdf )
107
+ return (data ,)
85
108
86
109
87
110
@app .cell
@@ -103,18 +126,19 @@ def agnostic_ffill_by_store(df_native: IntoFrameT) -> IntoFrameT:
103
126
104
127
105
128
@app .cell
106
- def _ (agnostic_ffill_by_store , data ):
107
- import pandas as pd
108
- import polars as pl
109
-
129
+ def _ (agnostic_ffill_by_store , data , pd ):
# pandas half of the formerly combined cell: builds a pandas frame from
# `data` and passes it to agnostic_ffill_by_store.
# `pd` now arrives as a cell parameter; the cell-local imports shown as
# removed above are gone.
110
130
# pandas.DataFrame
111
131
df_pandas = pd .DataFrame (data )
112
132
agnostic_ffill_by_store (df_pandas )
133
# df_pandas is returned so that a later cell can take it as a parameter.
+ return (df_pandas ,)
134
+
113
135
136
+ @app .cell
137
+ def _ (agnostic_ffill_by_store , data , pl ):
# polars half of the formerly combined cell: builds a polars frame from
# `data` and passes it to agnostic_ffill_by_store.
114
138
# polars.DataFrame
115
139
df_polars = pl .DataFrame (data )
116
140
agnostic_ffill_by_store (df_polars )
117
- return df_pandas , df_polars , pd , pl
141
# Only df_polars is returned now; the removed line above also returned
# df_pandas, pd and pl from the old combined cell.
+ return ( df_polars ,)
118
142
119
143
120
144
@app .cell
@@ -170,7 +194,12 @@ def _(agnostic_ffill_by_store_improved, df_polars):
170
194
@app .cell
171
195
def _ (agnostic_ffill_by_store_improved , df_pandas ):
# Calls agnostic_ffill_by_store_improved on the pandas frame received as a
# cell parameter.
172
196
# Note that it still supports pandas
173
- agnostic_ffill_by_store_improved (df_pandas )
197
# The result is now printed explicitly instead of being left as a bare
# expression (see the removed line above).
+ print (agnostic_ffill_by_store_improved (df_pandas ))
198
+ return
199
+
200
+
201
+ @app .cell
202
+ def _ ():
# Empty cell: takes no parameters and returns nothing.
# NOTE(review): looks like a leftover blank cell -- confirm whether it is
# intentional or can be dropped.
174
203
return
175
204
176
205
0 commit comments