Skip to content

Commit 0062b91

Browse files
committed
Add notebook for "Eager vs Lazy DataFrames: One Fix to Make Your Code Work Anywhere" article
1 parent d401e14 commit 0062b91

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import marimo
2+
3+
# Version stamp stored in the notebook file; presumably the marimo release
# that generated it -- confirm against marimo's file-format docs.
__generated_with = "0.13.11"
4+
# Application object that each `@app.cell` function in this file is attached
# to, and that `app.run()` executes when the file is run as a script.
app = marimo.App(width="medium")
5+
6+
7+
@app.cell
def _():
    # Expose marimo under the `mo` alias; the markdown cells take it as input.
    import marimo as mo

    return (mo,)
11+
12+
13+
# Notebook title, rendered as a level-1 markdown heading.
@app.cell
def _(mo):
    mo.md(
        r"""# Eager vs Lazy DataFrames: One Fix to Make Your Code Work Anywhere"""
    )
    return
19+
20+
21+
@app.cell
def _():
    from datetime import datetime

    # Sample data: three consecutive days (22-24 May 2025) for each of two
    # stores, six rows in total. `sales` has a None gap in each store, which
    # is what the fill helpers defined in this notebook operate on.
    _days = [datetime(2025, 5, _day) for _day in (22, 23, 24)]
    data = {
        "sale_date": _days + _days,
        "store": ["Thimphu"] * 3 + ["Paro"] * 3,
        "sales": [1100, None, 1450, 501, 500, None],
    }
    return (data,)
45+
46+
47+
# Section heading for the version that only handles eager frames.
@app.cell
def _(mo):
    mo.md(r"""## Eager-only solution""")
    return
51+
52+
53+
@app.cell
def _():
    import narwhals as nw
    from narwhals.typing import IntoFrameT

    def agnostic_ffill_by_store(df_native: IntoFrameT) -> IntoFrameT:
        """Forward-fill null ``sales`` values within each ``store`` group.

        The input is wrapped with Narwhals, the ``sales`` column is replaced
        by its forward-filled version computed per ``store``, and the result
        is converted back to the native frame type.

        Supports pandas and Polars.DataFrame, but not lazy ones: no ordering
        is given to ``over``, so the fill follows the frame's own row order.
        """
        frame = nw.from_native(df_native)
        filled_sales = nw.col("sales").fill_null(strategy="forward").over("store")
        return frame.with_columns(filled_sales).to_native()

    return IntoFrameT, agnostic_ffill_by_store, nw
69+
70+
71+
@app.cell
def _(agnostic_ffill_by_store, data):
    import pandas as pd
    import polars as pl
    import pyarrow as pa

    # Build the same sample data as an eager frame in each library.
    df_pandas = pd.DataFrame(data)
    df_polars = pl.DataFrame(data)

    # pandas.DataFrame
    agnostic_ffill_by_store(df_pandas)

    # polars.DataFrame (being the last expression, this is the cell's output)
    agnostic_ffill_by_store(df_polars)
    return (df_pandas,)
85+
86+
87+
@app.cell
def _(df_polars):
    import duckdb

    # Build the relation from the frame object itself instead of asking DuckDB
    # to look up a Python variable called "df_polars" by name. A name that only
    # appears inside a string is invisible to marimo's static dependency
    # analysis, so the cell would not be re-run when the frame changes and
    # would only work if it happened to execute after the frame was defined.
    # Referencing `df_polars` directly makes it a declared input of this cell.
    duckdb_rel = duckdb.from_arrow(df_polars.to_arrow())

    # Bare trailing expression: shown as the cell's output.
    duckdb_rel
    return (duckdb_rel,)
94+
95+
96+
# Section heading for the version that handles both eager and lazy frames.
@app.cell
def _(mo):
    mo.md(r"""## Eager and lazy solution""")
    return
100+
101+
102+
@app.cell
def _(IntoFrameT, nw):
    def agnostic_ffill_by_store_improved(df_native: IntoFrameT) -> IntoFrameT:
        """Forward-fill null ``sales`` per ``store``, ordered by ``sale_date``.

        The input is wrapped with Narwhals, ``sales`` is replaced by its
        forward-filled version computed within each ``store`` in ``sale_date``
        order, and the result is converted back to the native frame type.
        """
        frame = nw.from_native(df_native)
        # Note the `order_by` argument: the fill order is stated explicitly
        # rather than taken from whatever row order the input happens to have.
        filled_sales = (
            nw.col("sales")
            .fill_null(strategy="forward")
            .over("store", order_by="sale_date")
        )
        return frame.with_columns(filled_sales).to_native()

    return (agnostic_ffill_by_store_improved,)
116+
117+
118+
@app.cell
def _(agnostic_ffill_by_store_improved, duckdb_rel):
    # Apply the order-aware helper to the DuckDB relation and show the result.
    _filled = agnostic_ffill_by_store_improved(duckdb_rel)
    _filled
    return
122+
123+
124+
@app.cell
def _(agnostic_ffill_by_store_improved, df_pandas):
    # Note that it still supports pandas: the same helper is applied to the
    # eager pandas frame and the result is shown as the cell's output.
    _filled = agnostic_ffill_by_store_improved(df_pandas)
    _filled
    return
129+
130+
131+
# Empty trailing cell: takes no inputs, runs no code, and exports nothing.
@app.cell
def _():
    return
134+
135+
136+
# Run the notebook's cells when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()

0 commit comments

Comments
 (0)