Skip to content

Commit 1db2d43

Browse files
Merge pull request #29 from CodeCutTech/nw-ordering
Nw ordering
2 parents d401e14 + 90ce367 commit 1db2d43

File tree

4 files changed

+225
-2
lines changed

4 files changed

+225
-2
lines changed
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# /// script
# requires-python = ">=3.11"
# dependencies = [
#     "duckdb==1.3.0",
#     "marimo",
#     "narwhals==1.40.0",
#     "pandas==2.2.3",
#     "polars==1.30.0",
#     "pyarrow==20.0.0",
# ]
# ///
12+
13+
import marimo
14+
15+
# Presumably the marimo release that generated this notebook file.
__generated_with = "0.13.7"
# Application object; each cell below attaches itself through `@app.cell`.
app = marimo.App(width="medium")
17+
18+
19+
@app.cell
def _():
    # Import marimo under the `mo` alias and export it so that the Markdown
    # cells below can take it as an input.
    import marimo as mo

    return (mo,)
23+
24+
25+
@app.cell(hide_code=True)
def _(mo):
    # Notebook title, written as a level-one Markdown heading.
    mo.md(
        r"""# Eager vs Lazy DataFrames: One Fix to Make Your Code Work Anywhere"""
    )
    return
29+
30+
31+
@app.cell
def _():
    from datetime import datetime

    # The same three consecutive days are recorded for each of the two stores.
    _sale_days = [datetime(2025, 5, _day) for _day in (22, 23, 24)]

    # Column-oriented sample table: all "Thimphu" rows first, then all "Paro"
    # rows. `None` marks the sales figures that are missing.
    data = {
        "sale_date": _sale_days + _sale_days,
        "store": ["Thimphu"] * 3 + ["Paro"] * 3,
        "sales": [1100, None, 1450, 501, 500, None],
    }
    return (data,)
55+
56+
57+
@app.cell(hide_code=True)
def _(mo):
    # Section heading for the first (eager-only) implementation.
    mo.md(
        r"""## Eager-only solution"""
    )
    return
61+
62+
63+
@app.cell
def _():
    import narwhals as nw
    from narwhals.typing import IntoFrameT

    def agnostic_ffill_by_store(df_native: IntoFrameT) -> IntoFrameT:
        """Forward-fill null `sales` values within each `store` group.

        The native frame is wrapped with Narwhals, its `sales` column is
        replaced by a forward-filled version partitioned by `store`, and the
        result is converted back to the caller's native frame type. No
        `order_by` is passed to `over`.
        """
        # Supports pandas and Polars.DataFrame, but not lazy ones.
        frame = nw.from_native(df_native)
        filled_sales = (
            nw.col("sales").fill_null(strategy="forward").over("store")
        )
        return frame.with_columns(filled_sales).to_native()

    return IntoFrameT, agnostic_ffill_by_store, nw
79+
80+
81+
@app.cell
def _(agnostic_ffill_by_store, data):
    import pandas as pd
    import polars as pl

    # pandas.DataFrame
    df_pandas = pd.DataFrame(data)
    agnostic_ffill_by_store(df_pandas)

    # polars.DataFrame
    df_polars = pl.DataFrame(data)
    agnostic_ffill_by_store(df_polars)
    # `df_polars` is exported alongside `df_pandas` because the DuckDB cell
    # right after this one builds its relation from it.
    return df_pandas, df_polars


@app.cell
def _(df_polars):
    import duckdb

    # Build the relation from the frame object itself instead of
    # `duckdb.table("df_polars")`. A variable that is named only inside a
    # string is not a declared input of this cell, so that lookup depended on
    # `df_polars` happening to be visible when the cell ran. Referencing the
    # object makes the dependency on the previous cell explicit.
    duckdb_rel = duckdb.from_arrow(df_polars.to_arrow())
    duckdb_rel
    return (duckdb_rel,)
103+
104+
105+
@app.cell(hide_code=True)
def _(mo):
    # Section heading for the second implementation, which adds `order_by`.
    mo.md(
        r"""## Eager and lazy solution"""
    )
    return
109+
110+
111+
@app.cell
def _(IntoFrameT, nw):
    def agnostic_ffill_by_store_improved(df_native: IntoFrameT) -> IntoFrameT:
        """Forward-fill null `sales` per `store`, ordered by `sale_date`.

        Same pipeline as the eager-only variant (wrap with Narwhals, replace
        `sales`, convert back to the native type), except that the window
        passed to `over` also names `sale_date` as its ordering column.
        """
        wrapped = nw.from_native(df_native)
        # Note the `order_by` statement
        ordered_fill = (
            nw.col("sales")
            .fill_null(strategy="forward")
            .over("store", order_by="sale_date")
        )
        return wrapped.with_columns(ordered_fill).to_native()

    return (agnostic_ffill_by_store_improved,)
125+
126+
127+
@app.cell
def _(agnostic_ffill_by_store_improved, duckdb_rel):
    # Apply the order-aware fill to the DuckDB relation; the result is the
    # cell's displayed output.
    agnostic_ffill_by_store_improved(duckdb_rel)
    return
131+
132+
133+
@app.cell
def _(agnostic_ffill_by_store_improved, df_pandas):
    # Note that it still supports pandas
    # (the same function is applied to the eager pandas frame built earlier).
    agnostic_ffill_by_store_improved(df_pandas)
    return
138+
139+
140+
# Start the notebook app only when this file is executed directly.
if __name__ == "__main__":
    app.run()

public/data_science_tools/narwhals.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)