Skip to content

Commit bc7f54f

Browse files
edit narwhals row ordering
1 parent 94d58a9 commit bc7f54f

File tree

3 files changed

+61
-52
lines changed

3 files changed

+61
-52
lines changed

.github/workflows/deploy.yml

Lines changed: 0 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -9,31 +9,11 @@ permissions:
99
contents: write
1010

1111
jobs:
12-
build:
13-
runs-on: ubuntu-latest
14-
steps:
15-
- name: Checkout
16-
uses: actions/checkout@v4
17-
18-
- name: Upload artifact
19-
uses: actions/upload-artifact@v4
20-
with:
21-
name: public
22-
path: ./public
23-
2412
deploy:
2513
runs-on: ubuntu-latest
26-
needs: build
2714
steps:
2815
- name: Checkout
2916
uses: actions/checkout@v4
30-
31-
- name: Download artifact
32-
uses: actions/download-artifact@v4
33-
with:
34-
name: public
35-
path: ./public
36-
3717
- name: Deploy
3818
uses: peaceiris/actions-gh-pages@v4
3919
if: github.ref == 'refs/heads/master'

data_science_tools/narwhals_row_ordering.py

Lines changed: 58 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,50 @@ def _(mo):
3838
def _():
3939
from datetime import datetime
4040

41+
import pandas as pd
42+
import polars as pl
43+
44+
data1 = {"store": [1, 1, 2], "date_id": [4, 5, 6]}
45+
data2 = {"store": [1, 2], "sales": [7, 8]}
46+
47+
pandas_df1 = pd.DataFrame(data1)
48+
pandas_df2 = pd.DataFrame(data2)
49+
50+
# The outputs are the same
51+
for _ in range(5):
52+
# Left join
53+
pandas_df = pd.merge(pandas_df1, pandas_df2, on="store", how="left")
54+
55+
# Cumulative sum of sales within each store
56+
pandas_df["cumulative_sales"] = pandas_df.groupby("store")["sales"].cumsum()
57+
58+
print(pandas_df)
59+
return data1, data2, datetime, pd, pl
60+
61+
62+
@app.cell
63+
def _(data1, data2, pl):
64+
polars_df1 = pl.DataFrame(data1).lazy()
65+
polars_df2 = pl.DataFrame(data2).lazy()
66+
67+
# The outputs are not the same
68+
for _ in range(5):
69+
print(
70+
polars_df1.join(polars_df2, on="store", how="left")
71+
.with_columns(cumulative_sales=pl.col("sales").cum_sum().over("store"))
72+
.collect(engine="streaming")
73+
)
74+
return
75+
76+
77+
@app.cell(hide_code=True)
78+
def _(mo):
79+
mo.md(r"""## Eager-only solution""")
80+
return
81+
82+
83+
@app.cell
84+
def _(datetime, pd):
4185
data = {
4286
"sale_date": [
4387
datetime(2025, 5, 22),
@@ -57,31 +101,10 @@ def _():
57101
],
58102
"sales": [1100, None, 1450, 501, 500, None],
59103
}
60-
return (data,)
61104

62-
63-
@app.cell
64-
def _(data, pd):
65105
pdf = pd.DataFrame(data)
66-
pdf["sales"] = pdf.groupby("store")["sales"].ffill()
67-
pdf
68-
return
69-
70-
71-
@app.cell
72-
def _(data, pl):
73-
lazy_df = pl.DataFrame(data).lazy()
74-
lazy_df.with_columns(
75-
pl.col("sales").fill_null(strategy="forward").over("store")
76-
).collect()
77-
# ⚠️ This may not work as expected unless you specify order_by="sale_date"
78-
return
79-
80-
81-
@app.cell(hide_code=True)
82-
def _(mo):
83-
mo.md(r"""## Eager-only solution""")
84-
return
106+
print(pdf)
107+
return (data,)
85108

86109

87110
@app.cell
@@ -103,18 +126,19 @@ def agnostic_ffill_by_store(df_native: IntoFrameT) -> IntoFrameT:
103126

104127

105128
@app.cell
106-
def _(agnostic_ffill_by_store, data):
107-
import pandas as pd
108-
import polars as pl
109-
129+
def _(agnostic_ffill_by_store, data, pd):
110130
# pandas.DataFrame
111131
df_pandas = pd.DataFrame(data)
112132
agnostic_ffill_by_store(df_pandas)
133+
return (df_pandas,)
134+
113135

136+
@app.cell
137+
def _(agnostic_ffill_by_store, data, pl):
114138
# polars.DataFrame
115139
df_polars = pl.DataFrame(data)
116140
agnostic_ffill_by_store(df_polars)
117-
return df_pandas, df_polars, pd, pl
141+
return (df_polars,)
118142

119143

120144
@app.cell
@@ -170,7 +194,12 @@ def _(agnostic_ffill_by_store_improved, df_polars):
170194
@app.cell
171195
def _(agnostic_ffill_by_store_improved, df_pandas):
172196
# Note that it still supports pandas
173-
agnostic_ffill_by_store_improved(df_pandas)
197+
print(agnostic_ffill_by_store_improved(df_pandas))
198+
return
199+
200+
201+
@app.cell
202+
def _():
174203
return
175204

176205

0 commit comments

Comments
 (0)