Skip to content

Commit 410d9c0

Browse files
hide markdown
1 parent 5e24432 commit 410d9c0

File tree

2 files changed

+30
-13
lines changed

2 files changed

+30
-13
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Collection of useful data science topics along with articles and videos.
99
## The Data Scientist's Toolkit: 100+ Essential Tools for Modern Analytics
1010

1111
To receive a condensed overview of these tools and additional resources, sign up for [CodeCut's free PDF guide](https://codecut.ai/data-scientist-toolkit/?utm_source=github&utm_medium=data_science_repo&utm_campaign=free_pdf). This comprehensive 264-page document covers over 100 essential data science tools, providing you with a valuable reference for your work.
12-
12+
1313
## How to Download the Code in This Repository to Your Local Machine
1414

1515
To download the code in this repo, you can simply use git clone:
@@ -93,7 +93,7 @@ git clone https://github.com/khuyentran1401/Data-science
9393

9494
| Title | Article | Repository | Video |
9595
| ------------- |:-------------:| :-----:| :-----:|
96-
| Pytest for Data Scientists | [🔗](https://codecut.ai/pytest-for-data-scientists-3/?utm_source=github&utm_medium=data_science_repo&utm_campaign=blog) | [🔗](https://github.com/khuyentran1401/Data-science/tree/master/data_science_tools/pytest) | [🔗](https://www.youtube.com/playlist?list=PLnK6m_JBRVNoYEer9hBmTNwkYB3gmbOPO)
96+
| Pytest for Data Scientists | [🔗](https://codecut.ai/git-for-data-scientists-learn-git-through-practical-examples/?utm_source=github&utm_medium=data_science_repo&utm_campaign=blog) | [🔗](https://github.com/khuyentran1401/Data-science/tree/master/data_science_tools/pytest) | [🔗](https://www.youtube.com/playlist?list=PLnK6m_JBRVNoYEer9hBmTNwkYB3gmbOPO)
9797
| 4 Lessor-Known Yet Awesome Tips for Pytest | [🔗](https://towardsdatascience.com/4-lessor-known-yet-awesome-tips-for-pytest-2117d8a62d9c) | [🔗](https://github.com/khuyentran1401/Data-science/tree/master/data_science_tools/advanced_pytest)
9898
| DeepDiff — Recursively Find and Ignore Trivial Differences Using Python | [🔗](https://towardsdatascience.com/deepdiff-recursively-find-and-ignore-trivial-differences-using-python-231a5524f41d) | [🔗](https://github.com/khuyentran1401/Data-science/blob/master/productive_tools/deepdiff_example.ipynb)
9999
| Checklist — Behavioral Testing of NLP Models | [🔗](https://towardsdatascience.com/checklist-behavioral-testing-of-nlp-models-491cf11f0238) | [🔗](https://github.com/khuyentran1401/Data-science/blob/master/nlp/checklist/checklist_examples.ipynb)

data_science_tools/narwhals.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,31 @@
1+
# /// script
2+
# requires-python = ">=3.11"
3+
# dependencies = [
4+
# "duckdb==1.2.2",
5+
# "marimo",
6+
# "narwhals==1.39.0",
7+
# "pandas==2.2.3",
8+
# "polars==1.29.0",
9+
# "pyarrow==20.0.0",
10+
# "pyspark==3.5.5",
11+
# "sqlframe==3.32.1",
12+
# ]
13+
# ///
14+
115
import marimo
216

3-
__generated_with = "0.13.7"
17+
__generated_with = "0.13.6"
418
app = marimo.App(width="medium")
519

620

721
@app.cell
822
def _():
923
import marimo as mo
24+
1025
return (mo,)
1126

1227

13-
@app.cell
28+
@app.cell(hide_code=True)
1429
def _(mo):
1530
mo.md(
1631
r"""
@@ -42,7 +57,7 @@ def monthly_aggregate_bad(user_df):
4257
return df.resample("MS", on="date")[["price"]].mean()
4358

4459

45-
@app.cell
60+
@app.cell(hide_code=True)
4661
def _(mo):
4762
mo.md(
4863
r"""
@@ -56,12 +71,11 @@ def _(mo):
5671

5772
@app.cell
5873
def _(F):
74+
import duckdb
5975
import pandas as pd
6076
import polars as pl
61-
import duckdb
6277
import pyspark
6378

64-
6579
def monthly_aggregate_unmaintainable(user_df):
6680
if isinstance(user_df, pd.DataFrame):
6781
result = user_df.resample("MS", on="date")[["price"]].mean()
@@ -90,10 +104,11 @@ def monthly_aggregate_unmaintainable(user_df):
90104
).sort("date")
91105
# TODO: more branches for PyArrow, Dask, etc... :sob:
92106
return result
107+
93108
return duckdb, pd, pl
94109

95110

96-
@app.cell
111+
@app.cell(hide_code=True)
97112
def _(mo):
98113
mo.md(
99114
r"""
@@ -113,7 +128,6 @@ def _():
113128
import narwhals as nw
114129
from narwhals.typing import IntoFrameT
115130

116-
117131
def monthly_aggregate(user_df: IntoFrameT) -> IntoFrameT:
118132
return (
119133
nw.from_native(user_df)
@@ -122,10 +136,11 @@ def monthly_aggregate(user_df: IntoFrameT) -> IntoFrameT:
122136
.sort("date")
123137
.to_native()
124138
)
139+
125140
return (monthly_aggregate,)
126141

127142

128-
@app.cell
143+
@app.cell(hide_code=True)
129144
def _(mo):
130145
mo.md(r"""## Demo: let's verify that it works!""")
131146
return
@@ -161,13 +176,15 @@ def _(data, monthly_aggregate, pl):
161176
@app.cell
162177
def _(duckdb, monthly_aggregate):
163178
# DuckDB
164-
rel = duckdb.sql("""
179+
rel = duckdb.sql(
180+
"""
165181
from values (timestamp '2020-01-01', 1),
166182
(timestamp '2020-01-08', 4),
167183
(timestamp '2020-02-03', 3)
168184
df(date, price)
169185
select *
170-
""")
186+
"""
187+
)
171188
monthly_aggregate(rel)
172189
return
173190

@@ -182,7 +199,7 @@ def _(data, monthly_aggregate):
182199
return
183200

184201

185-
@app.cell
202+
@app.cell(hide_code=True)
186203
def _(mo):
187204
mo.md(
188205
r"""

0 commit comments

Comments
 (0)