Skip to content

Commit 0057320

Browse files
committed
add standalone evaluate notebook
1 parent 980fcdd commit 0057320

File tree

1 file changed

+237
-0
lines changed

1 file changed

+237
-0
lines changed
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# 📊 Running Standalone Evaluate\n",
8+
"This notebook allows you to run the Evaluate step with your own training and synthetic data. This is helpful if:\n",
9+
"- You want to compare the output from Gretel Synthetics to other means of generating synthetic data\n",
10+
"- You want to make sure that the train/test split is consistent across multiple Safe Synthetics runs so that the scores are comparable"
11+
]
12+
},
13+
{
14+
"cell_type": "markdown",
15+
"metadata": {},
16+
"source": [
17+
"## 💾 Install Gretel SDK"
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": null,
23+
"metadata": {
24+
"id": "qxXA-UJVMRhI"
25+
},
26+
"outputs": [],
27+
"source": [
28+
"%%capture\n",
29+
"%pip install -U gretel-client"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"## 🌐 Configure your Gretel Session"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"id": "-MMNWeINRAZr"
44+
},
45+
"outputs": [],
46+
"source": [
47+
"# Set Gretel API key as an environment variable\n",
48+
"import os\n",
49+
"os.environ[\"GRETEL_API_KEY\"] = \"grtu....\""
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": null,
55+
"metadata": {
56+
"id": "sCL3uQSLMTrs"
57+
},
58+
"outputs": [],
59+
"source": [
60+
"from gretel_client import create_or_get_unique_project\n",
61+
"from gretel_client.config import get_session_config\n",
62+
"from gretel_client.navigator_client import Gretel\n",
63+
"\n",
64+
"gretel = Gretel()\n",
65+
"project_name = \"test-project\"\n",
66+
"session = get_session_config()\n",
67+
"project = create_or_get_unique_project(name=project_name, session=session)\n",
68+
"\n",
69+
"project.get_console_url()"
70+
]
71+
},
72+
{
73+
"cell_type": "markdown",
74+
"metadata": {},
75+
"source": [
76+
"## 🔬 Load real and synthetic data"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"metadata": {
83+
"id": "p0mN5rdCNbJ5"
84+
},
85+
"outputs": [],
86+
"source": [
87+
"import pandas as pd\n",
88+
"\n",
89+
"from sklearn.model_selection import train_test_split\n",
90+
"\n",
91+
"real_ds = \"https://gretel-datasets.s3.us-west-2.amazonaws.com/hipaa_patients.csv\"\n",
92+
"synthetic_ds = \"https://gretel-datasets.s3.us-west-2.amazonaws.com/synthetic_hipaa_patients.csv\"\n",
93+
"real_df = pd.read_csv(real_ds)\n",
94+
"synthetic_df = pd.read_csv(synthetic_ds)\n",
95+
"\n",
96+
"train_df, holdout_df = train_test_split(df, test_size=0.05, random_state=42)\n",
97+
"\n",
98+
"print(f\"Number of rows - train: {len(train_df)}\")\n",
99+
"print(f\"Number of rows - holdout: {len(holdout_df)}\")\n",
100+
"print(f\"Number of rows - synthetic: {len(synthetic_df)}\")\n",
101+
"train_df.head()"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": null,
107+
"metadata": {
108+
"id": "V5HFCFW2M8FT"
109+
},
110+
"outputs": [],
111+
"source": [
112+
"# Upload each pandas DataFrame as a dataset file and keep the returned file id\n",
113+
"from gretel_client.files import FileClient\n",
114+
"\n",
115+
"file_client = FileClient()\n",
116+
"\n",
117+
"resp_train = file_client.upload(train_df, \"dataset\")\n",
118+
"train_file_id = resp_train.id\n",
119+
"resp_holdout = file_client.upload(holdout_df, \"dataset\")\n",
120+
"holdout_file_id = resp_holdout.id\n",
121+
"resp_synthetic = file_client.upload(synthetic_df, \"dataset\")\n",
122+
"synthetic_file_id = resp_synthetic.id"
123+
]
124+
},
125+
{
126+
"cell_type": "markdown",
127+
"metadata": {},
128+
"source": [
129+
"## 🏃 Run Evaluate"
130+
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": null,
135+
"metadata": {
136+
"id": "au2V0khbM5CJ"
137+
},
138+
"outputs": [],
139+
"source": [
140+
"import requests\n",
141+
"import yaml\n",
142+
"\n",
143+
"def run_workflow(config: str):\n",
144+
" \"\"\"Create a workflow, and workflow run from a given yaml config. Blocks and\n",
145+
" prints log lines until the workflow reaches a terminal state.\n",
146+
"\n",
147+
" Args:\n",
148+
" config: The workflow config to run.\n",
149+
" \"\"\"\n",
150+
" config_dict = yaml.safe_load(config)\n",
151+
"\n",
152+
" response = requests.post(\n",
153+
" f\"{session.endpoint}/v2/workflows/exec_batch\",\n",
154+
" json={\n",
155+
" \"workflow_config\": config_dict,\n",
156+
" \"project_id\": project.project_guid,\n",
157+
" },\n",
158+
" headers={\"Authorization\": session.api_key}\n",
159+
" )\n",
160+
" response_body = response.json()\n",
161+
"\n",
162+
" print(response_body)\n",
163+
"\n",
164+
" workflow_id = response_body[\"workflow_id\"]\n",
165+
" workflow_run_id = response_body[\"workflow_run_id\"]\n",
166+
"\n",
167+
" workflow_run_url = (\n",
168+
" f\"{project.get_console_url().replace(project.project_guid, '')}workflows/\"\n",
169+
" f\"{workflow_id}/runs/{workflow_run_id}\"\n",
170+
" )\n",
171+
"\n",
172+
" print(f\"workflow: {workflow_id}\")\n",
173+
" print(f\"workflow run id: {workflow_run_id}\")\n",
174+
" print(workflow_run_url)"
175+
]
176+
},
177+
{
178+
"cell_type": "code",
179+
"execution_count": null,
180+
"metadata": {
181+
"id": "rtjEVolUM-Yf"
182+
},
183+
"outputs": [],
184+
"source": [
185+
"eval_config = f\"\"\"\n",
186+
"name: evaluate\n",
187+
"version: \"2\"\n",
188+
"\n",
189+
"steps:\n",
190+
" - name: holdout\n",
191+
" task: holdout\n",
192+
" inputs: [{train_file_id}, {holdout_file_id}]\n",
193+
" config: {{}}\n",
194+
" - name: eval\n",
195+
" task: evaluate_safe_synthetics_dataset\n",
196+
" inputs: [{synthetic_file_id}, \"holdout\"]\n",
197+
" config: {{}}\n",
198+
"\"\"\"\n",
199+
"\n",
200+
"run_workflow(eval_config)"
201+
]
202+
},
203+
{
204+
"cell_type": "code",
205+
"execution_count": null,
206+
"metadata": {
207+
"id": "rWmL8_iFRlv8"
208+
},
209+
"outputs": [],
210+
"source": []
211+
}
212+
],
213+
"metadata": {
214+
"colab": {
215+
"provenance": []
216+
},
217+
"kernelspec": {
218+
"display_name": ".venv",
219+
"language": "python",
220+
"name": "python3"
221+
},
222+
"language_info": {
223+
"codemirror_mode": {
224+
"name": "ipython",
225+
"version": 3
226+
},
227+
"file_extension": ".py",
228+
"mimetype": "text/x-python",
229+
"name": "python",
230+
"nbconvert_exporter": "python",
231+
"pygments_lexer": "ipython3",
232+
"version": "3.12.10"
233+
}
234+
},
235+
"nbformat": 4,
236+
"nbformat_minor": 0
237+
}

0 commit comments

Comments
 (0)