Skip to content

Commit 8b18a68

Browse files
authored
[FSTORE-1777] Tutorial for benchmarking Deployments (#326)
* adding benchmarking information for deployments * updating README.md * renaming folder and adding hint for the locust * adding hint for locust ip * adding hint for locust ip
1 parent 2e5992f commit 8b18a68

20 files changed

+3092
-1
lines changed

benchmarks/online-inference-pipeline/1_fraud_online_feature_pipeline.ipynb

Lines changed: 642 additions & 0 deletions
Large diffs are not rendered by default.

benchmarks/online-inference-pipeline/2_fraud_online_training_pipeline.ipynb

Lines changed: 1824 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "f16367c8",
6+
"metadata": {},
7+
"source": [
8+
"## <span style=\"color:#ff5f27;\"> 📡 Connecting to Hopsworks Feature Store </span>"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 6,
14+
"id": "ed952ece",
15+
"metadata": {},
16+
"outputs": [
17+
{
18+
"name": "stdout",
19+
"output_type": "stream",
20+
"text": [
21+
"2025-06-26 12:02:10,824 INFO: Closing external client and cleaning up certificates.\n",
22+
"Connection closed.\n",
23+
"2025-06-26 12:02:10,835 INFO: Initializing external client\n",
24+
"2025-06-26 12:02:10,836 INFO: Base URL: https://10.87.42.15:28181\n",
25+
"2025-06-26 12:02:11,542 INFO: Python Engine initialized.\n",
26+
"\n",
27+
"Logged in to project, explore it here https://10.87.42.15:28181/p/119\n"
28+
]
29+
}
30+
],
31+
"source": [
32+
"import hopsworks\n",
33+
"\n",
34+
"project = hopsworks.login()\n",
35+
"\n",
36+
"fs = project.get_feature_store()"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"id": "e98e32ce",
42+
"metadata": {},
43+
"source": [
44+
"## <span style=\"color:#ff5f27;\"> ⚙️ Feature Group Retrieval</span>"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": 7,
50+
"id": "d2a8475b",
51+
"metadata": {},
52+
"outputs": [
53+
{
54+
"name": "stdout",
55+
"output_type": "stream",
56+
"text": [
57+
"Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.02s) \n"
58+
]
59+
},
60+
{
61+
"data": {
62+
"text/plain": [
63+
"array([4307206161394478, 4991539658091830, 4556426990917111,\n",
64+
" 4897277640695450, 4123638178254919, 4215909337633098,\n",
65+
" 4883806594247243, 4565376751743421, 4134800299253298,\n",
66+
" 4598649623090127, 4454908897243389, 4628483972728572,\n",
67+
" 4837617840384848, 4359225696258815, 4758035858626403,\n",
68+
" 4689840185625851, 4893428073388709, 4899899195688156,\n",
69+
" 4564193664676304, 4834372953306161, 4277322646120192,\n",
70+
" 4536307339137659, 4322617096913250, 4382251375646022,\n",
71+
" 4167653876012714])"
72+
]
73+
},
74+
"execution_count": 7,
75+
"metadata": {},
76+
"output_type": "execute_result"
77+
}
78+
],
79+
"source": [
80+
"# Retrieve the 'transactions_fraud_online_fg' feature group\n",
81+
"trans_fg = fs.get_feature_group(\n",
82+
" 'transactions_fraud_online_fg',\n",
83+
" version=1,\n",
84+
")\n",
85+
"\n",
86+
    "# Retrieve the first 25 unique credit card numbers (cc_nums)\n",
87+
"cc_nums = trans_fg.select('cc_num').show(25).cc_num.astype(int).values\n",
88+
"\n",
89+
"# Display the obtained cc_nums\n",
90+
"cc_nums"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"id": "903df073",
96+
"metadata": {},
97+
"source": [
98+
"## <span style='color:#ff5f27'>🚀 Fetch Deployment</span>"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": 20,
104+
"id": "4303ac82",
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"# Access the Model Serving\n",
109+
"ms = project.get_model_serving()\n",
110+
"\n",
111+
"# Specify the deployment name\n",
112+
"deployment_async_rdrs = ms.get_deployment(\"deploymentasyncrdrs\")"
113+
]
114+
},
115+
{
116+
"cell_type": "markdown",
117+
"id": "045ba7e4",
118+
"metadata": {},
119+
"source": [
120+
"## <span style='color:#ff5f27'>🔮 Predicting using deployment</span>"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": 23,
126+
"id": "42196023",
127+
"metadata": {},
128+
"outputs": [],
129+
"source": [
130+
    "# Build one prediction request payload per credit card number\n",
131+
"inputs = [{\"cc_num\":int(cc_num)} for cc_num in cc_nums]"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": 25,
137+
"id": "596f3241",
138+
"metadata": {},
139+
"outputs": [
140+
{
141+
"data": {
142+
"text/plain": [
143+
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
144+
]
145+
},
146+
"execution_count": 25,
147+
"metadata": {},
148+
"output_type": "execute_result"
149+
}
150+
],
151+
"source": [
152+
"predictions_async_rdrs = deployment_async_rdrs.predict(inputs=inputs)[\"predictions\"]\n",
153+
"predictions_async_rdrs"
154+
]
155+
},
156+
{
157+
"cell_type": "markdown",
158+
"id": "1b80b358-eb1b-4e7d-bfbc-fab86d25f2bb",
159+
"metadata": {},
160+
"source": [
161+
"### Stop Deployment\n",
162+
"To stop the deployment you simply run:"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"id": "b618ce93-acfe-46b5-a107-1706adaf53a7",
169+
"metadata": {},
170+
"outputs": [],
171+
"source": [
172+
"# Stop the deployment\n",
173+
    "deployment_async_rdrs.stop(await_stopped=180)"
174+
]
175+
}
176+
],
177+
"metadata": {
178+
"kernelspec": {
179+
"display_name": "Python 3 (ipykernel)",
180+
"language": "python",
181+
"name": "python3"
182+
},
183+
"language_info": {
184+
"codemirror_mode": {
185+
"name": "ipython",
186+
"version": 3
187+
},
188+
"file_extension": ".py",
189+
"mimetype": "text/x-python",
190+
"name": "python",
191+
"nbconvert_exporter": "python",
192+
"pygments_lexer": "ipython3",
193+
"version": "3.10.18"
194+
}
195+
},
196+
"nbformat": 4,
197+
"nbformat_minor": 5
198+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Benchmarking a Deployment on Hopsworks
2+
3+
This repository benchmarks a deployment running inside **Hopsworks** using [Locust](https://locust.io/).
4+
5+
## Benchmarking Steps
6+
7+
1. **Create a Deployment**
8+
- Run all the provided notebooks to set up your deployment inside Hopsworks.
9+
10+
2. **Configure Target Host**
11+
   - Add the **host name** and **IP address** of your deployment in [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/locustfile.py#L12).
12+
- You can find this information in the Hopsworks **Deployment UI**.
13+
14+
3. **Add Hopsworks API Key**
15+
   - Insert your Hopsworks API key into the same [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/locustfile.py#L12).
16+
- Generate the API key by following [this guide](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/).
17+
18+
4. **Build the Locust Docker Image**
19+
   - Use the provided [Dockerfile](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/Dockerfile) to build a Locust image.
20+
- Push the image to your preferred container registry.
21+
22+
5. **Update Kubernetes Manifests**
23+
- Update the image URL in both:
24+
     - [`master-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/master-deployment.yaml#L28)
25+
     - [`slave-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/slave-deployment.yaml#L28)
26+
27+
6. **Deploy Locust**
28+
   - Run the [deployment script](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/deploy.sh) to deploy Locust master and worker nodes.
29+
- This will deploy into a Kubernetes namespace named `locust`.
30+
- **Note:** Ensure you have `kubectl` access to the cluster.
31+
32+
7. **Access Locust UI**
33+
- Once deployed, port-forward port `8089` from the `locust-master` service to your local machine.
34+
- Access the Locust Web UI at [http://localhost:8089](http://localhost:8089) to run and monitor your load tests.
35+
36+
37+
## Benchmarks
38+
39+
One benchmark that has been performed targets **5000 RPS** with a **P99 latency below 50 ms**. This performance level can be achieved on Hopsworks 4.1 using the following configuration:
40+
41+
1. **RonDB REST Servers [(RDRS)](https://docs.rondb.com/rondb_rest_api/)**
42+
- Replicas: 2
43+
- CPU Limits: 4
44+
- CPU Requests: 4
45+
46+
2. **Istio Ingress Gateways**
47+
- Replicas: 3
48+
- CPU Limits: 4
49+
- CPU Requests: 4
50+
51+
3. **Predictors**
52+
- Replicas: 48
53+
- CPU Limits: 1
54+
- CPU Requests: 1
55+
56+
The high number of replicas for predictors is necessary to mitigate the effects of Python's [Global Interpreter Lock (GIL)](https://wiki.python.org/moin/GlobalInterpreterLock). This allows for greater parallelism and lower latency, especially at high RPS.
57+
58+
You can view the full benchmark report generated by Locust [here](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/locust_reports/locust_report_5k_rps_25_batch_size.pdf).
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
from math import radians
2+
import numpy as np
3+
import pandas as pd
4+
from typing import Union
5+
6+
def haversine(long: pd.Series, lat: pd.Series, shift: int) -> pd.Series:
    """
    Compute the haversine central angle between each coordinate and the
    coordinate `shift` positions away in the same Series.

    Inputs must already be in radians (prepare_transactions_fraud converts
    degrees before calling this). The result is the unit-less central angle;
    multiply by the Earth's radius (~6371 km) to get a distance in km. The
    raw angle is sufficient here because it is used as a relative-distance
    feature only.

    Parameters:
    - long: pandas Series, longitude values in radians
    - lat: pandas Series, latitude values in radians
    - shift: int, row offset to the neighboring transaction
      (positive compares with earlier rows, negative with later rows)

    Returns:
    - pandas Series of central angles in radians, index-aligned with the
      inputs; the first/last |shift| entries are NaN because they have no
      neighbor (callers fillna(0) afterwards).
    """
    long_shifted = long.shift(shift)
    lat_shifted = lat.shift(shift)
    long_diff = long_shifted - long
    lat_diff = lat_shifted - lat

    # Haversine formula: a + b equals sin^2(theta/2) for central angle theta.
    a = np.sin(lat_diff/2.0)**2
    b = np.cos(lat) * np.cos(lat_shifted) * np.sin(long_diff/2.0)**2
    c = 2*np.arcsin(np.sqrt(a + b))

    return c
28+
29+
30+
def time_delta(datetime_value: pd.Series, shift: int) -> pd.Series:
    """
    Return the datetime values shifted by `shift` positions.

    NOTE(review): despite the name, this function does NOT compute a
    difference itself — it only returns the shifted datetimes. The actual
    subtraction against the current `datetime` column happens downstream in
    prepare_transactions_fraud, which normalizes the result to days. Do not
    "fix" this to return a delta without updating that caller.

    Parameters:
    - datetime_value: pandas Series, datetime values
    - shift: int, the number of positions to shift (negative looks at the
      following row, positive at the preceding row)

    Returns:
    - pandas Series, shifted datetime values (NaT where no neighbor exists)
    """
    time_shifted = datetime_value.shift(shift)
    return time_shifted
43+
44+
45+
def calculate_loc_delta_t_plus_1(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'loc_delta_t_plus_1' column: haversine central angle between each
    transaction and the previous transaction on the same card.

    Parameters:
    - df: pandas DataFrame with 'cc_num', 'longitude' and 'latitude' columns
      (coordinates already in radians)

    Returns:
    - the same DataFrame with the 'loc_delta_t_plus_1' column added
    """
    per_card = df.groupby("cc_num").apply(
        lambda card: haversine(card["longitude"], card["latitude"], 1)
    )
    # Drop the cc_num group level so values align back onto the original row
    # index; rows with no predecessor on the card (NaN) become 0.
    df["loc_delta_t_plus_1"] = per_card.reset_index(level=0, drop=True).fillna(0)
    return df
59+
60+
61+
def calculate_loc_delta_t_minus_1(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'loc_delta_t_minus_1' column: haversine central angle between each
    transaction and the next transaction on the same card.

    Parameters:
    - df: pandas DataFrame with 'cc_num', 'longitude' and 'latitude' columns
      (coordinates already in radians)

    Returns:
    - the same DataFrame with the 'loc_delta_t_minus_1' column added
    """
    per_card = df.groupby("cc_num").apply(
        lambda card: haversine(card["longitude"], card["latitude"], -1)
    )
    # Drop the cc_num group level so values align back onto the original row
    # index; rows with no successor on the card (NaN) become 0.
    df["loc_delta_t_minus_1"] = per_card.reset_index(level=0, drop=True).fillna(0)
    return df
75+
76+
77+
def calculate_time_delta_t_minus_1(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'time_delta_t_minus_1' column: per card, the datetime of the NEXT
    transaction (shift -1). The subtraction against the current datetime is
    performed later in prepare_transactions_fraud, so NaT values are left
    untouched here.

    Parameters:
    - df: pandas DataFrame with 'cc_num' and 'datetime' columns

    Returns:
    - the same DataFrame with the 'time_delta_t_minus_1' column added
    """
    shifted = df.groupby("cc_num").apply(
        lambda card: time_delta(card["datetime"], -1)
    )
    # Drop the cc_num group level so values align back onto the original rows.
    df["time_delta_t_minus_1"] = shifted.reset_index(level=0, drop=True)
    return df
90+
91+
92+
def prepare_transactions_fraud(trans_df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare transaction data with engineered features for fraud detection.

    Expects columns: 'tid', 'datetime', 'cc_num', 'amount', 'country',
    'fraud_label', 'longitude', 'latitude' (coordinates in degrees).

    NOTE: mutates the input frame (in-place sort and column overwrites);
    pass a .copy() if the caller still needs the raw data.

    Parameters:
    - trans_df: pandas DataFrame, transaction data

    Returns:
    - pandas DataFrame with 'loc_delta_t_plus_1', 'loc_delta_t_minus_1' and
      'time_delta_t_minus_1' (in days) added, deduplicated on
      ('cc_num', 'datetime') and reindexed
    """
    # Sort chronologically so the shift-based features compare consecutive
    # transactions, then convert coordinates to radians for haversine().
    trans_df.sort_values("datetime", inplace=True)
    # np.radians is vectorized and replaces the deprecated DataFrame.applymap
    # (deprecated since pandas 2.1) with identical results.
    trans_df[["longitude", "latitude"]] = np.radians(trans_df[["longitude", "latitude"]])

    # Calculate loc_delta_t_plus_1, loc_delta_t_minus_1, and time_delta_t_minus_1 per card
    trans_df = calculate_loc_delta_t_plus_1(trans_df)
    trans_df = calculate_loc_delta_t_minus_1(trans_df)
    trans_df = calculate_time_delta_t_minus_1(trans_df)

    # time_delta_t_minus_1 currently holds the NEXT transaction's datetime;
    # convert it to a difference in days and zero-fill rows with no successor.
    trans_df["time_delta_t_minus_1"] = (trans_df["time_delta_t_minus_1"] - trans_df["datetime"]) / np.timedelta64(1, 'D')
    trans_df["time_delta_t_minus_1"] = trans_df["time_delta_t_minus_1"].fillna(0)

    # Select relevant columns, drop duplicates, and reset index
    trans_df = trans_df[["tid", "datetime", "cc_num", "amount", "country", "fraud_label",
                         "loc_delta_t_plus_1", "loc_delta_t_minus_1", "time_delta_t_minus_1"]]
    trans_df = trans_df.drop_duplicates(subset=['cc_num', 'datetime']).reset_index(drop=True)

    return trans_df
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Locust benchmark image: extend the official Locust base image.
FROM locustio/locust

# Copy the locustfile and supporting files into Locust's default home dir.
COPY ./ /home/locust

benchmarks/online-inference-pipeline/locust/README.md

Whitespace-only changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
# Deploy the Locust master/worker benchmark stack into the `locust` namespace.
# Requires kubectl access to the target cluster.

# Fail fast: stop on the first error, unset variable, or broken pipe, so we
# never apply manifests into a namespace that failed to materialize.
set -euo pipefail

# Create the namespace idempotently (`kubectl create` alone errors if the
# namespace already exists, which would break re-runs).
kubectl create namespace locust --dry-run=client -o yaml | kubectl apply -f -

kubectl apply -f master-deployment.yaml -n locust
kubectl apply -f slave-deployment.yaml -n locust
kubectl apply -f service.yaml -n locust
kubectl apply -f nodeport.yaml -n locust

0 commit comments

Comments
 (0)