Skip to content

Commit 8b18a68

Browse files
authored
[FSTORE-1777] Tutorial for benchmarking Deployments (#326)
* adding benchmarking information for deployments * updating README.md * renaming folder and adding hint for the locust * adding hint for locust ip * adding hint for locust ip
1 parent 2e5992f commit 8b18a68

20 files changed

+3092
-1
lines changed

benchmarks/online-inference-pipeline/1_fraud_online_feature_pipeline.ipynb

Lines changed: 642 additions & 0 deletions
Large diffs are not rendered by default.

benchmarks/online-inference-pipeline/2_fraud_online_training_pipeline.ipynb

Lines changed: 1824 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "f16367c8",
6+
"metadata": {},
7+
"source": [
8+
"## <span style=\"color:#ff5f27;\"> 📡 Connecting to Hopsworks Feature Store </span>"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 6,
14+
"id": "ed952ece",
15+
"metadata": {},
16+
"outputs": [
17+
{
18+
"name": "stdout",
19+
"output_type": "stream",
20+
"text": [
21+
"2025-06-26 12:02:10,824 INFO: Closing external client and cleaning up certificates.\n",
22+
"Connection closed.\n",
23+
"2025-06-26 12:02:10,835 INFO: Initializing external client\n",
24+
"2025-06-26 12:02:10,836 INFO: Base URL: https://10.87.42.15:28181\n",
25+
"2025-06-26 12:02:11,542 INFO: Python Engine initialized.\n",
26+
"\n",
27+
"Logged in to project, explore it here https://10.87.42.15:28181/p/119\n"
28+
]
29+
}
30+
],
31+
"source": [
32+
"import hopsworks\n",
33+
"\n",
34+
"project = hopsworks.login()\n",
35+
"\n",
36+
"fs = project.get_feature_store()"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"id": "e98e32ce",
42+
"metadata": {},
43+
"source": [
44+
"## <span style=\"color:#ff5f27;\"> ⚙️ Feature Group Retrieval</span>"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": 7,
50+
"id": "d2a8475b",
51+
"metadata": {},
52+
"outputs": [
53+
{
54+
"name": "stdout",
55+
"output_type": "stream",
56+
"text": [
57+
"Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.02s) \n"
58+
]
59+
},
60+
{
61+
"data": {
62+
"text/plain": [
63+
"array([4307206161394478, 4991539658091830, 4556426990917111,\n",
64+
" 4897277640695450, 4123638178254919, 4215909337633098,\n",
65+
" 4883806594247243, 4565376751743421, 4134800299253298,\n",
66+
" 4598649623090127, 4454908897243389, 4628483972728572,\n",
67+
" 4837617840384848, 4359225696258815, 4758035858626403,\n",
68+
" 4689840185625851, 4893428073388709, 4899899195688156,\n",
69+
" 4564193664676304, 4834372953306161, 4277322646120192,\n",
70+
" 4536307339137659, 4322617096913250, 4382251375646022,\n",
71+
" 4167653876012714])"
72+
]
73+
},
74+
"execution_count": 7,
75+
"metadata": {},
76+
"output_type": "execute_result"
77+
}
78+
],
79+
"source": [
80+
"# Retrieve the 'transactions_fraud_online_fg' feature group\n",
81+
"trans_fg = fs.get_feature_group(\n",
82+
" 'transactions_fraud_online_fg',\n",
83+
" version=1,\n",
84+
")\n",
85+
"\n",
86+
    "# Retrieve the first 25 unique credit card numbers (cc_nums)\n",
87+
"cc_nums = trans_fg.select('cc_num').show(25).cc_num.astype(int).values\n",
88+
"\n",
89+
"# Display the obtained cc_nums\n",
90+
"cc_nums"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"id": "903df073",
96+
"metadata": {},
97+
"source": [
98+
"## <span style='color:#ff5f27'>🚀 Fetch Deployment</span>"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": 20,
104+
"id": "4303ac82",
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"# Access the Model Serving\n",
109+
"ms = project.get_model_serving()\n",
110+
"\n",
111+
"# Specify the deployment name\n",
112+
"deployment_async_rdrs = ms.get_deployment(\"deploymentasyncrdrs\")"
113+
]
114+
},
115+
{
116+
"cell_type": "markdown",
117+
"id": "045ba7e4",
118+
"metadata": {},
119+
"source": [
120+
"## <span style='color:#ff5f27'>🔮 Predicting using deployment</span>"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": 23,
126+
"id": "42196023",
127+
"metadata": {},
128+
"outputs": [],
129+
"source": [
130+
    "# Build one prediction request payload per credit card number\n",
131+
"inputs = [{\"cc_num\":int(cc_num)} for cc_num in cc_nums]"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": 25,
137+
"id": "596f3241",
138+
"metadata": {},
139+
"outputs": [
140+
{
141+
"data": {
142+
"text/plain": [
143+
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
144+
]
145+
},
146+
"execution_count": 25,
147+
"metadata": {},
148+
"output_type": "execute_result"
149+
}
150+
],
151+
"source": [
152+
"predictions_async_rdrs = deployment_async_rdrs.predict(inputs=inputs)[\"predictions\"]\n",
153+
"predictions_async_rdrs"
154+
]
155+
},
156+
{
157+
"cell_type": "markdown",
158+
"id": "1b80b358-eb1b-4e7d-bfbc-fab86d25f2bb",
159+
"metadata": {},
160+
"source": [
161+
"### Stop Deployment\n",
162+
"To stop the deployment you simply run:"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"id": "b618ce93-acfe-46b5-a107-1706adaf53a7",
169+
"metadata": {},
170+
"outputs": [],
171+
"source": [
172+
"# Stop the deployment\n",
173+
    "deployment_async_rdrs.stop(await_stopped=180)"
174+
]
175+
}
176+
],
177+
"metadata": {
178+
"kernelspec": {
179+
"display_name": "Python 3 (ipykernel)",
180+
"language": "python",
181+
"name": "python3"
182+
},
183+
"language_info": {
184+
"codemirror_mode": {
185+
"name": "ipython",
186+
"version": 3
187+
},
188+
"file_extension": ".py",
189+
"mimetype": "text/x-python",
190+
"name": "python",
191+
"nbconvert_exporter": "python",
192+
"pygments_lexer": "ipython3",
193+
"version": "3.10.18"
194+
}
195+
},
196+
"nbformat": 4,
197+
"nbformat_minor": 5
198+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Benchmarking a Deployment on Hopsworks
2+
3+
This repository benchmarks a deployment running inside **Hopsworks** using [Locust](https://locust.io/).
4+
5+
## Benchmarking Steps
6+
7+
1. **Create a Deployment**
8+
- Run all the provided notebooks to set up your deployment inside Hopsworks.
9+
10+
2. **Configure Target Host**
11+
   - Add the **host name** and **IP address** of your deployment in [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/locustfile.py#L12).
12+
- You can find this information in the Hopsworks **Deployment UI**.
13+
14+
3. **Add Hopsworks API Key**
15+
   - Insert your Hopsworks API key into the same [`locustfile.py`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/locustfile.py#L12).
16+
- Generate the API key by following [this guide](https://docs.hopsworks.ai/latest/user_guides/projects/api_key/create_api_key/).
17+
18+
4. **Build the Locust Docker Image**
19+
   - Use the provided [Dockerfile](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/Dockerfile) to build a Locust image.
20+
- Push the image to your preferred container registry.
21+
22+
5. **Update Kubernetes Manifests**
23+
- Update the image URL in both:
24+
     - [`master-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/master-deployment.yaml#L28)
25+
     - [`slave-deployment.yaml`](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/slave-deployment.yaml#L28)
26+
27+
6. **Deploy Locust**
28+
   - Run the [deployment script](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/benchmarks/online-inference-pipeline/locust/kubernetes-locust/deploy.sh) to deploy Locust master and worker nodes.
29+
- This will deploy into a Kubernetes namespace named `locust`.
30+
- **Note:** Ensure you have `kubectl` access to the cluster.
31+
32+
7. **Access Locust UI**
33+
- Once deployed, port-forward port `8089` from the `locust-master` service to your local machine.
34+
- Access the Locust Web UI at [http://localhost:8089](http://localhost:8089) to run and monitor your load tests.
35+
36+
37+
## Benchmarks
38+
39+
One benchmark that has been performed targets **5000 RPS** with a **P99 latency below 50 ms**. This performance level can be achieved on Hopsworks 4.1 using the following configuration:
40+
41+
1. **RonDB REST Servers [(RDRS)](https://docs.rondb.com/rondb_rest_api/)**
42+
- Replicas: 2
43+
- CPU Limits: 4
44+
- CPU Requests: 4
45+
46+
2. **Istio Ingress Gateways**
47+
- Replicas: 3
48+
- CPU Limits: 4
49+
- CPU Requests: 4
50+
51+
3. **Predictors**
52+
- Replicas: 48
53+
- CPU Limits: 1
54+
- CPU Requests: 1
55+
56+
The high number of replicas for predictors is necessary to mitigate the effects of Python's [Global Interpreter Lock (GIL)](https://wiki.python.org/moin/GlobalInterpreterLock). This allows for greater parallelism and lower latency, especially at high RPS.
57+
58+
You can view the full benchmark report generated by Locust [here](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/locust_reports/locust_report_5k_rps_25_batch_size.pdf).
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
from math import radians
2+
import numpy as np
3+
import pandas as pd
4+
from typing import Union
5+
6+
def haversine(long: pd.Series, lat: pd.Series, shift: int) -> pd.Series:
    """
    Compute the haversine central angle between each coordinate and the
    coordinate `shift` positions away in the same Series.

    Inputs must already be in radians (prepare_transactions_fraud converts
    degrees before calling this). The result is the unit-less central angle;
    multiply by the Earth's radius (~6371 km) to get a distance in km. The
    raw angle is sufficient here because it is used as a relative-distance
    feature only.

    Parameters:
    - long: pandas Series, longitude values in radians
    - lat: pandas Series, latitude values in radians
    - shift: int, row offset to the neighboring transaction
      (positive compares with earlier rows, negative with later rows)

    Returns:
    - pandas Series of central angles in radians, index-aligned with the
      inputs; the first/last |shift| entries are NaN because they have no
      neighbor (callers fillna(0) afterwards).
    """
    long_shifted = long.shift(shift)
    lat_shifted = lat.shift(shift)
    long_diff = long_shifted - long
    lat_diff = lat_shifted - lat

    # Haversine formula: a + b equals sin^2(theta/2) for central angle theta.
    a = np.sin(lat_diff/2.0)**2
    b = np.cos(lat) * np.cos(lat_shifted) * np.sin(long_diff/2.0)**2
    c = 2*np.arcsin(np.sqrt(a + b))

    return c
28+
29+
30+
def time_delta(datetime_value: pd.Series, shift: int) -> pd.Series:
    """
    Return the datetime values shifted by `shift` positions.

    NOTE(review): despite the name, this function does NOT compute a
    difference itself — it only returns the shifted datetimes. The actual
    subtraction against the current `datetime` column happens downstream in
    prepare_transactions_fraud, which normalizes the result to days. Do not
    "fix" this to return a delta without updating that caller.

    Parameters:
    - datetime_value: pandas Series, datetime values
    - shift: int, the number of positions to shift (negative looks at the
      following row, positive at the preceding row)

    Returns:
    - pandas Series, shifted datetime values (NaT where no neighbor exists)
    """
    time_shifted = datetime_value.shift(shift)
    return time_shifted
43+
44+
45+
def calculate_loc_delta_t_plus_1(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'loc_delta_t_plus_1' column: haversine central angle between each
    transaction and the previous transaction on the same card.

    Parameters:
    - df: pandas DataFrame with 'cc_num', 'longitude' and 'latitude' columns
      (coordinates already in radians)

    Returns:
    - the same DataFrame with the 'loc_delta_t_plus_1' column added
    """
    per_card = df.groupby("cc_num").apply(
        lambda card: haversine(card["longitude"], card["latitude"], 1)
    )
    # Drop the cc_num group level so values align back onto the original row
    # index; rows with no predecessor on the card (NaN) become 0.
    df["loc_delta_t_plus_1"] = per_card.reset_index(level=0, drop=True).fillna(0)
    return df
59+
60+
61+
def calculate_loc_delta_t_minus_1(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'loc_delta_t_minus_1' column: haversine central angle between each
    transaction and the next transaction on the same card.

    Parameters:
    - df: pandas DataFrame with 'cc_num', 'longitude' and 'latitude' columns
      (coordinates already in radians)

    Returns:
    - the same DataFrame with the 'loc_delta_t_minus_1' column added
    """
    per_card = df.groupby("cc_num").apply(
        lambda card: haversine(card["longitude"], card["latitude"], -1)
    )
    # Drop the cc_num group level so values align back onto the original row
    # index; rows with no successor on the card (NaN) become 0.
    df["loc_delta_t_minus_1"] = per_card.reset_index(level=0, drop=True).fillna(0)
    return df
75+
76+
77+
def calculate_time_delta_t_minus_1(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'time_delta_t_minus_1' column: per card, the datetime of the NEXT
    transaction (shift -1). The subtraction against the current datetime is
    performed later in prepare_transactions_fraud, so NaT values are left
    untouched here.

    Parameters:
    - df: pandas DataFrame with 'cc_num' and 'datetime' columns

    Returns:
    - the same DataFrame with the 'time_delta_t_minus_1' column added
    """
    shifted = df.groupby("cc_num").apply(
        lambda card: time_delta(card["datetime"], -1)
    )
    # Drop the cc_num group level so values align back onto the original rows.
    df["time_delta_t_minus_1"] = shifted.reset_index(level=0, drop=True)
    return df
90+
91+
92+
def prepare_transactions_fraud(trans_df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare transaction data with engineered features for fraud detection.

    Expects columns: 'tid', 'datetime', 'cc_num', 'amount', 'country',
    'fraud_label', 'longitude', 'latitude' (coordinates in degrees).

    NOTE: mutates the input frame (in-place sort and column overwrites);
    pass a .copy() if the caller still needs the raw data.

    Parameters:
    - trans_df: pandas DataFrame, transaction data

    Returns:
    - pandas DataFrame with 'loc_delta_t_plus_1', 'loc_delta_t_minus_1' and
      'time_delta_t_minus_1' (in days) added, deduplicated on
      ('cc_num', 'datetime') and reindexed
    """
    # Sort chronologically so the shift-based features compare consecutive
    # transactions, then convert coordinates to radians for haversine().
    trans_df.sort_values("datetime", inplace=True)
    # np.radians is vectorized and replaces the deprecated DataFrame.applymap
    # (deprecated since pandas 2.1) with identical results.
    trans_df[["longitude", "latitude"]] = np.radians(trans_df[["longitude", "latitude"]])

    # Calculate loc_delta_t_plus_1, loc_delta_t_minus_1, and time_delta_t_minus_1 per card
    trans_df = calculate_loc_delta_t_plus_1(trans_df)
    trans_df = calculate_loc_delta_t_minus_1(trans_df)
    trans_df = calculate_time_delta_t_minus_1(trans_df)

    # time_delta_t_minus_1 currently holds the NEXT transaction's datetime;
    # convert it to a difference in days and zero-fill rows with no successor.
    trans_df["time_delta_t_minus_1"] = (trans_df["time_delta_t_minus_1"] - trans_df["datetime"]) / np.timedelta64(1, 'D')
    trans_df["time_delta_t_minus_1"] = trans_df["time_delta_t_minus_1"].fillna(0)

    # Select relevant columns, drop duplicates, and reset index
    trans_df = trans_df[["tid", "datetime", "cc_num", "amount", "country", "fraud_label",
                         "loc_delta_t_plus_1", "loc_delta_t_minus_1", "time_delta_t_minus_1"]]
    trans_df = trans_df.drop_duplicates(subset=['cc_num', 'datetime']).reset_index(drop=True)

    return trans_df
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Locust benchmark image: extend the official Locust base image.
FROM locustio/locust

# Copy the locustfile and supporting files into Locust's default home dir.
COPY ./ /home/locust

benchmarks/online-inference-pipeline/locust/README.md

Whitespace-only changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
# Deploy the Locust master/worker benchmark stack into the `locust` namespace.
# Requires kubectl access to the target cluster.

# Fail fast: stop on the first error, unset variable, or broken pipe, so we
# never apply manifests into a namespace that failed to materialize.
set -euo pipefail

# Create the namespace idempotently (`kubectl create` alone errors if the
# namespace already exists, which would break re-runs).
kubectl create namespace locust --dry-run=client -o yaml | kubectl apply -f -

kubectl apply -f master-deployment.yaml -n locust
kubectl apply -f slave-deployment.yaml -n locust
kubectl apply -f service.yaml -n locust
kubectl apply -f nodeport.yaml -n locust

0 commit comments

Comments
 (0)