Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
299 changes: 299 additions & 0 deletions jenkins-scripts/tools/statistics-analysis/node_timing.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,299 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"from datetime import datetime\n",
"sns.set(font_scale=1.2)\n",
"sns.set_style(\"whitegrid\")\n",
"\n",
"def load_data(file_path):\n",
" columns = ['timestamp', 'node_name', 'status', 'labels', 'current_job']\n",
" df = pd.read_csv(file_path, names=columns, header=0)\n",
" df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S')\n",
" df['current_job'] = df['current_job'].replace('None', np.nan)\n",
" # Replace the raw labels with a single platform name using these rules, e.g. if both docker and gpu-reliable exist in labels, use linux-gpu\n",
" def apply_label_rules(labels):\n",
" if 'docker' in labels and 'gpu-reliable' in labels:\n",
" return 'linux-gpu'\n",
" elif 'docker' in labels and 'gpu-reliable' not in labels:\n",
" return 'linux'\n",
" elif 'win' in labels and 'gpu-reliable' in labels:\n",
" return 'windows-gpu'\n",
" elif 'win' in labels and 'gpu-reliable' not in labels:\n",
" return 'windows' \n",
" elif 'osx' in labels:\n",
" for label in labels.split(', '):\n",
" if label.startswith('osx_'):\n",
" return label\n",
" elif 'linux-arm64' in labels:\n",
" return 'linux-arm64' \n",
" else:\n",
" return labels\n",
"\n",
" df['labels'] = df['labels'].apply(apply_label_rules) \n",
" # Filter out rows that have status == 'Not Working'\n",
" df = df[df['status'] != 'Not Working'] \n",
" return df\n",
"\n",
"def calculate_node_stats(df):\n",
" nodes = df['node_name'].unique()\n",
" node_stats = {}\n",
" \n",
" for node in nodes:\n",
" node_df = df[df['node_name'] == node].sort_values('timestamp')\n",
" first_seen = node_df['timestamp'].min()\n",
" last_seen = node_df['timestamp'].max()\n",
" time_alive = (last_seen - first_seen).total_seconds() / 60\n",
" \n",
" busy_records = node_df[node_df['status'] == 'Busy']\n",
" busy_periods = []\n",
" if not busy_records.empty:\n",
" node_df['status_changed'] = node_df['status'] != node_df['status'].shift(1)\n",
" status_changes = node_df[node_df['status_changed']].copy()\n",
" last_records = node_df.groupby('status').last().reset_index()\n",
" status_change_points = pd.concat([status_changes, last_records]).sort_values('timestamp')\n",
" \n",
" current_status = None\n",
" start_time = None\n",
" \n",
" for _, row in status_change_points.iterrows():\n",
" if current_status == 'Busy' and start_time is not None:\n",
" duration = (row['timestamp'] - start_time).total_seconds() / 60\n",
" busy_periods.append({\n",
" 'start': start_time,\n",
" 'end': row['timestamp'],\n",
" 'duration': duration,\n",
" 'job': row['current_job']\n",
" })\n",
" \n",
" current_status = row['status']\n",
" start_time = row['timestamp']\n",
" \n",
" total_busy_time = sum(period['duration'] for period in busy_periods)\n",
" busy_percentage = (total_busy_time / time_alive * 100) if time_alive > 0 else 0\n",
" \n",
" node_stats[node] = {\n",
" 'first_seen': first_seen,\n",
" 'last_seen': last_seen,\n",
" 'time_alive': time_alive,\n",
" 'busy_time': total_busy_time,\n",
" 'idle_time': time_alive - total_busy_time,\n",
" 'busy_percentage': busy_percentage,\n",
" 'busy_periods': busy_periods,\n",
" 'status_history': node_df[['timestamp', 'status', 'current_job']].to_dict('records'),\n",
" 'labels': node_df['labels'].iloc[0]\n",
" }\n",
" \n",
" return node_stats\n",
"\n",
"def generate_node_report(node_stats):\n",
" sorted_nodes = sorted(node_stats.items(), key=lambda x: x[1]['busy_percentage'], reverse=True)\n",
" report_data = []\n",
" for node, stats in sorted_nodes:\n",
" report_data.append({\n",
" 'Node Name': node,\n",
" 'Time Alive (min)': round(stats['time_alive'], 2),\n",
" 'Busy Time (min)': round(stats['busy_time'], 2),\n",
" 'Idle Time (min)': round(stats['idle_time'], 2),\n",
" 'Busy (%)': round(stats['busy_percentage'], 2),\n",
" 'First Seen': stats['first_seen'],\n",
" 'Last Seen': stats['last_seen'],\n",
" 'Number of Jobs': len(stats['busy_periods']),\n",
" 'Labels': stats['labels']\n",
" })\n",
" \n",
" report_df = pd.DataFrame(report_data)\n",
" return report_df\n",
"\n",
"def analyze_jenkins_nodes(df, title='Jenkins Nodes Activity'):\n",
" node_stats = calculate_node_stats(df)\n",
" report = generate_node_report(node_stats)\n",
" display(report.style.set_caption(title).set_table_styles(\n",
" [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},\n",
" {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},\n",
" {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},\n",
" {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]\n",
" ).set_properties(**{'text-align': 'center'})) \n",
" return df, node_stats, report\n",
"\n",
"\n",
"df = load_data(\"agent_data.csv\")\n",
"# Absolute (all days, without the workday/weekend filtering below)\n",
"# df, node_stats, report = analyze_jenkins_nodes(df)\n",
"# Filter data to keep only workdays (Monday-Friday, dayofweek 0-4)\n",
"df_workdays = df[df['timestamp'].dt.dayofweek < 5]\n",
"df_workdays, node_stats_workdays, report = analyze_jenkins_nodes(df_workdays, title='Jenkins Nodes Activity (Workdays)')\n",
"# Filter data to keep only weekends (Saturday-Sunday, dayofweek 5-6)\n",
"df_weekends = df[df['timestamp'].dt.dayofweek >= 5]\n",
"df_weekends, node_stats_weekends, report = analyze_jenkins_nodes(df_weekends, title='Jenkins Nodes Activity (Weekends)')\n",
"\n",
"# Generate the daily hourly activity report: group the rows by day and hour and count the Busy/Idle/Not Working samples (each row adds 1 to its counter)\n",
"def calculate_hourly_stats(df):\n",
" hourly_stats = {}\n",
" for _, row in df.iterrows():\n",
" day = row['timestamp'].date()\n",
" hour = row['timestamp'].hour\n",
" if day not in hourly_stats:\n",
" hourly_stats[day] = {}\n",
" \n",
" if hour not in hourly_stats[day]:\n",
" hourly_stats[day][hour] = {\n",
" 'busy_time': 0,\n",
" 'idle_time': 0,\n",
" 'not_working_time': 0\n",
" }\n",
" \n",
" if row['status'] == 'Busy':\n",
" hourly_stats[day][hour]['busy_time'] += 1\n",
" elif row['status'] == 'Idle':\n",
" hourly_stats[day][hour]['idle_time'] += 1\n",
" elif row['status'] == 'Not Working':\n",
" hourly_stats[day][hour]['not_working_time'] += 1\n",
" else:\n",
" raise ValueError(f\"Unknown status in node: {row['status']}\")\n",
" \n",
" \n",
" return hourly_stats\n",
"\n",
"hourly_stats_workdays = calculate_hourly_stats(df_workdays)\n",
"hourly_stats_weekends = calculate_hourly_stats(df_weekends)\n",
"\n",
"# With the hourly stats, calculate the percentage of busy time for each hour of the day,\n",
"# aggregated over all days\n",
"def calculate_hourly_percentage(hourly_stats, title='Jenkins Nodes Hourly Activity'):\n",
" hourly_percentage = {}\n",
" for day, stats in hourly_stats.items():\n",
" for hour, values in stats.items():\n",
" if hour not in hourly_percentage:\n",
" hourly_percentage[hour] = {\n",
" 'busy_time': 0,\n",
" 'idle_time': 0\n",
" }\n",
" \n",
" hourly_percentage[hour]['busy_time'] += values['busy_time']\n",
" hourly_percentage[hour]['idle_time'] += values['idle_time']\n",
" \n",
" for hour, values in hourly_percentage.items():\n",
" total_time = values['busy_time'] + values['idle_time']\n",
" values['busy_percentage'] = (values['busy_time'] / total_time * 100) if total_time > 0 else 0\n",
" \n",
" # print the hourly percentage in a nice table\n",
" hourly_percentage_df = pd.DataFrame(hourly_percentage).T\n",
" hourly_percentage_df = hourly_percentage_df.rename_axis('Hour').reset_index()\n",
" hourly_percentage_df = hourly_percentage_df[['Hour', 'busy_time', 'idle_time', 'busy_percentage']]\n",
" display(hourly_percentage_df.style.set_caption(title).set_table_styles(\n",
" [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},\n",
" {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},\n",
" {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},\n",
" {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]\n",
" ).set_properties(**{'text-align': 'center'}))\n",
"\n",
"\n",
" \n",
" return hourly_percentage\n",
"\n",
"\n",
"hourly_percentage_workdays = calculate_hourly_percentage(hourly_stats_workdays, title='Jenkins Nodes Hourly Activity (Workdays)')\n",
"hourly_percentage_weekends = calculate_hourly_percentage(hourly_stats_weekends, title='Jenkins Nodes Hourly Activity (Weekends)')\n",
" \n",
"# Group by labels: sum the time alive and the busy/idle times of the nodes sharing each label, then compute the busy percentage relative to the summed time alive\n",
"def calculate_label_stats(node_stats):\n",
" label_stats = {}\n",
" for node, stats in node_stats.items():\n",
" label = stats['labels']\n",
" if label not in label_stats:\n",
" label_stats[label] = {\n",
" 'time_alive': 0,\n",
" 'busy_time': 0,\n",
" 'idle_time': 0\n",
" }\n",
" \n",
" label_stats[label]['time_alive'] += stats['time_alive']\n",
" label_stats[label]['busy_time'] += stats['busy_time']\n",
" label_stats[label]['idle_time'] += stats['idle_time']\n",
" \n",
" for label, stats in label_stats.items():\n",
" stats['busy_percentage'] = (stats['busy_time'] / stats['time_alive'] * 100) if stats['time_alive'] > 0 else 0\n",
" \n",
" return label_stats\n",
"\n",
"def generate_label_report(node_stats, title='Jenkins Nodes Activity by Labels'):\n",
" label_stats = calculate_label_stats(node_stats)\n",
" label_report = pd.DataFrame(label_stats).T\n",
" label_report = label_report.sort_values('busy_percentage', ascending=False)\n",
" label_report = label_report.rename_axis('Labels').reset_index()\n",
" label_report = label_report[['Labels', 'time_alive', 'busy_time', 'idle_time', 'busy_percentage']]\n",
" display(label_report.style.set_caption(title).set_table_styles(\n",
" [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},\n",
" {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},\n",
" {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},\n",
" {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]\n",
" ).set_properties(**{'text-align': 'center'}))\n",
" return label_report\n",
"\n",
"label_report_workdays = generate_label_report(node_stats_workdays, title='Jenkins Nodes Activity by Labels (workdays)')\n",
"label_report_weekends = generate_label_report(node_stats_weekends, title='Jenkins Nodes Activity by Labels (weekends)')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Generate a report listing every busy period of every node, sorted by start time\n",
"def generate_busy_periods_report(node_stats, title='Jenkins Nodes Busy Periods'):\n",
" busy_periods_data = []\n",
" for node, stats in node_stats.items():\n",
" for period in stats['busy_periods']:\n",
" busy_periods_data.append({\n",
" 'Node Name': node,\n",
" 'Start Time': period['start'],\n",
" 'End Time': period['end'],\n",
" 'Duration (min)': round(period['duration'], 2),\n",
" 'Job': period['job']\n",
" })\n",
" \n",
" busy_periods_report = pd.DataFrame(busy_periods_data)\n",
" busy_periods_report = busy_periods_report.sort_values('Start Time')\n",
" display(busy_periods_report.style.set_caption(title).set_table_styles(\n",
" [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},\n",
" {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},\n",
" {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},\n",
" {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]\n",
" ).set_properties(**{'text-align': 'center'}))\n",
" return busy_periods_report\n",
"\n",
"generate_busy_periods_report(node_stats_workdays, title='Jenkins Nodes Busy Periods (workdays)')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "default",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading