Skip to content

Add User Group Diagnostics dashboard and use unescaped usernames #148

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions dashboards/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,33 @@ local var = grafonnet.dashboard.variable;
+ var.query.selectionOptions.withIncludeAll(value=true, customAllValue='.*')
+ var.query.queryTypes.withLabelValues('namespace', 'kube_service_labels{service="hub"}')
,
user_pod:
var.query.new('user_pod')
hub_name:
var.query.new('hub_name')
+ var.query.withDatasourceFromVariable(self.prometheus)
+ var.query.selectionOptions.withMulti()
+ var.query.selectionOptions.withIncludeAll(value=true, customAllValue='.*')
+ var.query.queryTypes.withLabelValues('pod', 'kube_pod_labels{label_app="jupyterhub", label_component="singleuser-server", namespace=~"$hub"}')
+ var.query.queryTypes.withLabelValues('namespace', 'kube_service_labels{service="hub"}')
,
// Dashboard query variable `namespace`.
// Multi-select, with an "All" option whose value is the regex `.*`.
// Options are the values of the `namespace` label on `kube_pod_labels`,
// read from the datasource chosen by this object's `prometheus` variable.
namespace:
var.query.new('namespace')
+ var.query.withDatasourceFromVariable(self.prometheus)
+ var.query.selectionOptions.withMulti()
+ var.query.selectionOptions.withIncludeAll(value=true, customAllValue='.*')
+ var.query.queryTypes.withLabelValues('namespace', 'kube_pod_labels')
,
// Dashboard query variable `user_group`.
// Multi-select, with an "All" option whose value is the regex `.*`.
// Options are the values of the `usergroup` label on the
// `jupyterhub_user_group_info` metric, read from the datasource chosen by
// this object's `prometheus` variable.
// NOTE(review): presumably this metric comes from jupyterhub-groups-exporter,
// so the variable is empty on hubs without it — confirm.
user_group:
var.query.new('user_group')
+ var.query.withDatasourceFromVariable(self.prometheus)
+ var.query.selectionOptions.withMulti()
+ var.query.selectionOptions.withIncludeAll(value=true, customAllValue='.*')
+ var.query.queryTypes.withLabelValues('usergroup', 'jupyterhub_user_group_info')
,
// Dashboard query variable `user_name`.
// Multi-select, with an "All" option whose value is the regex `.*`.
// Options are the values of the `annotation_hub_jupyter_org_username` label
// on `kube_pod_annotations`, limited to namespaces matching the `$hub_name`
// dashboard variable. Any dashboard using this variable must therefore also
// define `hub_name`.
user_name:
var.query.new('user_name')
+ var.query.withDatasourceFromVariable(self.prometheus)
+ var.query.selectionOptions.withMulti()
+ var.query.selectionOptions.withIncludeAll(value=true, customAllValue='.*')
+ var.query.queryTypes.withLabelValues('annotation_hub_jupyter_org_username', 'kube_pod_annotations{ namespace=~"$hub_name"}')
,
// Queries should use the 'instance' label when querying metrics that
// come from collectors present on each node - such as node_exporter or
Expand Down
196 changes: 196 additions & 0 deletions dashboards/group.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#!/usr/bin/env -S jsonnet -J ../vendor
local grafonnet = import 'github.com/grafana/grafonnet/gen/grafonnet-v11.1.0/main.libsonnet';
local dashboard = grafonnet.dashboard;
local ts = grafonnet.panel.timeSeries;
local prometheus = grafonnet.query.prometheus;

local common = import './common.libsonnet';

// Time-series panel: working-set memory of user server pods, summed per
// user group and namespace.
//
// The query is a two-step join:
//   1. container memory -> username, through the pod's
//      `annotation_hub_jupyter_org_username` annotation;
//   2. username -> usergroup, through `jupyterhub_user_group_info`, whose
//      `username` label is copied into `annotation_hub_jupyter_org_username`
//      by label_replace so both sides share a join key.
// Only the second join can contribute `usergroup`, so it is the only one
// that lists it in `group_left(...)`.
// NOTE(review): step 2 is a many-to-one match; presumably it errors with
// duplicate right-hand series if one user is reported in several selected
// groups — confirm against the exporter's output.
local memoryUsage =
  common.tsOptions
  + ts.new('Memory Usage')
  + ts.panelOptions.withDescription(
    |||
      Per group memory usage

      Requires https://github.com/2i2c-org/jupyterhub-groups-exporter to
      be set up.
    |||
  )
  + ts.standardOptions.withUnit('bytes')
  + ts.queryOptions.withTargets([
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(
          # exclude name="" because the same container can be reported
          # with both no name and `name=k8s_...`,
          # in which case the sum reports double the actual metric
          container_memory_working_set_bytes{name!="", pod=~"jupyter-.*", namespace=~"$hub_name"}
            * on (namespace, pod) group_left(annotation_hub_jupyter_org_username)
            group(
                kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~".*", pod=~"jupyter-.*"}
            ) by (pod, namespace, annotation_hub_jupyter_org_username)
            * on (namespace, annotation_hub_jupyter_org_username) group_left(usergroup)
            group(
              label_replace(jupyterhub_user_group_info{namespace=~"$hub_name", username=~".*", usergroup=~"$user_group"},
                "annotation_hub_jupyter_org_username", "$1", "username", "(.+)")
            ) by (annotation_hub_jupyter_org_username, usergroup, namespace)
        ) by (usergroup, namespace)
      |||
    )
    + prometheus.withLegendFormat('{{ usergroup }} - ({{ namespace }})'),
  ]);


// Time-series panel: CPU usage of user server pods, summed per user group
// and namespace.
//
// The query is a two-step join:
//   1. per-container CPU rate -> username, through the pod's
//      `annotation_hub_jupyter_org_username` annotation;
//   2. username -> usergroup, through `jupyterhub_user_group_info`, whose
//      `username` label is copied into `annotation_hub_jupyter_org_username`
//      by label_replace so both sides share a join key.
// The CPU selector is restricted to `$hub_name` up front so irate() is not
// evaluated for pods in namespaces that the join would discard anyway.
// NOTE(review): step 2 is a many-to-one match; presumably it errors with
// duplicate right-hand series if one user is reported in several selected
// groups — confirm against the exporter's output.
local cpuUsage =
  common.tsOptions
  + ts.new('CPU Usage')
  + ts.panelOptions.withDescription(
    |||
      Per group CPU usage

      Requires https://github.com/2i2c-org/jupyterhub-groups-exporter to
      be set up.
    |||
  )
  + ts.standardOptions.withUnit('percentunit')
  + ts.queryOptions.withTargets([
    prometheus.new(
      '$PROMETHEUS_DS',
      |||
        sum(
          # exclude name="" because the same container can be reported
          # with both no name and `name=k8s_...`,
          # in which case sum() by (pod) reports double the actual metric
          irate(container_cpu_usage_seconds_total{name!="", pod=~"jupyter-.*", namespace=~"$hub_name"}[5m])
            * on (namespace, pod) group_left(annotation_hub_jupyter_org_username)
            group(
                kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~".*", pod=~"jupyter-.*"}
            ) by (pod, namespace, annotation_hub_jupyter_org_username)
            * on (namespace, annotation_hub_jupyter_org_username) group_left(usergroup)
            group(
              label_replace(jupyterhub_user_group_info{namespace=~"$hub_name", username=~".*", usergroup=~"$user_group"},
                "annotation_hub_jupyter_org_username", "$1", "username", "(.+)")
            ) by (annotation_hub_jupyter_org_username, usergroup, namespace)
        ) by (usergroup, namespace)
      |||
    )
    + prometheus.withLegendFormat('{{ usergroup }} - ({{ namespace }})'),
  ]);

// Time-series panel: home directory size on shared storage, summed per user
// group and namespace.
//
// `dirsize_total_size_bytes` is keyed by `directory`; the group info metric
// is keyed by `username_escaped`. label_replace copies `username_escaped`
// into a `directory` label so the two can be joined on
// (namespace, directory).
local homedirSharedUsage =
  local groupDirsizeExpr = |||
    sum(
      max(
        dirsize_total_size_bytes{namespace=~"$hub_name"}
      ) by (namespace, directory)
      * on (namespace, directory) group_left(usergroup)
      group(
        label_replace(
          jupyterhub_user_group_info{namespace=~"$hub_name", username_escaped=~".*", usergroup=~"$user_group"},
          "directory", "$1", "username_escaped", "(.+)")
      ) by (directory, namespace, usergroup)
    ) by (namespace, usergroup)
  |||;
  local groupDirsizeTarget =
    prometheus.new('$PROMETHEUS_DS', groupDirsizeExpr)
    + prometheus.withLegendFormat('{{ usergroup }} - ({{ namespace }})');
  common.tsOptions
  + ts.new('Home Directory Usage (on shared home directories)')
  + ts.panelOptions.withDescription(
    |||
      Per group home directory size, when using a shared home directory.

      Requires https://github.com/yuvipanda/prometheus-dirsize-exporter and https://github.com/2i2c-org/jupyterhub-groups-exporter to
      be set up.
    |||
  )
  + ts.standardOptions.withUnit('bytes')
  + ts.queryOptions.withTargets([
    groupDirsizeTarget,
  ]);

// Time-series panel: memory requested by user server pods, summed per user
// group and namespace.
//
// Requests are joined to usernames through the pod annotation
// `annotation_hub_jupyter_org_username`, then to groups through
// `jupyterhub_user_group_info` (its `username` label is copied into the
// annotation label name by label_replace to form the join key).
local memoryRequests =
  local groupMemoryRequestsExpr = |||
    sum(
      kube_pod_container_resource_requests{resource="memory", namespace=~"$hub_name", pod=~"jupyter-.*"} * on (namespace, pod)
      group_left(annotation_hub_jupyter_org_username) group(
        kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~".*"}
        ) by (pod, namespace, annotation_hub_jupyter_org_username)
      * on (namespace, annotation_hub_jupyter_org_username) group_left(usergroup)
      group(
        label_replace(jupyterhub_user_group_info{namespace=~"$hub_name", username=~".*", usergroup=~"$user_group"},
          "annotation_hub_jupyter_org_username", "$1", "username", "(.+)")
      ) by (annotation_hub_jupyter_org_username, usergroup, namespace)
    ) by (usergroup, namespace)
  |||;
  local groupMemoryRequestsTarget =
    prometheus.new('$PROMETHEUS_DS', groupMemoryRequestsExpr)
    + prometheus.withLegendFormat('{{ usergroup }} - ({{ namespace }})');
  common.tsOptions
  + ts.new('Memory Requests')
  + ts.panelOptions.withDescription(
    |||
      Per group memory requests

      Requires https://github.com/2i2c-org/jupyterhub-groups-exporter to
      be set up.
    |||
  )
  + ts.standardOptions.withUnit('bytes')
  + ts.queryOptions.withTargets([
    groupMemoryRequestsTarget,
  ]);

// Time-series panel: CPU requested by user server pods, summed per user
// group and namespace.
//
// Requests are joined to usernames through the pod annotation
// `annotation_hub_jupyter_org_username`, then to groups through
// `jupyterhub_user_group_info` (its `username` label is copied into the
// annotation label name by label_replace to form the join key).
local cpuRequests =
  local groupCpuRequestsExpr = |||
    sum(
      kube_pod_container_resource_requests{resource="cpu", namespace=~"$hub_name", pod=~"jupyter-.*"} * on (namespace, pod)
      group_left(annotation_hub_jupyter_org_username) group(
        kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~".*"}
        ) by (pod, namespace, annotation_hub_jupyter_org_username)
      * on (namespace, annotation_hub_jupyter_org_username) group_left(usergroup)
      group(
        label_replace(jupyterhub_user_group_info{namespace=~"$hub_name", username=~".*", usergroup=~"$user_group"},
          "annotation_hub_jupyter_org_username", "$1", "username", "(.+)")
      ) by (annotation_hub_jupyter_org_username, usergroup, namespace)
    ) by (usergroup, namespace)
  |||;
  local groupCpuRequestsTarget =
    prometheus.new('$PROMETHEUS_DS', groupCpuRequestsExpr)
    + prometheus.withLegendFormat('{{ usergroup }} - ({{ namespace }})');
  common.tsOptions
  + ts.new('CPU Requests')
  + ts.panelOptions.withDescription(
    |||
      Per group CPU requests

      Requires https://github.com/2i2c-org/jupyterhub-groups-exporter to
      be set up.
    |||
  )
  + ts.standardOptions.withUnit('percentunit')
  + ts.queryOptions.withTargets([
    groupCpuRequestsTarget,
  ]);

// Template variables shown at the top of the dashboard. The panel queries
// reference `$hub_name` and `$user_group`.
local groupDashboardVariables = [
  common.variables.prometheus,
  common.variables.hub_name,
  common.variables.user_group,
];

// Panels in display order; makeGrid is given a full-width (24) panel size,
// one 12-unit-tall panel per entry.
local groupDashboardPanels = grafonnet.util.grid.makeGrid(
  [
    memoryUsage,
    cpuUsage,
    homedirSharedUsage,
    memoryRequests,
    cpuRequests,
  ],
  panelWidth=24,
  panelHeight=12,
);

// The dashboard object is the value this file evaluates to.
dashboard.new('User Group Diagnostics Dashboard')
+ dashboard.withTags(['jupyterhub'])
+ dashboard.withUid('group-diagnostics-dashboard')
+ dashboard.withEditable(true)
+ dashboard.withVariables(groupDashboardVariables)
+ dashboard.withPanels(groupDashboardPanels)
74 changes: 41 additions & 33 deletions dashboards/user.jsonnet
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ local memoryUsage =
+ ts.new('Memory Usage')
+ ts.panelOptions.withDescription(
|||
Per-user per-server memory usage
Per user memory usage
|||
)
+ ts.standardOptions.withUnit('bytes')
Expand All @@ -20,18 +20,15 @@ local memoryUsage =
'$PROMETHEUS_DS',
|||
sum(
# exclude name="" because the same container can be reported
# with both no name and `name=k8s_...`,
# in which case sum() by (pod) reports double the actual metric
container_memory_working_set_bytes{name!="", instance=~"$instance"}
* on (namespace, pod) group_left(container)
group(
kube_pod_labels{label_app="jupyterhub", label_component="singleuser-server", namespace=~"$hub", pod=~"$user_pod"}
) by (pod, namespace)
) by (pod, namespace)
container_memory_working_set_bytes{name!="", pod=~"jupyter-.*", namespace=~"$hub_name"}
* on (namespace, pod) group_left(annotation_hub_jupyter_org_username)
group(
kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~"$user_name", pod=~"jupyter-.*"}
) by (pod, namespace, annotation_hub_jupyter_org_username)
) by (annotation_hub_jupyter_org_username, namespace)
|||
)
+ prometheus.withLegendFormat('{{ pod }} - ({{ namespace }})'),
+ prometheus.withLegendFormat('{{ annotation_hub_jupyter_org_username }} - ({{ namespace }})'),
]);


Expand All @@ -40,7 +37,7 @@ local cpuUsage =
+ ts.new('CPU Usage')
+ ts.panelOptions.withDescription(
|||
Per-user per-server CPU usage
Per user CPU usage
|||
)
+ ts.standardOptions.withUnit('percentunit')
Expand All @@ -52,15 +49,15 @@ local cpuUsage =
# exclude name="" because the same container can be reported
# with both no name and `name=k8s_...`,
# in which case sum() by (pod) reports double the actual metric
irate(container_cpu_usage_seconds_total{name!="", instance=~"$instance"}[5m])
* on (namespace, pod) group_left(container)
irate(container_cpu_usage_seconds_total{name!="", pod=~"jupyter-.*"}[5m])
* on (namespace, pod) group_left(annotation_hub_jupyter_org_username)
group(
kube_pod_labels{label_app="jupyterhub", label_component="singleuser-server", namespace=~"$hub", pod=~"$user_pod"}
) by (pod, namespace)
) by (pod, namespace)
kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~"$user_name"}
) by (pod, namespace, annotation_hub_jupyter_org_username)
) by (annotation_hub_jupyter_org_username, namespace)
|||
)
+ prometheus.withLegendFormat('{{ pod }} - ({{ namespace }})'),
+ prometheus.withLegendFormat('{{ annotation_hub_jupyter_org_username }} - ({{ namespace }})'),
]);

local homedirSharedUsage =
Expand All @@ -87,19 +84,25 @@ local homedirSharedUsage =
'$PROMETHEUS_DS',
|||
max(
dirsize_total_size_bytes{namespace="$hub"}
) by (directory, namespace)
dirsize_total_size_bytes{namespace=~"$hub_name"}
* on (namespace, directory) group_left(username)
group(
label_replace(
jupyterhub_user_group_info{namespace=~"$hub_name", username_escaped=~".*"},
"directory", "$1", "username_escaped", "(.+)")
) by (directory, namespace, username)
) by (namespace, username)
|||
)
+ prometheus.withLegendFormat('{{ directory }} - ({{ namespace }})'),
+ prometheus.withLegendFormat('{{ username }} - ({{ namespace }})'),
]);

local memoryRequests =
common.tsOptions
+ ts.new('Memory Requests')
+ ts.panelOptions.withDescription(
|||
Per-user per-server memory Requests
Per-user memory requests
|||
)
+ ts.standardOptions.withUnit('bytes')
Expand All @@ -108,19 +111,22 @@ local memoryRequests =
'$PROMETHEUS_DS',
|||
sum(
kube_pod_container_resource_requests{resource="memory", namespace=~"$hub", node=~"$instance"}
) by (pod, namespace)
kube_pod_container_resource_requests{resource="memory", namespace=~"$hub_name", pod=~"jupyter-.*"} * on (namespace, pod)
group_left(annotation_hub_jupyter_org_username) group(
kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~"$user_name"}
) by (pod, namespace, annotation_hub_jupyter_org_username)
) by (annotation_hub_jupyter_org_username, namespace)
|||
)
+ prometheus.withLegendFormat('{{ pod }} - ({{ namespace }})'),
+ prometheus.withLegendFormat('{{ annotation_hub_jupyter_org_username }} - ({{ namespace }})'),
]);

local cpuRequests =
common.tsOptions
+ ts.new('CPU Requests')
+ ts.panelOptions.withDescription(
|||
Per-user per-server CPU Requests
Per user CPU requests
|||
)
+ ts.standardOptions.withUnit('percentunit')
Expand All @@ -129,22 +135,24 @@ local cpuRequests =
'$PROMETHEUS_DS',
|||
sum(
kube_pod_container_resource_requests{resource="cpu", namespace=~"$hub", node=~"$instance"}
) by (pod, namespace)
kube_pod_container_resource_requests{resource="cpu", namespace=~"$hub_name", pod=~"jupyter-.*"} * on (namespace, pod)
group_left(annotation_hub_jupyter_org_username) group(
kube_pod_annotations{namespace=~"$hub_name", annotation_hub_jupyter_org_username=~"$user_name"}
) by (pod, namespace, annotation_hub_jupyter_org_username)
) by (annotation_hub_jupyter_org_username, namespace)
|||
)
+ prometheus.withLegendFormat('{{ pod }} - ({{ namespace }})'),
+ prometheus.withLegendFormat('{{ annotation_hub_jupyter_org_username }} - ({{ namespace }})'),
]);

dashboard.new('User Diagnostics Dashboard')
+ dashboard.withTags(['jupyterhub'])
+ dashboard.withUid('user-pod-diagnostics-dashboard')
+ dashboard.withUid('user-diagnostics-dashboard')
+ dashboard.withEditable(true)
+ dashboard.withVariables([
common.variables.prometheus,
common.variables.hub,
common.variables.user_pod,
common.variables.instance,
common.variables.hub_name,
common.variables.user_name,
])
+ dashboard.withPanels(
grafonnet.util.grid.makeGrid(
Expand Down