Skip to content

Commit c9b2208

Browse files
authored
feat: Jupyterhub oauth support (#581)
Signed-off-by: omrishiv <327609+omrishiv@users.noreply.github.com>
1 parent be5e370 commit c9b2208

File tree

6 files changed

+347
-10
lines changed

6 files changed

+347
-10
lines changed

ai-ml/jupyterhub/addons.tf

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Use this data source to get the ARN of a certificate in AWS Certificate Manager (ACM)
22
data "aws_acm_certificate" "issued" {
3-
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
3+
count = var.jupyter_hub_auth_mechanism != "dummy" ? 1 : 0
44
domain = var.acm_certificate_domain
55
statuses = ["ISSUED"]
66
}
@@ -267,11 +267,12 @@ module "eks_data_addons" {
267267
values = [templatefile("${path.module}/helm/jupyterhub/jupyterhub-values-${var.jupyter_hub_auth_mechanism}.yaml", {
268268
ssl_cert_arn = try(data.aws_acm_certificate.issued[0].arn, "")
269269
jupyterdomain = try("https://${var.jupyterhub_domain}/hub/oauth_callback", "")
270-
authorize_url = try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/authorize", "")
271-
token_url = try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/token", "")
272-
userdata_url = try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/userInfo", "")
273-
client_id = try(aws_cognito_user_pool_client.user_pool_client[0].id, "")
274-
client_secret = try(aws_cognito_user_pool_client.user_pool_client[0].client_secret, "")
270+
authorize_url = var.oauth_domain != "" ? "${var.oauth_domain}/auth" : try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/authorize", "")
271+
token_url = var.oauth_domain != "" ? "${var.oauth_domain}/token" : try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/token", "")
272+
userdata_url = var.oauth_domain != "" ? "${var.oauth_domain}/userinfo" : try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/userInfo", "")
273+
username_key = try(var.oauth_username_key, "")
274+
client_id = var.oauth_jupyter_client_id != "" ? var.oauth_jupyter_client_id : try(aws_cognito_user_pool_client.user_pool_client[0].id, "")
275+
client_secret = var.oauth_jupyter_client_secret != "" ? var.oauth_jupyter_client_secret : try(aws_cognito_user_pool_client.user_pool_client[0].client_secret, "")
275276
user_pool_id = try(aws_cognito_user_pool.pool[0].id, "")
276277
identity_pool_id = try(aws_cognito_identity_pool.identity_pool[0].id, "")
277278
jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
hub:
2+
db:
3+
pvc:
4+
storage: 50Gi
5+
storageClassName: gp3
6+
authenticatePrometheus: false
7+
config:
8+
GenericOAuthenticator:
9+
oauth_callback_url: ${jupyterdomain}
10+
client_id: ${client_id}
11+
client_secret: ${client_secret}
12+
authorize_url: ${authorize_url}
13+
token_url: ${token_url}
14+
userdata_url: ${userdata_url}
15+
scope:
16+
- openid
17+
- profile
18+
username_key: "${username_key}"
19+
login_service: "oauth"
20+
allow_all: true # Allows all oauth authenticated users to use Jupyterhub. For finer grained control, you can use `allowed_users`: https://jupyterhub.readthedocs.io/en/stable/tutorial/getting-started/authenticators-users-basics.html#deciding-who-is-allowed
21+
JupyterHub:
22+
authenticator_class: generic-oauth
23+
proxy:
24+
https:
25+
enabled: true
26+
type: offload
27+
service:
28+
annotations:
29+
service.beta.kubernetes.io/aws-load-balancer-ssl-cert: ${ssl_cert_arn}
30+
service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "https"
31+
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: "tcp"
32+
service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
33+
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
34+
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
35+
service.beta.kubernetes.io/aws-load-balancer-type: external
36+
service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: 'true'
37+
service.beta.kubernetes.io/aws-load-balancer-ip-address-type: ipv4
38+
39+
singleuser:
40+
startTimeout: 1200 # 20 mins to spin up a notebook server for GPU including the image pull
41+
profileList:
42+
- display_name: Elyra (CPU)
43+
description: "Elyra Notebooks | Karpenter Autoscaling"
44+
kubespawner_override:
45+
image: public.ecr.aws/data-on-eks/elyra-jupyter:3.15.0
46+
node_selector:
47+
NodePool: default
48+
cpu_guarantee: 2
49+
mem_guarantee: 8G
50+
cpu_limit: 4
51+
mem_limit: 8G
52+
cmd: null
53+
- display_name: Data Engineering (CPU)
54+
description: "PySpark Notebooks | Karpenter AutoScaling"
55+
profile_options:
56+
image:
57+
display_name: "Image"
58+
choices:
59+
pyspark350:
60+
display_name: "PySpark 3.5.0 + Python 3.11"
61+
default: true
62+
kubespawner_override:
63+
image: jupyter/pyspark-notebook:spark-3.5.0
64+
pyspark341:
65+
display_name: "PySpark 3.4.1 + Python 3.11"
66+
kubespawner_override:
67+
image: jupyter/pyspark-notebook:spark-3.4.1
68+
kubespawner_override:
69+
node_selector:
70+
NodePool: default
71+
cpu_guarantee: 2
72+
mem_guarantee: 8G
73+
cpu_limit: 4
74+
mem_limit: 8G
75+
cmd: null
76+
# NOTE:
77+
- display_name: Trainium (trn1)
78+
description: "Trainium | Karpenter AutoScaling"
79+
profile_options:
80+
image:
81+
display_name: "Image"
82+
choices:
83+
pytorch1131:
84+
display_name: "PyTorch 1.13.1 + torch-neuronx"
85+
default: true
86+
kubespawner_override:
87+
image: public.ecr.aws/data-on-eks/pytorch-neuronx:latest
88+
tflow2101:
89+
display_name: "Tensorflow 2.10.1 + tensorflow-neuronx"
90+
kubespawner_override:
91+
image: public.ecr.aws/data-on-eks/tensorflow-neuronx:latest
92+
kubespawner_override:
93+
node_selector:
94+
NodePool: trainium
95+
hub.jupyter.org/node-purpose: user
96+
tolerations:
97+
- key: aws.amazon.com/neuroncore
98+
operator: Exists
99+
effect: NoSchedule
100+
- key: aws.amazon.com/neuron
101+
operator: Exists
102+
effect: NoSchedule
103+
- key: "hub.jupyter.org/dedicated" # According to optimization docs https://z2jh.jupyter.org/en/latest/administrator/optimization.html
104+
operator: "Equal"
105+
value: "user"
106+
effect: "NoSchedule"
107+
cpu_guarantee: 2
108+
mem_guarantee: 10G
109+
cpu_limit: 2
110+
mem_limit: 10G
111+
extra_resource_limits:
112+
aws.amazon.com/neuron: "1"
113+
cmd: "start-singleuser.sh"
114+
- display_name: Inferentia (inf2)
115+
description: "Inferentia | Karpenter AutoScaling"
116+
profile_options:
117+
image:
118+
display_name: "Image"
119+
choices:
120+
pytorch1131:
121+
display_name: "PyTorch + torch-neuronx"
122+
default: true
123+
kubespawner_override:
124+
image: public.ecr.aws/data-on-eks/pytorch-neuronx:latest
125+
tflow2101:
126+
display_name: "Tensorflow + tensorflow-neuronx"
127+
kubespawner_override:
128+
image: public.ecr.aws/data-on-eks/tensorflow-neuronx:latest
129+
kubespawner_override:
130+
node_selector:
131+
NodePool: inferentia
132+
hub.jupyter.org/node-purpose: user
133+
tolerations:
134+
- key: aws.amazon.com/neuroncore
135+
operator: Exists
136+
effect: NoSchedule
137+
- key: aws.amazon.com/neuron
138+
operator: Exists
139+
effect: NoSchedule
140+
- key: "hub.jupyter.org/dedicated" # According to optimization docs https://z2jh.jupyter.org/en/latest/administrator/optimization.html
141+
operator: "Equal"
142+
value: "user"
143+
effect: "NoSchedule"
144+
cpu_guarantee: 20
145+
mem_guarantee: 100G
146+
cpu_limit: 20
147+
mem_limit: 100G
148+
extra_resource_limits:
149+
aws.amazon.com/neuron: "1"
150+
cmd: null
151+
- display_name: Data Science (GPU + Time-Slicing - G5)
152+
default: true
153+
description: "GPU Time-Slicing with Single GPU VMs (G5 2x, 4x, 8x, 16x) | nvidia.com/gpu: 1 | Karpenter AutoScaling"
154+
kubespawner_override:
155+
# namespace: data-team-a
156+
image: cschranz/gpu-jupyter:v1.6_cuda-11.8_ubuntu-22.04_python-only
157+
node_selector:
158+
NodePool: gpu-ts # TIME-SLICING: Use this config with time-slicing mode
159+
hub.jupyter.org/node-purpose: user
160+
tolerations:
161+
- key: "nvidia.com/gpu"
162+
operator: "Exists"
163+
effect: "NoSchedule"
164+
- key: "hub.jupyter.org/dedicated" # According to optimization docs https://z2jh.jupyter.org/en/latest/administrator/optimization.html
165+
operator: "Equal"
166+
value: "user"
167+
effect: "NoSchedule"
168+
extra_resource_limits:
169+
nvidia.com/gpu: "1" # TIME-SLICING: Use a slice of GPU using time-slicing mode
170+
cpu_limit: 2
171+
mem_limit: 4G
172+
cpu_guarantee: 2
173+
mem_guarantee: 4G
174+
cmd: "start-singleuser.sh"
175+
# Karpenter doesn't support requesting resources with MIG slices, e.g. nvidia.com/mig-1g.5gb: 1 or nvidia.com/mig-2g.20gb: 1.
176+
# Hence, this profile relies on Managed node groups with GPU MIG enabled
177+
- display_name: Data Science (GPU + MIG on P4d.24xlarge)
178+
description: "GPU MIG with P4d instances | nvidia.com/mig-1g.5gb: 1 | Cluster Autoscaler"
179+
kubespawner_override:
180+
image: cschranz/gpu-jupyter:v1.6_cuda-11.8_ubuntu-22.04_python-only
181+
node_selector:
182+
provisioner: cluster-autoscaler
183+
node.kubernetes.io/instance-type: p4d.24xlarge
184+
hub.jupyter.org/node-purpose: user
185+
tolerations:
186+
- key: "nvidia.com/gpu"
187+
operator: "Exists"
188+
effect: "NoSchedule"
189+
- key: "hub.jupyter.org/dedicated" # According to optimization docs https://z2jh.jupyter.org/en/latest/administrator/optimization.html
190+
operator: "Equal"
191+
value: "user"
192+
effect: "NoSchedule"
193+
extra_resource_guarantees:
194+
nvidia.com/mig-1g.5gb: 1 # or nvidia.com/mig-2g.10gb OR nvidia.com/mig-3g.20gb
195+
# extra_resource_limits:
196+
# nvidia.com/gpu: "8" # TIME-SLICING: Use a slice of GPU using time-slicing mode
197+
cpu_guarantee: 2
198+
mem_guarantee: 10G
199+
cpu_limit: 2
200+
mem_limit: 10G
201+
cmd: "start-singleuser.sh"
202+
- display_name: Data Science (GPU - P4d.24xlarge)
203+
description: "GPU with P4d instances | Karpenter Autoscaler"
204+
kubespawner_override:
205+
image: cschranz/gpu-jupyter:v1.6_cuda-11.8_ubuntu-22.04_python-only
206+
node_selector:
207+
NodePool: gpu-mig
208+
hub.jupyter.org/node-purpose: user
209+
tolerations:
210+
- key: "nvidia.com/gpu"
211+
operator: "Exists"
212+
effect: "NoSchedule"
213+
- key: "hub.jupyter.org/dedicated" # According to optimization docs https://z2jh.jupyter.org/en/latest/administrator/optimization.html
214+
operator: "Equal"
215+
value: "user"
216+
effect: "NoSchedule"
217+
extra_resource_limits:
218+
nvidia.com/gpu: "8"
219+
cpu_guarantee: 2
220+
mem_guarantee: 10G
221+
cpu_limit: 2
222+
mem_limit: 10G
223+
cmd: "start-singleuser.sh"
224+
storage:
225+
type: "static"
226+
static:
227+
pvcName: "efs-persist"
228+
subPath: "home/{username}"
229+
extraVolumes:
230+
- name: jupyterhub-shared
231+
persistentVolumeClaim:
232+
claimName: efs-persist-shared
233+
extraVolumeMounts:
234+
- name: jupyterhub-shared
235+
mountPath: /home/shared
236+
readOnly: false
237+
serviceAccountName: ${jupyter_single_user_sa_name}
238+
allowPrivilegeEscalation: true
239+
extraPodConfig: # This is needed so that the Jovyan user running in every single pod can access the Service Account
240+
securityContext:
241+
fsGroup: 100
242+
extraEnv: # Sudo needed to configure the proper permissions to start the notebook instance
243+
GRANT_SUDO: "yes"
244+
NOTEBOOK_ARGS: "--allow-root"
245+
CHOWN_HOME: "yes"
246+
CHOWN_HOME_OPTS: "-R"
247+
CHOWN_EXTRA: "/home/shared"
248+
uid: 0
249+
fsGid: 0
250+
cmd: null
251+
252+
# Optimizations configured according to this doc https://z2jh.jupyter.org/en/latest/administrator/optimization.html
253+
scheduling:
254+
userScheduler:
255+
enabled: true
256+
podPriority:
257+
enabled: true
258+
userPlaceholder:
259+
enabled: false
260+
replicas: 1
261+
userPods:
262+
nodeAffinity:
263+
matchNodePurpose: require # This will force single-user pods to use a specific Karpenter provisioner
264+
265+
prePuller:
266+
hook:
267+
enabled: false
268+
continuous:
269+
# NOTE: if used with Karpenter, also add user-placeholders
270+
enabled: false
271+
272+
global:
273+
safeToShowValues: false

ai-ml/jupyterhub/variables.tf

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ variable "secondary_cidr_blocks" {
3636
# Example of public domain name(<subdomain-name>.<domain-name>.com): eks.jupyter-doeks.dynamic-dns.com
3737
variable "jupyter_hub_auth_mechanism" {
3838
type = string
39-
description = "Allowed values: cognito, dummy"
39+
description = "Allowed values: cognito, dummy, oauth"
4040
default = "dummy"
4141
}
4242

@@ -53,8 +53,39 @@ variable "acm_certificate_domain" {
5353
description = "Enter domain name with wildcard and ensure ACM certificate is created for this domain name, e.g. *.example.com"
5454
default = ""
5555
}
56+
57+
# Only needed if auth mechanism is set to cognito or oauth. This is the domain for jupyterhub
5658
variable "jupyterhub_domain" {
5759
type = string
58-
description = "Enter sub-domain name for jupyterhub to be hosted, e.g. eks.example.com. Only needed if auth mechanism is set to cognito"
60+
description = "Enter domain name for jupyterhub to be hosted, e.g. eks.example.com. Only needed if auth mechanism is set to cognito or oauth"
61+
default = ""
62+
}
63+
64+
# Only needed if auth mechanism is set to oauth. This is the root path for the oidc endpoints
65+
variable "oauth_domain" {
66+
type = string
67+
description = "Enter oauth domain and endpoint, e.g. https://keycloak.example.com/realms/master/protocol/openid-connect. Only needed if auth mechanism is set to oauth"
68+
default = ""
69+
}
70+
71+
# Only needed if auth mechanism is set to oauth. This is the id of the client
72+
variable "oauth_jupyter_client_id" {
73+
type = string
74+
description = "Enter oauth client id for jupyterhub, e.g. jupyterhub. Only needed if auth mechanism is set to oauth"
75+
default = ""
76+
}
77+
78+
# Only needed if auth mechanism is set to oauth. This is the secret for the client
79+
variable "oauth_jupyter_client_secret" {
80+
type = string
81+
description = "Enter oauth client secret. Only needed if auth mechanism is set to oauth"
82+
default = ""
83+
sensitive = true
84+
}
85+
86+
# Only needed if auth mechanism is set to oauth. This is the key to use for looking up the username.
87+
variable "oauth_username_key" {
88+
type = string
89+
description = "oauth field for the username. e.g. 'preferred_username' Only needed if auth mechanism is set to oauth"
5990
default = ""
6091
}
Loading
134 KB
Loading

0 commit comments

Comments
 (0)