
Commit c8e6b5b

chore: EMR Spark Rapids upgrade (#259)
* chore: EMR Spark Rapids upgrade
* Karpenter updates
1 parent b7a7804 commit c8e6b5b

File tree

10 files changed (+368 / -433 lines)


ai-ml/emr-spark-rapids/README.md

Lines changed: 3 additions & 8 deletions
```diff
@@ -35,8 +35,6 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 | <a name="module_kubernetes_data_addons"></a> [kubernetes\_data\_addons](#module\_kubernetes\_data\_addons) | ../../workshop/modules/terraform-aws-eks-data-addons | n/a |
 | <a name="module_s3_bucket"></a> [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 |
 | <a name="module_vpc"></a> [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
-| <a name="module_vpc_endpoints"></a> [vpc\_endpoints](#module\_vpc\_endpoints) | terraform-aws-modules/vpc/aws//modules/vpc-endpoints | ~> 5.0 |
-| <a name="module_vpc_endpoints_sg"></a> [vpc\_endpoints\_sg](#module\_vpc\_endpoints\_sg) | terraform-aws-modules/security-group/aws | ~> 5.0 |
 
 ## Resources
 
@@ -64,15 +62,12 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 |------|-------------|------|---------|:--------:|
 | <a name="input_eks_cluster_version"></a> [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.26"` | no |
 | <a name="input_enable_amazon_prometheus"></a> [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no |
-| <a name="input_enable_kubecost"></a> [enable\_kubecost](#input\_enable\_kubecost) | Enable Kubecost | `bool` | `false` | no |
-| <a name="input_enable_vpc_endpoints"></a> [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `string` | `false` | no |
+| <a name="input_enable_kubecost"></a> [enable\_kubecost](#input\_enable\_kubecost) | Enable Kubecost | `bool` | `true` | no |
 | <a name="input_name"></a> [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-spark-rapids"` | no |
-| <a name="input_private_subnets"></a> [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` | <pre>[<br> "10.1.1.0/24",<br> "10.1.2.0/24"<br>]</pre> | no |
-| <a name="input_public_subnets"></a> [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` | <pre>[<br> "10.1.0.0/26",<br> "10.1.0.64/26"<br>]</pre> | no |
 | <a name="input_region"></a> [region](#input\_region) | Region | `string` | `"us-west-2"` | no |
-| <a name="input_secondary_cidr_blocks"></a> [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `string` | `"100.64.0.0/16"` | no |
+| <a name="input_secondary_cidr_blocks"></a> [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` | <pre>[<br> "100.64.0.0/16"<br>]</pre> | no |
 | <a name="input_tags"></a> [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no |
-| <a name="input_vpc_cidr"></a> [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no |
+| <a name="input_vpc_cidr"></a> [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no |
 
 ## Outputs
 
```
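The input changes above imply matching updates to the example's variable definitions. A minimal sketch of what the updated declarations likely look like; the file name (`variables.tf`) and exact wording are assumptions, since that file is not shown in this diff:

```hcl
# Hedged sketch: assumed variables.tf declarations matching the updated README inputs table.
variable "secondary_cidr_blocks" {
  description = "Secondary CIDR blocks to be attached to VPC"
  type        = list(string) # changed from string in this commit
  default     = ["100.64.0.0/16"]
}

variable "vpc_cidr" {
  description = "VPC CIDR. This should be a valid private (RFC 1918) CIDR range"
  type        = string
  default     = "10.1.0.0/21" # narrowed from 10.1.0.0/16
}
```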

ai-ml/emr-spark-rapids/addons.tf

Lines changed: 9 additions & 3 deletions
```diff
@@ -84,6 +84,11 @@ module "eks_blueprints_addons" {
   #---------------------------------------
   enable_karpenter                  = true
   karpenter_enable_spot_termination = true
+  karpenter_node = {
+    iam_role_additional_policies = {
+      AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+    }
+  }
   karpenter = {
     repository_username = data.aws_ecrpublic_authorization_token.token.user_name
     repository_password = data.aws_ecrpublic_authorization_token.token.password
@@ -98,7 +103,7 @@
   }

   #---------------------------------------
-  # Adding AWS Load Balancer Controller
+  # AWS Load Balancer Controller
   #---------------------------------------
   enable_aws_load_balancer_controller = true
   #---------------------------------------
@@ -140,8 +145,7 @@
 module "kubernetes_data_addons" {
   # Please note that local source will be replaced once the below repo is public
   # source = "https://github.com/aws-ia/terraform-aws-kubernetes-data-addons"
-  source = "../../workshop/modules/terraform-aws-eks-data-addons"
-
+  source            = "../../workshop/modules/terraform-aws-eks-data-addons"
   oidc_provider_arn = module.eks.oidc_provider_arn

   #---------------------------------------------------------------
@@ -155,9 +159,11 @@ module "kubernetes_data_addons" {
   #---------------------------------------------------------------
   # Kubecost Add-on
   #---------------------------------------------------------------
+  # Note: Kubecost add-on depends on the Kube Prometheus Stack add-on for storing the metrics
   enable_kubecost = var.enable_kubecost
   kubecost_helm_config = {
     values              = [templatefile("${path.module}/helm-values/kubecost-values.yaml", {})]
+    version             = "1.104.5"
     repository_username = data.aws_ecrpublic_authorization_token.token.user_name
     repository_password = data.aws_ecrpublic_authorization_token.token.password
   }
```

ai-ml/emr-spark-rapids/data.tf

Lines changed: 0 additions & 21 deletions
This file was deleted.

ai-ml/emr-spark-rapids/eks.tf

Lines changed: 266 additions & 0 deletions
```diff
@@ -0,0 +1,266 @@
+#---------------------------------------------------------------
+# EKS Cluster
+#---------------------------------------------------------------
+
+module "eks" {
+  source  = "terraform-aws-modules/eks/aws"
+  version = "~> 19.15"
+
+  cluster_name    = local.name
+  cluster_version = var.eks_cluster_version
+
+  cluster_endpoint_public_access = true # if true, your cluster API server is accessible from the internet. You can, optionally, limit the CIDR blocks that can access the public endpoint.
+
+  vpc_id = module.vpc.vpc_id
+  # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the EKS Control Plane ENIs will be created
+  subnet_ids = compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null])
+
+  manage_aws_auth_configmap = true
+  aws_auth_roles = [
+    {
+      rolearn  = module.eks_blueprints_addons.karpenter.iam_role_arn
+      username = "system:node:{{EC2PrivateDNSName}}"
+      groups = [
+        "system:bootstrappers",
+        "system:nodes",
+      ]
+    },
+    {
+      # Required for EMR on EKS virtual cluster
+      rolearn  = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/AWSServiceRoleForAmazonEMRContainers"
+      username = "emr-containers"
+      groups   = []
+    },
+  ]
+
+  #---------------------------------------
+  # Note: This can be further restricted to the specific ports required for each add-on and your application
+  #---------------------------------------
+  # Extend cluster security group rules
+  cluster_security_group_additional_rules = {
+    ingress_nodes_ephemeral_ports_tcp = {
+      description                = "Nodes on ephemeral ports"
+      protocol                   = "tcp"
+      from_port                  = 1025
+      to_port                    = 65535
+      type                       = "ingress"
+      source_node_security_group = true
+    }
+  }
+
+  # Extend node-to-node security group rules
+  node_security_group_additional_rules = {
+    ingress_self_all = {
+      description = "Node to node all ports/protocols"
+      protocol    = "-1"
+      from_port   = 0
+      to_port     = 0
+      type        = "ingress"
+      self        = true
+    }
+    # Allows control plane nodes to talk to worker nodes on all ports. Added to simplify the example and avoid issues with add-on communication with the control plane.
+    # This can be restricted further to specific ports based on the requirements of each add-on, e.g., metrics-server 4443, spark-operator 8080, karpenter 8443, etc.
+    # Change this according to your security requirements if needed
+    ingress_cluster_to_node_all_traffic = {
+      description                   = "Cluster API to Nodegroup all traffic"
+      protocol                      = "-1"
+      from_port                     = 0
+      to_port                       = 0
+      type                          = "ingress"
+      source_cluster_security_group = true
+    }
+  }
+
+  eks_managed_node_group_defaults = {
+    iam_role_additional_policies = {
+      # Not required, but used in the example to access the nodes to inspect mounted volumes
+      AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+    }
+  }
+
+  eks_managed_node_groups = {
+    # We recommend having an MNG to place your critical workloads and add-ons
+    # Then rely on Karpenter to scale your workloads
+    # You can also use nodeSelector and taints/tolerations to spread workloads across MNGs or Karpenter provisioners
+    core_node_group = {
+      name        = "core-node-group"
+      description = "EKS managed node group example launch template"
+      # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned
+      subnet_ids = compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null])
+
+      min_size     = 3
+      max_size     = 9
+      desired_size = 3
+
+      ami_type       = "AL2_x86_64"
+      instance_types = ["m5.xlarge"]
+
+      ebs_optimized = true
+      block_device_mappings = {
+        xvda = {
+          device_name = "/dev/xvda"
+          ebs = {
+            volume_size = 100
+            volume_type = "gp3"
+          }
+        }
+      }
+
+      labels = {
+        WorkerType                       = "ON_DEMAND"
+        NodeGroupType                    = "core"
+        "nvidia.com/gpu.deploy.operands" = false
+      }
+
+      tags = {
+        Name                     = "core-node-grp",
+        "karpenter.sh/discovery" = local.name
+      }
+    }
+    spark_driver_ng = {
+      name        = "spark-driver-ng"
+      description = "Spark managed node group for Driver pods with cpu and Ubuntu AMI"
+      # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned
+      subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0)]
+
+      # Ubuntu image for EKS Cluster 1.26 https://cloud-images.ubuntu.com/aws-eks/
+      ami_id = data.aws_ami.ubuntu.image_id
+
+      # This will ensure the bootstrap user data is used to join the node
+      # By default, EKS managed node groups will not append bootstrap script;
+      # this adds it back in using the default template provided by the module
+      # Note: this assumes the AMI provided is an EKS optimized AMI derivative
+      enable_bootstrap_user_data = true
+
+      min_size     = 1
+      max_size     = 8
+      desired_size = 1
+
+      force_update_version = true
+      instance_types       = ["m5.xlarge"] # 4 vCPU and 16GB
+
+      ebs_optimized = true
+      # This block device is used only for the root volume. Adjust the volume size according to your needs.
+      # NOTE: Don't use this volume for Spark workloads
+      block_device_mappings = {
+        xvda = {
+          device_name = "/dev/sda1"
+          ebs = {
+            volume_size = 100
+            volume_type = "gp3"
+          }
+        }
+      }
+
+      labels = {
+        WorkerType                       = "ON_DEMAND"
+        NodeGroupType                    = "spark-driver-ca"
+        "nvidia.com/gpu.deploy.operands" = false
+      }
+
+      taints = [{
+        key    = "spark-driver-ca"
+        value  = true
+        effect = "NO_SCHEDULE"
+      }]
+
+      tags = {
+        Name = "spark-driver-ca"
+      }
+    }
+    spark_gpu_ng = {
+      name        = "spark-gpu-ng"
+      description = "Spark managed Ubuntu GPU node group for executor pods with launch template"
+      # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned
+      subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0)]
+
+      # Ubuntu image for EKS Cluster 1.26 https://cloud-images.ubuntu.com/aws-eks/
+      ami_id = data.aws_ami.ubuntu.image_id
+
+      # This will ensure the bootstrap user data is used to join the node
+      # By default, EKS managed node groups will not append bootstrap script;
+      # this adds it back in using the default template provided by the module
+      # Note: this assumes the AMI provided is an EKS optimized AMI derivative
+      enable_bootstrap_user_data = true
+
+      # NVMe instance store volumes are automatically enumerated and assigned a device
+      pre_bootstrap_user_data = <<-EOT
+        echo "Running a custom user data script"
+        set -ex
+        apt-get update
+        apt-get install -y nvme-cli mdadm xfsprogs
+
+        # Fetch the list of NVMe devices
+        DEVICES=$(lsblk -d -o NAME | grep nvme)
+
+        DISK_ARRAY=()
+
+        for DEV in $DEVICES
+        do
+          # Exclude the root disk, /dev/nvme0n1, from the list of devices
+          if [[ $${DEV} != "nvme0n1" ]]; then
+            NVME_INFO=$(nvme id-ctrl --raw-binary "/dev/$${DEV}" | cut -c3073-3104 | tr -s ' ' | sed 's/ $//g')
+            # Check if the device is Amazon EC2 NVMe Instance Storage
+            if [[ $${NVME_INFO} == *"ephemeral"* ]]; then
+              DISK_ARRAY+=("/dev/$${DEV}")
+            fi
+          fi
+        done
+
+        DISK_COUNT=$${#DISK_ARRAY[@]}
+
+        if [ $${DISK_COUNT} -eq 0 ]; then
+          echo "No NVMe SSD disks available. No further action needed."
+        else
+          if [ $${DISK_COUNT} -eq 1 ]; then
+            TARGET_DEV=$${DISK_ARRAY[0]}
+            mkfs.xfs $${TARGET_DEV}
+          else
+            mdadm --create --verbose /dev/md0 --level=0 --raid-devices=$${DISK_COUNT} $${DISK_ARRAY[@]}
+            mkfs.xfs /dev/md0
+            TARGET_DEV=/dev/md0
+          fi
+
+          mkdir -p /local1
+          echo $${TARGET_DEV} /local1 xfs defaults,noatime 1 2 >> /etc/fstab
+          mount -a
+          /usr/bin/chown -hR +999:+1000 /local1
+        fi
+      EOT
+
+      min_size     = 8
+      max_size     = 8
+      desired_size = 8
+
+      capacity_type  = "SPOT"
+      instance_types = ["g5.2xlarge"]
+
+      ebs_optimized = true
+      # This block device is used only for the root volume. Adjust the volume size according to your needs.
+      # NOTE: Don't use this volume for Spark workloads
+      # Ubuntu uses /dev/sda1 as the root volume
+      block_device_mappings = {
+        xvda = {
+          device_name = "/dev/sda1"
+          ebs = {
+            volume_size = 100
+            volume_type = "gp3"
+          }
+        }
+      }
+
+      labels = {
+        WorkerType    = "SPOT"
+        NodeGroupType = "spark-ubuntu-gpu-ca"
+      }
+
+      taints = [{ key = "spark-ubuntu-gpu-ca", value = true, effect = "NO_SCHEDULE" }]
+
+      tags = {
+        Name = "spark-ubuntu-gpu",
+      }
+    }
+  }
+
+  tags = local.tags
+}
```
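
The node groups above reference `data.aws_ami.ubuntu` for the Ubuntu EKS AMI, but its definition is not part of this diff (the old `data.tf` was deleted, so it presumably lives elsewhere in the example). A hedged sketch of what that lookup typically looks like for the Ubuntu EKS 1.26 image; the owner ID and filter pattern are assumptions based on Canonical's published AMIs, not taken from this commit:

```hcl
# Hedged sketch: assumed data source behind ami_id = data.aws_ami.ubuntu.image_id above.
data "aws_ami" "ubuntu" {
  most_recent = true
  owners      = ["099720109477"] # Canonical's AWS account

  filter {
    name   = "name"
    values = ["ubuntu-eks/k8s_1.26/images/*"]
  }
}
```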

ai-ml/emr-spark-rapids/emr-eks.tf

Lines changed: 8 additions & 5 deletions
```diff
@@ -2,18 +2,21 @@ module "emr_containers" {
   source  = "terraform-aws-modules/emr/aws//modules/virtual-cluster"
   version = "~> 1.0"

-  for_each = toset(["data-team-a", "data-team-b"])
+  for_each = toset(["ml-team-a", "ml-team-b"])

   eks_cluster_id    = module.eks.cluster_name
   oidc_provider_arn = module.eks.oidc_provider_arn

   name      = "${module.eks.cluster_name}-emr-${each.value}"
   namespace = "emr-${each.value}"

-  role_name                = "${module.eks.cluster_name}-emr-${each.value}"
-  iam_role_use_name_prefix = false
-  iam_role_description     = "EMR Execution Role for emr-${each.value}"
-  iam_role_additional_policies = ["arn:aws:iam::aws:policy/AmazonS3FullAccess"] # Attach additional policies for execution IAM Role
+  role_name                = "${module.eks.cluster_name}-emr-${each.value}"
+  iam_role_use_name_prefix = false
+  iam_role_description     = "EMR Execution Role for emr-${each.value}"
+  # NOTE: S3 full access is added only for testing purposes. You should modify this policy to restrict access to specific S3 buckets
+  iam_role_additional_policies = ["arn:aws:iam::aws:policy/AmazonS3FullAccess"]
+
+  cloudwatch_log_group_name = "/emr-on-eks-logs/${module.eks.cluster_name}/emr-${each.value}/"

   tags = merge(local.tags, { Name = "emr-${each.value}" })
 }
```
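
The NOTE above recommends scoping down the execution role instead of attaching `AmazonS3FullAccess`. A minimal sketch of one way to do that, assuming the example's `module.s3_bucket` (listed in the README) exposes the standard `s3_bucket_arn` output; this resource is illustrative and not part of the commit:

```hcl
# Hedged sketch: a scoped S3 policy that could replace AmazonS3FullAccess on the
# EMR execution role. The bucket reference is assumed from the example's s3_bucket module.
resource "aws_iam_policy" "emr_s3_access" {
  name_prefix = "${module.eks.cluster_name}-emr-s3-access-"
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect   = "Allow"
      Action   = ["s3:GetObject", "s3:PutObject", "s3:ListBucket"]
      Resource = [module.s3_bucket.s3_bucket_arn, "${module.s3_bucket.s3_bucket_arn}/*"]
    }]
  })
}
```

Its ARN could then be passed through `iam_role_additional_policies` in place of the managed policy.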
