feat: merge aws and azure databricks runtime modules #1

Merged · 10 commits · Jan 9, 2025
236 changes: 233 additions & 3 deletions README.md

Large diffs are not rendered by default.

192 changes: 192 additions & 0 deletions cluster.tf
@@ -0,0 +1,192 @@
locals {
  spark_conf_single_node = var.cloud_name == "azure" ? {
    "spark.master"                     = "local[*]",
    "spark.databricks.cluster.profile" = "singleNode"
  } : {}

  default_node_type_ids = {
    azure_node_type_id = "Standard_D4ds_v5"
    aws_node_type_id   = "m5d.large"
    # gcp_node_type_id = "gcp-default-node-type-id"
  }
}

resource "databricks_cluster" "this" {
for_each = { for cluster in var.clusters : cluster.cluster_name => cluster }

cluster_name = each.value.cluster_name
spark_version = each.value.spark_version
node_type_id = coalesce(each.value.node_type_id, local.default_node_type_ids["${var.cloud_name}_node_type_id"])
autotermination_minutes = each.value.autotermination_minutes
data_security_mode = each.value.data_security_mode
custom_tags = var.cloud_name == "azure" && each.value.single_node_enable ? merge({ "ResourceClass" = "SingleNode" }, each.value.custom_tags) : each.value.custom_tags

# Conditional configuration for Spark Conf
spark_conf = merge(
each.value.single_node_enable == true ? local.spark_conf_single_node : {},
each.value.spark_conf
)

# Autoscaling block
dynamic "autoscale" {
for_each = !each.value.single_node_enable ? [1] : []
content {
min_workers = each.value.min_workers
max_workers = each.value.max_workers
}
}

# Specific attributes for AWS
dynamic "aws_attributes" {
for_each = var.cloud_name == "aws" ? [each.value] : []
content {
availability = each.value.aws_attributes.availability
zone_id = each.value.aws_attributes.zone_id
first_on_demand = each.value.aws_attributes.first_on_demand
spot_bid_price_percent = each.value.aws_attributes.spot_bid_price_percent
ebs_volume_count = each.value.aws_attributes.ebs_volume_count
ebs_volume_size = each.value.aws_attributes.ebs_volume_size
ebs_volume_type = each.value.aws_attributes.ebs_volume_type
}
}

# Specific attributes for Azure
dynamic "azure_attributes" {
for_each = var.cloud_name == "azure" ? [each.value] : []
content {
availability = each.value.azure_attributes.availability
first_on_demand = each.value.azure_attributes.first_on_demand
spot_bid_max_price = each.value.azure_attributes.spot_bid_max_price
}
}

# Specific configurations
dynamic "cluster_log_conf" {
for_each = var.cloud_name == "azure" && each.value.cluster_log_conf_destination != null ? [each.value.cluster_log_conf_destination] : []
content {
dynamic "dbfs" {
for_each = var.cloud_name == "azure" ? [1] : []
content {
destination = cluster_log_conf.value
}
}

# TODO
# dynamic "s3" {
# for_each = var.cloud_name == "aws" ? [1] : []
# content {
# destination = "s3://acmecorp-main/cluster-logs"
# region = var.region
# }
# }
}
}

dynamic "init_scripts" {
for_each = each.value.init_scripts_workspace != null ? each.value.init_scripts_workspace : []
content {
workspace {
destination = init_scripts.value
}
}
}

dynamic "init_scripts" {
for_each = each.value.init_scripts_volumes != null ? each.value.init_scripts_volumes : []
content {
volumes {
destination = init_scripts.value
}
}
}

dynamic "init_scripts" {
for_each = var.cloud_name == "azure" && each.value.init_scripts_dbfs != null ? each.value.init_scripts_dbfs : []
content {
dbfs {
destination = init_scripts.value
}
}
}

dynamic "init_scripts" {
for_each = var.cloud_name == "azure" && each.value.init_scripts_abfss != null ? each.value.init_scripts_abfss : []
content {
abfss {
destination = init_scripts.value
}
}
}

# Library configurations
dynamic "library" {
for_each = each.value.pypi_library_repository != null ? each.value.pypi_library_repository : []
content {
pypi {
package = library.value
}
}
}

dynamic "library" {
for_each = each.value.maven_library_repository != null ? each.value.maven_library_repository : []
content {
maven {
coordinates = library.value.coordinates
exclusions = library.value.exclusions
}
}
}
}

resource "databricks_cluster_policy" "this" {
for_each = { for param in var.custom_cluster_policies : (param.name) => param.definition
if param.definition != null
}

name = each.key
definition = jsonencode(each.value)
}

resource "databricks_cluster_policy" "overrides" {
for_each = { for param in var.default_cluster_policies_override : (param.name) => param
if param.definition != null
}

policy_family_id = each.value.family_id
policy_family_definition_overrides = jsonencode(each.value.definition)
name = each.key
}

resource "databricks_permissions" "policy" {
for_each = { for param in var.custom_cluster_policies : param.name => param.can_use
if param.can_use != null
}

cluster_policy_id = databricks_cluster_policy.this[each.key].id

dynamic "access_control" {
for_each = each.value
content {
group_name = access_control.value
permission_level = "CAN_USE"
}
}
}

resource "databricks_permissions" "clusters" {
for_each = {
for v in var.clusters : (v.cluster_name) => v
if length(v.permissions) != 0
}

cluster_id = databricks_cluster.this[each.key].id

dynamic "access_control" {
for_each = each.value.permissions
content {
group_name = access_control.value.group_name
permission_level = access_control.value.permission_level
}
}
}
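
For review context, here is a minimal sketch of the kind of input the resources above expect. The attribute names are inferred from the references in cluster.tf; the authoritative types and defaults live in variables.tf, which is not part of this diff, and the module name, source path, and values are hypothetical.

# Illustrative sketch only — shapes inferred from cluster.tf, not the real schema
module "databricks_runtime" {            # hypothetical name
  source     = "../terraform-databricks-runtime"   # hypothetical path
  cloud_name = "azure"

  clusters = [
    {
      cluster_name            = "single-node-dev"
      spark_version           = "15.4.x-scala2.12"
      node_type_id            = null                # null falls back to local.default_node_type_ids
      autotermination_minutes = 30
      data_security_mode      = "USER_ISOLATION"
      single_node_enable      = true                # adds the SingleNode tag and spark_conf on Azure
      custom_tags             = { "Project" = "demo" }
      spark_conf              = {}
      permissions             = [{ group_name = "dev-team", permission_level = "CAN_RESTART" }]
    }
  ]

  custom_cluster_policies = [
    {
      name       = "restrict-node-types"            # example policy
      definition = { "node_type_id" = { "type" = "allowlist", "values" = ["Standard_D4ds_v5"] } }
      can_use    = ["dev-team"]
    }
  ]
}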
11 changes: 11 additions & 0 deletions data.tf
@@ -0,0 +1,11 @@
data "databricks_group" "account_groups" {
for_each = local.iam_account_map

display_name = each.key
}

data "databricks_current_metastore" "this" {
}

data "databricks_sql_warehouses" "all" {
}
26 changes: 26 additions & 0 deletions iam.tf
@@ -0,0 +1,26 @@
locals {
  iam_account_map = tomap({
    for group in var.iam_account_groups : group.group_name => group.entitlements
    if group.group_name != null
  })
}

resource "databricks_group" "this" {
  # Workspace-local groups are created only on Azure and only when no account-level
  # groups are provided; keying by group name allows lookups by name elsewhere (e.g. secrets.tf).
  for_each = var.cloud_name == "azure" && length(local.iam_account_map) == 0 ? toset(keys(var.iam_workspace_groups)) : toset([])

  display_name = each.value

  lifecycle {
    ignore_changes = [external_id, allow_cluster_create, allow_instance_pool_create, databricks_sql_access, workspace_access]
  }
}

resource "databricks_entitlements" "this" {
  for_each = local.iam_account_map

  group_id                   = data.databricks_group.account_groups[each.key].id
  allow_cluster_create       = contains(coalesce(each.value, ["none"]), "allow_cluster_create")
  allow_instance_pool_create = contains(coalesce(each.value, ["none"]), "allow_instance_pool_create")
  databricks_sql_access      = contains(coalesce(each.value, ["none"]), "databricks_sql_access")
  workspace_access           = true
}
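
A hedged sketch of the account-groups input consumed above — the group names are placeholders, while the entitlement strings are exactly the ones databricks_entitlements checks for:

# Illustrative only — group names are placeholders
iam_account_groups = [
  { group_name = "data-engineers", entitlements = ["allow_cluster_create", "databricks_sql_access"] },
  { group_name = "data-analysts", entitlements = ["databricks_sql_access"] },
]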
23 changes: 23 additions & 0 deletions main.tf
@@ -0,0 +1,23 @@
resource "databricks_workspace_conf" "this" {
custom_config = var.custom_config
}

resource "databricks_ip_access_list" "allowed_list" {
label = "allow_in"
list_type = "ALLOW"
ip_addresses = flatten([for v in values(var.ip_addresses) : v])

depends_on = [databricks_workspace_conf.this]
}

resource "databricks_token" "pat" {
count = var.workspace_admin_token_enabled ? 1 : 0
comment = "Terraform Provisioning"
lifetime_seconds = var.pat_token_lifetime_seconds
}

resource "databricks_system_schema" "this" {
for_each = var.system_schemas_enabled ? var.system_schemas : toset([])

schema = each.value
}
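
IP access lists only take effect once enableIpAccessLists is enabled in the workspace configuration, which is presumably why the resource depends on databricks_workspace_conf. A possible input combination follows; the addresses are placeholders and the map-of-lists shape of ip_addresses is an assumption inferred from the flatten() call above.

# Illustrative values only
custom_config = { "enableIpAccessLists" = "true" }

ip_addresses = {
  office = ["203.0.113.0/24"]
  vpn    = ["198.51.100.10/32"]
}

workspace_admin_token_enabled = true
pat_token_lifetime_seconds    = 86400

system_schemas_enabled = true
system_schemas         = ["access", "billing", "compute"]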
21 changes: 21 additions & 0 deletions mount.tf
@@ -0,0 +1,21 @@
locals {
  mount_sp_secrets = var.cloud_name == "azure" ? {
    mount_sp_client_id = { value = var.mount_configuration.service_principal.client_id }
    mount_sp_secret    = { value = var.mount_configuration.service_principal.client_secret }
  } : {}
}

resource "databricks_mount" "adls" {
  for_each = var.mount_enabled && var.cloud_name == "azure" ? var.mountpoints : {}

  name = each.key
  uri  = "abfss://${each.value["container_name"]}@${each.value["storage_account_name"]}.dfs.core.windows.net"
  extra_configs = {
    "fs.azure.account.auth.type" : "OAuth",
    "fs.azure.account.oauth.provider.type" : "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id" : var.mount_configuration.service_principal.client_id,
    # Key must match the secret created in secrets.tf from local.mount_sp_secrets
    "fs.azure.account.oauth2.client.secret" : databricks_secret.main["mount_sp_secret"].config_reference,
    "fs.azure.account.oauth2.client.endpoint" : "https://login.microsoftonline.com/${var.mount_configuration.service_principal.tenant_id}/oauth2/token",
    "fs.azure.createRemoteFileSystemDuringInitialization" : "false",
  }
}
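
A sketch of the mount-related inputs referenced above — the service principal values and storage names are placeholders, and the exact object types are defined in variables.tf (not shown in this diff):

# Illustrative values only
mount_enabled = true

mount_configuration = {
  service_principal = {
    client_id     = "00000000-0000-0000-0000-000000000000"
    client_secret = "<client-secret>"   # supply from a secure source, not plain text
    tenant_id     = "11111111-1111-1111-1111-111111111111"
  }
}

mountpoints = {
  raw = {
    storage_account_name = "examplestorageacct"
    container_name       = "raw"
  }
}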
33 changes: 33 additions & 0 deletions outputs.tf
@@ -0,0 +1,33 @@
output "sql_endpoint_jdbc_url" {
value = [for n in databricks_sql_endpoint.this : n.jdbc_url]
description = "JDBC connection string of SQL Endpoint"
}

output "sql_endpoint_data_source_id" {
value = [for n in databricks_sql_endpoint.this : n.data_source_id]
description = "ID of the data source for this endpoint"
}

output "token" {
value = length(databricks_token.pat) > 0 ? databricks_token.pat[0].token_value : null
description = "Databricks Personal Authorization Token"
sensitive = true
}

output "clusters" {
value = [for param in var.clusters : {
name = param.cluster_name
id = databricks_cluster.this[param.cluster_name].id
} if length(var.clusters) != 0]
description = "Provides name and unique identifier for the clusters"
}

output "sql_warehouses_list" {
value = data.databricks_sql_warehouses.all.ids
description = "List of IDs of all SQL warehouses in the Databricks workspace."
}

output "metastore_id" {
value = data.databricks_current_metastore.this.id
description = "The ID of the current metastore in the Databricks workspace."
}
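
For a caller, these outputs can be wired into a workspace-level provider or re-exported. The configuration below is a hypothetical sketch; the module name and the databricks_workspace_url variable are not part of this PR.

# Hypothetical calling configuration
provider "databricks" {
  alias = "workspace"
  host  = var.databricks_workspace_url   # hypothetical variable
  token = module.databricks_runtime.token
}

output "cluster_ids" {
  value = { for c in module.databricks_runtime.clusters : c.name => c.id }
}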
78 changes: 78 additions & 0 deletions secrets.tf
@@ -0,0 +1,78 @@
locals {
  secrets_acl_objects_list = flatten([for param in var.secret_scope : [
    for permission in param.acl : {
      scope = param.scope_name, principal = permission.principal, permission = permission.permission
    }] if param.acl != null
  ])

  secret_scope_config = { for object in var.secret_scope : object.scope_name => object }

  secret_scope_config_secrets = { for object in flatten([for k, v in local.secret_scope_config : [for secret in v.secrets : {
    scope_name   = k,
    secret_key   = secret.key,
    secret_value = secret.string_value,
  }]]) : "${object.scope_name}:${object.secret_key}" => object }
}

# Secret Scope with SP secrets for mounting Azure Data Lake Storage
resource "databricks_secret_scope" "main" {
  count = var.cloud_name == "azure" && var.mount_enabled ? 1 : 0

  name                     = "main"
  initial_manage_principal = null
}

resource "databricks_secret" "main" {
  for_each = var.cloud_name == "azure" && var.mount_enabled ? local.mount_sp_secrets : {}

  key          = each.key
  string_value = each.value["value"]
  scope        = databricks_secret_scope.main[0].id

  lifecycle {
    precondition {
      condition     = var.cloud_name == "azure" && var.mount_enabled ? length(compact([var.mount_configuration.service_principal.client_id, var.mount_configuration.service_principal.client_secret, var.mount_configuration.service_principal.tenant_id])) == 3 : true
      error_message = "To mount ADLS Storage, please provide prerequisite Service Principal values - 'mount_configuration.service_principal.client_id', 'mount_configuration.service_principal.client_secret', 'mount_configuration.service_principal.tenant_id'."
    }
  }
}

# Custom additional Databricks Secret Scope
resource "databricks_secret_scope" "this" {
  for_each = {
    for param in var.secret_scope : (param.scope_name) => param
    if param.scope_name != null
  }

  name = each.key

  # Key Vault metadata block only for Azure
  dynamic "keyvault_metadata" {
    for_each = var.cloud_name == "azure" ? [for kv in var.key_vault_secret_scope : kv] : []
    content {
      resource_id = keyvault_metadata.value.key_vault_id
      dns_name    = keyvault_metadata.value.dns_name
    }
  }

  # No initial manage principal on either cloud; access is granted via databricks_secret_acl below
  initial_manage_principal = null
}

resource "databricks_secret" "this" {
for_each = local.secret_scope_config_secrets

key = each.value.secret_key
string_value = each.value.secret_value
scope = databricks_secret_scope.this[each.value.scope_name].id
}

resource "databricks_secret_acl" "this" {
for_each = var.cloud_name == "azure" && length(local.secrets_acl_objects_list) > 0 ? {
for_each = { for entry in local.secrets_acl_objects_list : "${entry.scope}.${entry.principal}.${entry.permission}" => entry }
} : {}

scope = databricks_secret_scope.this[each.value.scope].name
principal = length(var.iam_account_groups) != 0 ? data.databricks_group.account_groups[each.value.principal].display_name : databricks_group.this[each.value.principal].display_name
permission = each.value.permission
}
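
A sketch of the secret_scope input these locals flatten — the field names (scope_name, secrets[].key / string_value, acl[].principal / permission) follow the references above, while the values are placeholders:

# Illustrative only
secret_scope = [
  {
    scope_name = "application-secrets"
    secrets = [
      { key = "db-password", string_value = "<set-from-a-secure-source>" }
    ]
    acl = [
      { principal = "data-engineers", permission = "READ" }
    ]
  }
]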