diff --git a/README.md b/README.md
index 37ecf4c..759453a 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,242 @@
-# Azure <> Terraform module
-Terraform module for creation Azure <>
+# Databricks Premium Runtime Terraform module
+Terraform module for creation of Databricks Premium Runtime resources
 ## Usage
+### **Requires Workspace with "Premium" SKU**
+
+The main idea behind this module is to deploy resources for a Databricks Workspace with Premium SKU only.
+
+Here we provide some examples of how to provision it with different options.
+
+### The example below covers the following features of this module:
+1. Workspace admins assignment, custom Workspace group creation, group assignments, group entitlements
+2. Clusters (e.g., for Unity Catalog and Shared Autoscaling)
+3. Workspace IP Access list creation
+4. ADLS Gen2 Mount
+5. Create Secret Scope and assign permissions to custom groups
+6. SQL Endpoint creation and configuration
+7. Create Cluster policy
+8. Create an Azure Key Vault-backed secret scope
+9. Connect to an already existing Unity Catalog Metastore
+
+```hcl
+# Prerequisite resources
+
+# Databricks Workspace with Premium SKU
+data "azurerm_databricks_workspace" "example" {
+  name                = "example-workspace"
+  resource_group_name = "example-rg"
+}
+
+# Databricks Provider configuration
+provider "databricks" {
+  alias                       = "main"
+  host                        = data.azurerm_databricks_workspace.example.workspace_url
+  azure_workspace_resource_id = data.azurerm_databricks_workspace.example.id
+}
+
+# Key Vault where Service Principal's secrets are stored. Used for mounting Storage Container
+data "azurerm_key_vault" "example" {
+  name                = "example-key-vault"
+  resource_group_name = "example-rg"
+}
+
+# Example usage of the module for Databricks Premium Runtime resources.
+module "databricks_runtime_premium" {
+  source = "data-platform-hq/databricks-runtime-premium/databricks"
+
+  project  = "datahq"
+  env      = "example"
+  location = "eastus"
+
+  # Parameters of Service principal used for ADLS mount
+  # Imports App ID and Secret of Service Principal from target Key Vault
+  key_vault_id             = data.azurerm_key_vault.example.id
+  sp_client_id_secret_name = "sp-client-id"        # secret's name that stores Service Principal App ID
+  sp_key_secret_name       = "sp-key"              # secret's name that stores Service Principal Secret Key
+  tenant_id_secret_name    = "infra-arm-tenant-id" # secret's name that stores tenant id value
+
+  # 1.1 Workspace admins
+  workspace_admins = {
+    user              = ["user1@example.com"]
+    service_principal = ["example-app-id"]
+  }
+
+  # 1.2 Custom Workspace group with assignments.
+  # In addition, provides an ability to create group and entitlements.
+  iam = [{
+    group_name   = "DEVELOPERS"
+    permissions  = ["ADMIN"]
+    entitlements = [
+      "allow_instance_pool_create",
+      "allow_cluster_create",
+      "databricks_sql_access"
+    ]
+  }]
+
+  # 2. Databricks clusters configuration, with permissions assigned to a custom group.
+  databricks_cluster_configs = [{
+    cluster_name       = "Unity Catalog"
+    data_security_mode = "USER_ISOLATION"
+    availability       = "ON_DEMAND_AZURE"
+    spot_bid_max_price = 1
+    permissions        = [{ group_name = "DEVELOPERS", permission_level = "CAN_RESTART" }]
+  },
+  {
+    cluster_name       = "shared autoscaling"
+    data_security_mode = "NONE"
+    availability       = "SPOT_AZURE"
+    spot_bid_max_price = -1
+    permissions        = [{ group_name = "DEVELOPERS", permission_level = "CAN_MANAGE" }]
+  }]
+
+  # 3. Workspace can be accessed only from these IP addresses:
+  ip_rules = {
+    "ip_range_1" = "10.128.0.0/16",
+    "ip_range_2" = "10.33.0.0/16",
+  }
+
+  # 4. ADLS Gen2 Mount
+  mountpoints = {
+    storage_account_name = data.azurerm_storage_account.example.name
+    container_name       = "example_container"
+  }
+
+  # 5. Create Secret Scope and assign permissions to custom groups
+  secret_scope = [{
+    scope_name = "extra-scope"
+    acl = [{ principal = "DEVELOPERS", permission = "READ" }] # Only custom workspace group names are allowed. If left empty, only Workspace admins can access these keys
+    secrets = [{ key = "secret-name", string_value = "secret-value" }]
+  }]
+
+  # 6. SQL Warehouse Endpoint
+  databricks_sql_endpoint = [{
+    name                      = "default"
+    enable_serverless_compute = true
+    permissions               = [{ group_name = "DEVELOPERS", permission_level = "CAN_USE" }]
+  }]
+
+  # 7. Databricks cluster policies
+  custom_cluster_policies = [{
+    name    = "custom_policy_1",
+    can_use = "DEVELOPERS", # custom workspace group name that is allowed to use this policy
+    definition = {
+      "autoscale.max_workers": {
+        "type": "range",
+        "maxValue": 3,
+        "defaultValue": 2
+      },
+    }
+  }]
+
+  # 8. Azure Key Vault-backed secret scope
+  key_vault_secret_scope = [{
+    name         = "external"
+    key_vault_id = data.azurerm_key_vault.example.id
+    dns_name     = data.azurerm_key_vault.example.vault_uri
+  }]
+
+  providers = {
+    databricks = databricks.main
+  }
+}
+
+# 9. Assign an already existing Unity Catalog Metastore
+module "metastore_assignment" {
+  source  = "data-platform-hq/metastore-assignment/databricks"
+  version = "1.0.0"
+
+  workspace_id = data.azurerm_databricks_workspace.example.workspace_id
+  metastore_id = ""
+
+  providers = {
+    databricks = databricks.main
+  }
+}
+
+```
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.0 |
+| [databricks](#requirement\_databricks) | ~>1.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [databricks](#provider\_databricks) | ~>1.0 |
+
+## Modules
+
+No modules.
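+
+Outputs declared by this module (see the Outputs section below) can be referenced from the calling configuration. A minimal sketch, assuming the `databricks_runtime_premium` module block from the usage example above:
+
+```hcl
+# Expose selected module outputs from the root configuration
+output "databricks_clusters" {
+  value = module.databricks_runtime_premium.clusters # list of { name, id } objects
+}
+
+output "databricks_pat_token" {
+  value     = module.databricks_runtime_premium.token
+  sensitive = true # the module marks this output as sensitive
+}
+```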
+ +## Resources + +| Name | Type | +|------|------| +| [databricks_cluster.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/cluster) | resource | +| [databricks_cluster_policy.overrides](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/cluster_policy) | resource | +| [databricks_cluster_policy.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/cluster_policy) | resource | +| [databricks_entitlements.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/entitlements) | resource | +| [databricks_group.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/group) | resource | +| [databricks_ip_access_list.allowed_list](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/ip_access_list) | resource | +| [databricks_mount.adls](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mount) | resource | +| [databricks_permissions.clusters](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/permissions) | resource | +| [databricks_permissions.policy](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/permissions) | resource | +| [databricks_permissions.sql_endpoint](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/permissions) | resource | +| [databricks_secret.main](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/secret) | resource | +| [databricks_secret.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/secret) | resource | +| [databricks_secret_acl.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/secret_acl) | resource | +| [databricks_secret_scope.main](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/secret_scope) | resource | +| [databricks_secret_scope.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/secret_scope) | resource | +| [databricks_sql_endpoint.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/sql_endpoint) | resource | +| [databricks_system_schema.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/system_schema) | resource | +| [databricks_token.pat](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/token) | resource | +| [databricks_workspace_conf.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/workspace_conf) | resource | +| [databricks_current_metastore.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/current_metastore) | data source | +| [databricks_group.account_groups](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/group) | data source | +| [databricks_sql_warehouses.all](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/sql_warehouses) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [cloud\_name](#input\_cloud\_name) | Cloud Name | `string` | n/a | yes | +| [clusters](#input\_clusters) | Set of objects with parameters to configure Databricks clusters and assign permissions to it for 
certain custom groups |
set(object({
cluster_name = string
spark_version = optional(string, "15.3.x-scala2.12")
spark_conf = optional(map(any), {})
spark_env_vars = optional(map(any), {})
data_security_mode = optional(string, "USER_ISOLATION")
aws_attributes = optional(object({
availability = optional(string)
zone_id = optional(string)
first_on_demand = optional(number)
spot_bid_price_percent = optional(number)
ebs_volume_count = optional(number)
ebs_volume_size = optional(number)
ebs_volume_type = optional(string)
}), {
availability = "ON_DEMAND"
zone_id = "auto"
first_on_demand = 0
spot_bid_price_percent = 100
ebs_volume_count = 1
ebs_volume_size = 100
ebs_volume_type = "GENERAL_PURPOSE_SSD"
})
azure_attributes = optional(object({
availability = optional(string)
first_on_demand = optional(number)
spot_bid_max_price = optional(number, 1)
}), {
availability = "ON_DEMAND_AZURE"
first_on_demand = 0
})
node_type_id = optional(string, null)
autotermination_minutes = optional(number, 20)
min_workers = optional(number, 1)
max_workers = optional(number, 2)
cluster_log_conf_destination = optional(string, null)
init_scripts_workspace = optional(set(string), [])
init_scripts_volumes = optional(set(string), [])
init_scripts_dbfs = optional(set(string), [])
init_scripts_abfss = optional(set(string), [])
single_user_name = optional(string, null)
single_node_enable = optional(bool, false)
custom_tags = optional(map(string), {})
permissions = optional(set(object({
group_name = string
permission_level = string
})), [])
pypi_library_repository = optional(set(string), [])
maven_library_repository = optional(set(object({
coordinates = string
exclusions = set(string)
})), [])
}))
| `[]` | no | +| [custom\_cluster\_policies](#input\_custom\_cluster\_policies) | Provides an ability to create custom cluster policy, assign it to cluster and grant CAN\_USE permissions on it to certain custom groups
name - name of custom cluster policy to create
can\_use - list of strings, where values are custom group names; these groups have to be created with Terraform;
definition - JSON document expressed in Databricks Policy Definition Language. No need to call 'jsonencode()' function on it when providing a value; |
list(object({
name = string
can_use = list(string)
definition = any
}))
|
[
{
"can_use": null,
"definition": null,
"name": null
}
]
| no | +| [custom\_config](#input\_custom\_config) | Map of Azure Databricks workspace custom config | `map(string)` |
{
"enable-X-Content-Type-Options": "true",
"enable-X-Frame-Options": "true",
"enable-X-XSS-Protection": "true",
"enableDbfsFileBrowser": "false",
"enableExportNotebook": "false",
"enableIpAccessLists": "true",
"enableNotebookTableClipboard": "false",
"enableResultsDownloading": "false",
"enableUploadDataUis": "false",
"enableVerboseAuditLogs": "true",
"enforceUserIsolation": "true",
"storeInteractiveNotebookResultsInCustomerAccount": "true"
}
| no | +| [default\_cluster\_policies\_override](#input\_default\_cluster\_policies\_override) | Provides an ability to override default cluster policy
name - name of cluster policy to override
family\_id - family id of corresponding policy
definition - JSON document expressed in Databricks Policy Definition Language. No need to call 'jsonencode()' function on it when providing a value; |
list(object({
name = string
family_id = string
definition = any
}))
|
[
{
"definition": null,
"family_id": null,
"name": null
}
]
| no | +| [iam\_account\_groups](#input\_iam\_account\_groups) | List of objects with group name and entitlements for this group |
list(object({
group_name = optional(string)
entitlements = optional(list(string))
}))
| `[]` | no | +| [iam\_workspace\_groups](#input\_iam\_workspace\_groups) | Used to create workspace group. Map of group name and its parameters, such as users and service principals added to the group. Also possible to configure group entitlements. |
map(object({
user = optional(list(string))
service_principal = optional(list(string))
entitlements = optional(list(string))
}))
| `{}` | no | +| [ip\_addresses](#input\_ip\_addresses) | A map of IP address ranges | `map(string)` |
{
"all": "0.0.0.0/0"
}
| no | +| [key\_vault\_secret\_scope](#input\_key\_vault\_secret\_scope) | Object with Azure Key Vault parameters required for creation of Azure-backed Databricks Secret scope |
list(object({
name = string
key_vault_id = string
dns_name = string
tenant_id = string
}))
| `[]` | no | +| [mount\_configuration](#input\_mount\_configuration) | Configuration for mounting storage, including only service principal details |
object({
service_principal = object({
client_id = string
client_secret = string
tenant_id = string
})
})
|
{
"service_principal": {
"client_id": null,
"client_secret": null,
"tenant_id": null
}
}
| no | +| [mount\_enabled](#input\_mount\_enabled) | Boolean flag that determines whether mount point for storage account filesystem is created | `bool` | `false` | no | +| [mountpoints](#input\_mountpoints) | Mountpoints for databricks |
map(object({
storage_account_name = string
container_name = string
}))
| `{}` | no | +| [pat\_token\_lifetime\_seconds](#input\_pat\_token\_lifetime\_seconds) | The lifetime of the token, in seconds. If no lifetime is specified, the token remains valid indefinitely | `number` | `315569520` | no | +| [secret\_scope](#input\_secret\_scope) | Provides an ability to create custom Secret Scope, store secrets in it and assign ACLs for access management
scope\_name - name of Secret Scope to create;
scope\_acl - list of objects, where 'principal' is a custom group name (these groups are created by this 'Premium' module) and 'permission' is one of "READ", "WRITE", "MANAGE";
secrets - list of objects, where object's 'key' param is created key name and 'string\_value' is a value for it; |
list(object({
scope_name = string
scope_acl = optional(list(object({
principal = string
permission = string
})))
secrets = optional(list(object({
key = string
string_value = string
})))
}))
| `[]` | no | +| [sql\_endpoint](#input\_sql\_endpoint) | Set of objects with parameters to configure SQL Endpoint and assign permissions to it for certain custom groups |
set(object({
name = string
cluster_size = optional(string, "2X-Small")
min_num_clusters = optional(number, 0)
max_num_clusters = optional(number, 1)
auto_stop_mins = optional(string, "30")
enable_photon = optional(bool, false)
enable_serverless_compute = optional(bool, false)
spot_instance_policy = optional(string, "COST_OPTIMIZED")
warehouse_type = optional(string, "PRO")
permissions = optional(set(object({
group_name = string
permission_level = string
})), [])
}))
| `[]` | no | +| [suffix](#input\_suffix) | Optional suffix that would be added to the end of resource names. | `string` | `""` | no | +| [system\_schemas](#input\_system\_schemas) | Set of strings with all possible System Schema names | `set(string)` |
[
"access",
"billing",
"compute",
"marketplace",
"storage"
]
| no | +| [system\_schemas\_enabled](#input\_system\_schemas\_enabled) | System Schemas only works with assigned Unity Catalog Metastore. Boolean flag to enabled this feature | `bool` | `false` | no | +| [workspace\_admin\_token\_enabled](#input\_workspace\_admin\_token\_enabled) | Boolean flag to specify whether to create Workspace Admin Token | `bool` | n/a | yes | + +## Outputs +| Name | Description | +|------|-------------| +| [clusters](#output\_clusters) | Provides name and unique identifier for the clusters | +| [metastore\_id](#output\_metastore\_id) | The ID of the current metastore in the Databricks workspace. | +| [sql\_endpoint\_data\_source\_id](#output\_sql\_endpoint\_data\_source\_id) | ID of the data source for this endpoint | +| [sql\_endpoint\_jdbc\_url](#output\_sql\_endpoint\_jdbc\_url) | JDBC connection string of SQL Endpoint | +| [sql\_warehouses\_list](#output\_sql\_warehouses\_list) | List of IDs of all SQL warehouses in the Databricks workspace. | +| [token](#output\_token) | Databricks Personal Authorization Token | ## License -Apache 2 Licensed. For more information please see [LICENSE](./LICENSE) +Apache 2 Licensed. For more information please see [LICENSE](https://github.com/data-platform-hq/terraform-databricks-databricks-runtime-premium/blob/main/LICENSE) diff --git a/cluster.tf b/cluster.tf new file mode 100644 index 0000000..0034602 --- /dev/null +++ b/cluster.tf @@ -0,0 +1,192 @@ +locals { + spark_conf_single_node = var.cloud_name == "azure" ? { + "spark.master" = "local[*]", + "spark.databricks.cluster.profile" = "singleNode" + } : {} + + default_node_type_ids = { + azure_node_type_id = "Standard_D4ds_v5" + aws_node_type_id = "m5d.large" + # gcp_node_type_id = "gcp-default-node-type-id" + } +} + +resource "databricks_cluster" "this" { + for_each = { for cluster in var.clusters : cluster.cluster_name => cluster } + + cluster_name = each.value.cluster_name + spark_version = each.value.spark_version + node_type_id = coalesce(each.value.node_type_id, local.default_node_type_ids["${var.cloud_name}_node_type_id"]) + autotermination_minutes = each.value.autotermination_minutes + data_security_mode = each.value.data_security_mode + custom_tags = var.cloud_name == "azure" && each.value.single_node_enable ? merge({ "ResourceClass" = "SingleNode" }, each.value.custom_tags) : each.value.custom_tags + + # Conditional configuration for Spark Conf + spark_conf = merge( + each.value.single_node_enable == true ? local.spark_conf_single_node : {}, + each.value.spark_conf + ) + + # Autoscaling block + dynamic "autoscale" { + for_each = !each.value.single_node_enable ? [1] : [] + content { + min_workers = each.value.min_workers + max_workers = each.value.max_workers + } + } + + # Specific attributes for AWS + dynamic "aws_attributes" { + for_each = var.cloud_name == "aws" ? [each.value] : [] + content { + availability = each.value.aws_attributes.availability + zone_id = each.value.aws_attributes.zone_id + first_on_demand = each.value.aws_attributes.first_on_demand + spot_bid_price_percent = each.value.aws_attributes.spot_bid_price_percent + ebs_volume_count = each.value.aws_attributes.ebs_volume_count + ebs_volume_size = each.value.aws_attributes.ebs_volume_size + ebs_volume_type = each.value.aws_attributes.ebs_volume_type + } + } + + # Specific attributes for Azure + dynamic "azure_attributes" { + for_each = var.cloud_name == "azure" ? 
[each.value] : [] + content { + availability = each.value.azure_attributes.availability + first_on_demand = each.value.azure_attributes.first_on_demand + spot_bid_max_price = each.value.azure_attributes.spot_bid_max_price + } + } + + # Specific configurations + dynamic "cluster_log_conf" { + for_each = var.cloud_name == "azure" && each.value.cluster_log_conf_destination != null ? [each.value.cluster_log_conf_destination] : [] + content { + dynamic "dbfs" { + for_each = var.cloud_name == "azure" ? [1] : [] + content { + destination = cluster_log_conf.value + } + } + + # TODO + # dynamic "s3" { + # for_each = var.cloud_name == "aws" ? [1] : [] + # content { + # destination = "s3://acmecorp-main/cluster-logs" + # region = var.region + # } + # } + } + } + + dynamic "init_scripts" { + for_each = each.value.init_scripts_workspace != null ? each.value.init_scripts_workspace : [] + content { + workspace { + destination = init_scripts.value + } + } + } + + dynamic "init_scripts" { + for_each = each.value.init_scripts_volumes != null ? each.value.init_scripts_volumes : [] + content { + volumes { + destination = init_scripts.value + } + } + } + + dynamic "init_scripts" { + for_each = var.cloud_name == "azure" && each.value.init_scripts_dbfs != null ? each.value.init_scripts_dbfs : [] + content { + dbfs { + destination = init_scripts.value + } + } + } + + dynamic "init_scripts" { + for_each = var.cloud_name == "azure" && each.value.init_scripts_abfss != null ? each.value.init_scripts_abfss : [] + content { + abfss { + destination = init_scripts.value + } + } + } + + # Library configurations + dynamic "library" { + for_each = each.value.pypi_library_repository != null ? each.value.pypi_library_repository : [] + content { + pypi { + package = library.value + } + } + } + + dynamic "library" { + for_each = each.value.maven_library_repository != null ? 
each.value.maven_library_repository : [] + content { + maven { + coordinates = library.value.coordinates + exclusions = library.value.exclusions + } + } + } +} + +resource "databricks_cluster_policy" "this" { + for_each = { for param in var.custom_cluster_policies : (param.name) => param.definition + if param.definition != null + } + + name = each.key + definition = jsonencode(each.value) +} + +resource "databricks_cluster_policy" "overrides" { + for_each = { for param in var.default_cluster_policies_override : (param.name) => param + if param.definition != null + } + + policy_family_id = each.value.family_id + policy_family_definition_overrides = jsonencode(each.value.definition) + name = each.key +} + +resource "databricks_permissions" "policy" { + for_each = { for param in var.custom_cluster_policies : param.name => param.can_use + if param.can_use != null + } + + cluster_policy_id = databricks_cluster_policy.this[each.key].id + + dynamic "access_control" { + for_each = each.value + content { + group_name = access_control.value + permission_level = "CAN_USE" + } + } +} + +resource "databricks_permissions" "clusters" { + for_each = { + for v in var.clusters : (v.cluster_name) => v + if length(v.permissions) != 0 + } + + cluster_id = databricks_cluster.this[each.key].id + + dynamic "access_control" { + for_each = each.value.permissions + content { + group_name = access_control.value.group_name + permission_level = access_control.value.permission_level + } + } +} diff --git a/data.tf b/data.tf new file mode 100644 index 0000000..533ffd3 --- /dev/null +++ b/data.tf @@ -0,0 +1,11 @@ +data "databricks_group" "account_groups" { + for_each = local.iam_account_map + + display_name = each.key +} + +data "databricks_current_metastore" "this" { +} + +data "databricks_sql_warehouses" "all" { +} diff --git a/iam.tf b/iam.tf new file mode 100644 index 0000000..82f4871 --- /dev/null +++ b/iam.tf @@ -0,0 +1,26 @@ +locals { + iam_account_map = tomap({ + for group in var.iam_account_groups : group.group_name => group.entitlements + if group.group_name != null + }) +} + +resource "databricks_group" "this" { + count = var.cloud_name == "azure" && length(local.iam_account_map) == 0 ? length(toset(keys(var.iam_workspace_groups))) : 0 + + display_name = keys(var.iam_workspace_groups)[count.index] + + lifecycle { + ignore_changes = [external_id, allow_cluster_create, allow_instance_pool_create, databricks_sql_access, workspace_access] + } +} + +resource "databricks_entitlements" "this" { + for_each = local.iam_account_map + + group_id = data.databricks_group.account_groups[each.key].id + allow_cluster_create = contains(coalesce(each.value, ["none"]), "allow_cluster_create") + allow_instance_pool_create = contains(coalesce(each.value, ["none"]), "allow_instance_pool_create") + databricks_sql_access = contains(coalesce(each.value, ["none"]), "databricks_sql_access") + workspace_access = true +} diff --git a/main.tf b/main.tf new file mode 100644 index 0000000..fecb57d --- /dev/null +++ b/main.tf @@ -0,0 +1,23 @@ +resource "databricks_workspace_conf" "this" { + custom_config = var.custom_config +} + +resource "databricks_ip_access_list" "allowed_list" { + label = "allow_in" + list_type = "ALLOW" + ip_addresses = flatten([for v in values(var.ip_addresses) : v]) + + depends_on = [databricks_workspace_conf.this] +} + +resource "databricks_token" "pat" { + count = var.workspace_admin_token_enabled ? 
1 : 0
+  comment          = "Terraform Provisioning"
+  lifetime_seconds = var.pat_token_lifetime_seconds
+}
+
+resource "databricks_system_schema" "this" {
+  for_each = var.system_schemas_enabled ? var.system_schemas : toset([])
+
+  schema = each.value
+}
diff --git a/mount.tf b/mount.tf
new file mode 100644
index 0000000..fbccac8
--- /dev/null
+++ b/mount.tf
@@ -0,0 +1,21 @@
+locals {
+  mount_sp_secrets = var.cloud_name == "azure" ? {
+    mount_sp_client_id = { value = var.mount_configuration.service_principal.client_id }
+    mount_sp_secret    = { value = var.mount_configuration.service_principal.client_secret }
+  } : {}
+}
+
+resource "databricks_mount" "adls" {
+  for_each = var.mount_enabled && var.cloud_name == "azure" ? var.mountpoints : {}
+
+  name = each.key
+  uri  = "abfss://${each.value["container_name"]}@${each.value["storage_account_name"]}.dfs.core.windows.net"
+  extra_configs = {
+    "fs.azure.account.auth.type" : "OAuth",
+    "fs.azure.account.oauth.provider.type" : "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
+    "fs.azure.account.oauth2.client.id" : var.mount_configuration.service_principal.client_id,
+    "fs.azure.account.oauth2.client.secret" : databricks_secret.main["mount_sp_secret"].config_reference,
+    "fs.azure.account.oauth2.client.endpoint" : "https://login.microsoftonline.com/${var.mount_configuration.service_principal.tenant_id}/oauth2/token",
+    "fs.azure.createRemoteFileSystemDuringInitialization" : "false",
+  }
+}
diff --git a/outputs.tf b/outputs.tf
new file mode 100644
index 0000000..003fea7
--- /dev/null
+++ b/outputs.tf
@@ -0,0 +1,33 @@
+output "sql_endpoint_jdbc_url" {
+  value       = [for n in databricks_sql_endpoint.this : n.jdbc_url]
+  description = "JDBC connection string of SQL Endpoint"
+}
+
+output "sql_endpoint_data_source_id" {
+  value       = [for n in databricks_sql_endpoint.this : n.data_source_id]
+  description = "ID of the data source for this endpoint"
+}
+
+output "token" {
+  value       = length(databricks_token.pat) > 0 ? databricks_token.pat[0].token_value : null
+  description = "Databricks Personal Authorization Token"
+  sensitive   = true
+}
+
+output "clusters" {
+  value = [for param in var.clusters : {
+    name = param.cluster_name
+    id   = databricks_cluster.this[param.cluster_name].id
+  } if length(var.clusters) != 0]
+  description = "Provides name and unique identifier for the clusters"
+}
+
+output "sql_warehouses_list" {
+  value       = data.databricks_sql_warehouses.all.ids
+  description = "List of IDs of all SQL warehouses in the Databricks workspace."
+}
+
+output "metastore_id" {
+  value       = data.databricks_current_metastore.this.id
+  description = "The ID of the current metastore in the Databricks workspace."
+}
diff --git a/secrets.tf b/secrets.tf
new file mode 100644
index 0000000..68e9c98
--- /dev/null
+++ b/secrets.tf
@@ -0,0 +1,78 @@
+locals {
+  secrets_acl_objects_list = flatten([for param in var.secret_scope : [
+    for permission in param.scope_acl : {
+      scope = param.scope_name, principal = permission.principal, permission = permission.permission
+    }] if param.scope_acl != null
+  ])
+
+  secret_scope_config = { for object in var.secret_scope : object.scope_name => object }
+
+  secret_scope_config_secrets = { for object in flatten([for k, v in local.secret_scope_config : [for secret in v.secrets : {
+    scope_name   = k,
+    secret_key   = secret.key,
+    secret_value = secret.string_value,
+  }]]) : "${object.scope_name}:${object.secret_key}" => object }
+}
+
+# Secret Scope with SP secrets for mounting Azure Data Lake Storage
+resource "databricks_secret_scope" "main" {
+  count = var.cloud_name == "azure" && var.mount_enabled ? 1 : 0
+
+  name                     = "main"
+  initial_manage_principal = null
+}
+
+resource "databricks_secret" "main" {
+  for_each = var.cloud_name == "azure" && var.mount_enabled ? local.mount_sp_secrets : {}
+
+  key          = each.key
+  string_value = each.value["value"]
+  scope        = databricks_secret_scope.main[0].id
+
+  lifecycle {
+    precondition {
+      condition     = var.cloud_name == "azure" && var.mount_enabled ? length(compact([var.mount_configuration.service_principal.client_id, var.mount_configuration.service_principal.client_secret, var.mount_configuration.service_principal.tenant_id])) == 3 : true
+      error_message = "To mount ADLS Storage, please provide prerequisite Service Principal values - 'mount_configuration.service_principal.client_id', 'mount_configuration.service_principal.client_secret', 'mount_configuration.service_principal.tenant_id'."
+    }
+  }
+}
+
+# Custom additional Databricks Secret Scope
+resource "databricks_secret_scope" "this" {
+  for_each = {
+    for param in var.secret_scope : (param.scope_name) => param
+    if param.scope_name != null
+  }
+
+  name = each.key
+
+  # Key Vault metadata block only for Azure
+  dynamic "keyvault_metadata" {
+    for_each = var.cloud_name == "azure" ? [for kv in var.key_vault_secret_scope : kv] : []
+    content {
+      resource_id = keyvault_metadata.value.key_vault_id
+      dns_name    = keyvault_metadata.value.dns_name
+    }
+  }
+
+  # This property is only relevant for Azure
+  initial_manage_principal = var.cloud_name == "azure" ? null : null
+}
+
+resource "databricks_secret" "this" {
+  for_each = local.secret_scope_config_secrets
+
+  key          = each.value.secret_key
+  string_value = each.value.secret_value
+  scope        = databricks_secret_scope.this[each.value.scope_name].id
+}
+
+resource "databricks_secret_acl" "this" {
+  for_each = var.cloud_name == "azure" && length(local.secrets_acl_objects_list) > 0 ? {
+    for entry in local.secrets_acl_objects_list : "${entry.scope}.${entry.principal}.${entry.permission}" => entry
+  } : {}
+
+  scope     = databricks_secret_scope.this[each.value.scope].name
+  principal = length(var.iam_account_groups) != 0 ?
data.databricks_group.account_groups[each.value.principal].display_name : databricks_group.this[each.value.principal].display_name + permission = each.value.permission +} diff --git a/variables.tf b/variables.tf new file mode 100644 index 0000000..0604af3 --- /dev/null +++ b/variables.tf @@ -0,0 +1,274 @@ +variable "cloud_name" { + type = string + description = "Cloud Name" +} + +variable "workspace_admin_token_enabled" { + type = bool + description = "Boolean flag to specify whether to create Workspace Admin Token" +} + +variable "suffix" { + type = string + description = "Optional suffix that would be added to the end of resources names." + default = "" +} + +# Identity Access Management variables + +variable "iam_account_groups" { + type = list(object({ + group_name = optional(string) + entitlements = optional(list(string)) + })) + description = "List of objects with group name and entitlements for this group" + default = [] +} + +variable "iam_workspace_groups" { + type = map(object({ + user = optional(list(string)) + service_principal = optional(list(string)) + entitlements = optional(list(string)) + })) + description = "Used to create workspace group. Map of group name and its parameters, such as users and service principals added to the group. Also possible to configure group entitlements." + default = {} + + validation { + condition = length([for item in values(var.iam_workspace_groups)[*] : item.entitlements if item.entitlements != null]) != 0 ? alltrue([ + for entry in flatten(values(var.iam_workspace_groups)[*].entitlements) : contains(["allow_cluster_create", "allow_instance_pool_create", "databricks_sql_access"], entry) if entry != null + ]) : true + error_message = "Entitlements validation. The only suitable values are: databricks_sql_access, allow_instance_pool_create, allow_cluster_create" + } +} + +# SQL Endpoint variables +variable "sql_endpoint" { + type = set(object({ + name = string + cluster_size = optional(string, "2X-Small") + min_num_clusters = optional(number, 0) + max_num_clusters = optional(number, 1) + auto_stop_mins = optional(string, "30") + enable_photon = optional(bool, false) + enable_serverless_compute = optional(bool, false) + spot_instance_policy = optional(string, "COST_OPTIMIZED") + warehouse_type = optional(string, "PRO") + permissions = optional(set(object({ + group_name = string + permission_level = string + })), []) + })) + description = "Set of objects with parameters to configure SQL Endpoint and assign permissions to it for certain custom groups" + default = [] +} + +# Secret Scope variables +variable "secret_scope" { + type = list(object({ + scope_name = string + scope_acl = optional(list(object({ + principal = string + permission = string + }))) + secrets = optional(list(object({ + key = string + string_value = string + }))) + })) + description = <<-EOT +Provides an ability to create custom Secret Scope, store secrets in it and assigning ACL for access management +scope_name - name of Secret Scope to create; +acl - list of objects, where 'principal' custom group name, this group is created in 'Premium' module; 'permission' is one of "READ", "WRITE", "MANAGE"; +secrets - list of objects, where object's 'key' param is created key name and 'string_value' is a value for it; +EOT + default = [] +} + +# Azure Key Vault-backed Secret Scope +variable "key_vault_secret_scope" { + type = list(object({ + name = string + key_vault_id = string + dns_name = string + tenant_id = string + })) + description = "Object with Azure Key Vault parameters required 
for creation of Azure-backed Databricks Secret scope" + default = [] +} + +variable "custom_cluster_policies" { + type = list(object({ + name = string + can_use = list(string) + definition = any + })) + description = <<-EOT +Provides an ability to create custom cluster policy, assign it to cluster and grant CAN_USE permissions on it to certain custom groups +name - name of custom cluster policy to create +can_use - list of string, where values are custom group names, there groups have to be created with Terraform; +definition - JSON document expressed in Databricks Policy Definition Language. No need to call 'jsonencode()' function on it when providing a value; +EOT + default = [{ + name = null + can_use = null + definition = null + }] +} + +variable "clusters" { + type = set(object({ + cluster_name = string + spark_version = optional(string, "15.3.x-scala2.12") + spark_conf = optional(map(any), {}) + spark_env_vars = optional(map(any), {}) + data_security_mode = optional(string, "USER_ISOLATION") + aws_attributes = optional(object({ + availability = optional(string) + zone_id = optional(string) + first_on_demand = optional(number) + spot_bid_price_percent = optional(number) + ebs_volume_count = optional(number) + ebs_volume_size = optional(number) + ebs_volume_type = optional(string) + }), { + availability = "ON_DEMAND" + zone_id = "auto" + first_on_demand = 0 + spot_bid_price_percent = 100 + ebs_volume_count = 1 + ebs_volume_size = 100 + ebs_volume_type = "GENERAL_PURPOSE_SSD" + }) + azure_attributes = optional(object({ + availability = optional(string) + first_on_demand = optional(number) + spot_bid_max_price = optional(number, 1) + }), { + availability = "ON_DEMAND_AZURE" + first_on_demand = 0 + }) + node_type_id = optional(string, null) + autotermination_minutes = optional(number, 20) + min_workers = optional(number, 1) + max_workers = optional(number, 2) + cluster_log_conf_destination = optional(string, null) + init_scripts_workspace = optional(set(string), []) + init_scripts_volumes = optional(set(string), []) + init_scripts_dbfs = optional(set(string), []) + init_scripts_abfss = optional(set(string), []) + single_user_name = optional(string, null) + single_node_enable = optional(bool, false) + custom_tags = optional(map(string), {}) + permissions = optional(set(object({ + group_name = string + permission_level = string + })), []) + pypi_library_repository = optional(set(string), []) + maven_library_repository = optional(set(object({ + coordinates = string + exclusions = set(string) + })), []) + })) + description = "Set of objects with parameters to configure Databricks clusters and assign permissions to it for certain custom groups" + default = [] +} + +variable "pat_token_lifetime_seconds" { + type = number + description = "The lifetime of the token, in seconds. 
If no lifetime is specified, the token remains valid indefinitely" + default = 315569520 +} + +# Mount ADLS Gen2 Filesystem +variable "mount_enabled" { + type = bool + description = "Boolean flag that determines whether mount point for storage account filesystem is created" + default = false +} + +variable "mount_configuration" { + type = object({ + service_principal = object({ + client_id = string + client_secret = string + tenant_id = string + }) + }) + description = "Configuration for mounting storage, including only service principal details" + default = { + service_principal = { + client_id = null + client_secret = null + tenant_id = null + } + } + sensitive = true +} + +variable "mountpoints" { + type = map(object({ + storage_account_name = string + container_name = string + })) + description = "Mountpoints for databricks" + default = {} +} + +variable "system_schemas" { + type = set(string) + description = "Set of strings with all possible System Schema names" + default = ["access", "billing", "compute", "marketplace", "storage"] +} + +variable "system_schemas_enabled" { + type = bool + description = "System Schemas only works with assigned Unity Catalog Metastore. Boolean flag to enabled this feature" + default = false +} + +variable "default_cluster_policies_override" { + type = list(object({ + name = string + family_id = string + definition = any + })) + description = <<-EOT +Provides an ability to override default cluster policy +name - name of cluster policy to override +family_id - family id of corresponding policy +definition - JSON document expressed in Databricks Policy Definition Language. No need to call 'jsonencode()' function on it when providing a value; +EOT + default = [{ + name = null + family_id = null + definition = null + }] +} + +variable "custom_config" { + type = map(string) + description = "Map of AD databricks workspace custom config" + default = { + "enableResultsDownloading" = "false", # https://docs.databricks.com/en/notebooks/notebook-outputs.html#download-results + "enableNotebookTableClipboard" = "false", # https://docs.databricks.com/en/administration-guide/workspace-settings/notebooks.html#enable-users-to-copy-data-to-the-clipboard-from-notebooks + "enableVerboseAuditLogs" = "true", # https://docs.databricks.com/en/administration-guide/account-settings/verbose-logs.html + "enable-X-Frame-Options" = "true", + "enable-X-Content-Type-Options" = "true", + "enable-X-XSS-Protection" = "true", + "enableDbfsFileBrowser" = "false", # https://docs.databricks.com/en/administration-guide/workspace-settings/dbfs-browser.html + "enableExportNotebook" = "false", # https://docs.databricks.com/en/administration-guide/workspace-settings/notebooks.html#enable-users-to-export-notebooks + "enforceUserIsolation" = "true", # https://docs.databricks.com/en/administration-guide/workspace-settings/enforce-user-isolation.html + "storeInteractiveNotebookResultsInCustomerAccount" = "true", # https://docs.databricks.com/en/administration-guide/workspace-settings/notebooks.html#manage-where-notebook-results-are-stored + "enableUploadDataUis" = "false", # https://docs.databricks.com/en/ingestion/add-data/index.html + "enableIpAccessLists" = "true" + } +} + +variable "ip_addresses" { + type = map(string) + description = "A map of IP address ranges" + default = { + "all" = "0.0.0.0/0" + } +} diff --git a/versions.tf b/versions.tf new file mode 100644 index 0000000..fcd2dd2 --- /dev/null +++ b/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_version = ">= 1.0" + + 
required_providers { + databricks = { + source = "databricks/databricks" + version = "~>1.0" + } + } +} diff --git a/warehouses.tf b/warehouses.tf new file mode 100644 index 0000000..7c86372 --- /dev/null +++ b/warehouses.tf @@ -0,0 +1,54 @@ +locals { + suffix = length(var.suffix) == 0 ? "" : "-${var.suffix}" + + # Handle tags for AWS + aws_tags = var.cloud_name == "aws" ? { + custom_tags = { + key = "key" + value = "value" + } + } : {} +} + +resource "databricks_sql_endpoint" "this" { + for_each = { for endpoint in var.sql_endpoint : endpoint.name => endpoint } + + name = "${each.key}${local.suffix}" + cluster_size = each.value.cluster_size + auto_stop_mins = each.value.auto_stop_mins + max_num_clusters = each.value.max_num_clusters + enable_photon = each.value.enable_photon + enable_serverless_compute = each.value.enable_serverless_compute + spot_instance_policy = each.value.spot_instance_policy + warehouse_type = each.value.warehouse_type + + # Dynamic AWS tags block + dynamic "tags" { + for_each = var.cloud_name == "aws" ? [local.aws_tags] : [] + content { + custom_tags { + key = tags.value.custom_tags.key + value = tags.value.custom_tags.value + } + } + } +} + +resource "databricks_permissions" "sql_endpoint" { + for_each = { + for endpoint in var.sql_endpoint : endpoint.name => endpoint + if length(endpoint.permissions) != 0 + } + + sql_endpoint_id = databricks_sql_endpoint.this[each.key].id + + dynamic "access_control" { + for_each = { + for perm in each.value.permissions : perm.group_name => perm + } + content { + group_name = access_control.value.group_name + permission_level = access_control.value.permission_level + } + } +}