Skip to content

Commit 585610c

Browse files
authored
Merge pull request #2 from data-platform-hq/unity
feat: unity catalog
2 parents 8f41775 + d8d0f55 commit 585610c

File tree

5 files changed

+360
-3
lines changed

5 files changed

+360
-3
lines changed

README.md

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,124 @@
1-
# Azure <> Terraform module
2-
Terraform module for creation Azure <>
1+
# Azure Unity Catalog Terraform module
2+
Terraform module for creating Azure Unity Catalog
33

44
## Usage
5+
```hcl
6+
# Prerequisite resources
57
8+
# Configure Databricks Provider
9+
data "azurerm_databricks_workspace" "example" {
10+
name = "example-workspace"
11+
resource_group_name = "example-rg"
12+
}
13+
14+
provider "databricks" {
15+
alias = "main"
16+
host = data.azurerm_databricks_workspace.example.workspace_url
17+
azure_workspace_resource_id = data.azurerm_databricks_workspace.example.id
18+
}
19+
20+
# Databricks Access Connector (managed identity)
21+
resource "azurerm_databricks_access_connector" "example" {
22+
name = "example-resource"
23+
resource_group_name = "example-rg"
24+
location = "eastus"
25+
26+
identity {
27+
type = "SystemAssigned"
28+
}
29+
}
30+
31+
# Storage Account
32+
data "azurerm_storage_account" "example" {
33+
name = "example-storage-account"
34+
resource_group_name = "example-rg"
35+
}
36+
37+
locals {
38+
catalog = {
39+
example_catalog = {
40+
catalog_grants = {
41+
"example@username.com" = ["USE_CATALOG", "USE_SCHEMA", "CREATE_SCHEMA", "CREATE_TABLE", "SELECT", "MODIFY"]
42+
}
43+
schema_name = ["raw", "refined", "data_product"]
44+
}
45+
}
46+
}
47+
48+
module "unity_catalog" {
49+
source = "../environment/modules/unity"
50+
51+
project = "datahq"
52+
env = "example"
53+
location = "eastus"
54+
access_connector_id = azurerm_databricks_access_connector.example.id
55+
storage_account_id = data.azurerm_storage_account.example.id
56+
storage_account_name = data.azurerm_storage_account.example.name
57+
catalog = local.catalog
58+
59+
providers = {
60+
databricks = databricks.main
61+
}
62+
}
63+
```
664
<!-- BEGIN_TF_DOCS -->
65+
## Requirements
66+
67+
| Name | Version |
68+
| ------------------------------------------------------------------------- | --------- |
69+
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.0.0 |
70+
| <a name="requirement_databricks"></a> [databricks](#requirement\_databricks) | >= 1.14.2 |
71+
| <a name="requirement_azurerm"></a> [azurerm](#requirement\_azurerm) | >= 3.40.0 |
72+
73+
## Providers
74+
75+
| Name | Version |
76+
| ------------------------------------------------------------- | --------- |
77+
| <a name="provider_databricks"></a> [databricks](#provider\_databricks) | 1.14.2 |
78+
| <a name="provider_azurerm"></a> [azurerm](#provider\_azurerm) | 3.40.0 |
79+
80+
## Modules
81+
82+
No modules.
83+
84+
## Resources
85+
86+
| Name | Type |
87+
| ------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- |
88+
| [azurerm_storage_data_lake_gen2_filesystem.this](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_data_lake_gen2_filesystem) | resource |
89+
| [databricks_metastore.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/metastore) | resource |
90+
| [databricks_grants.metastore](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grants) | resource |
91+
| [databricks_metastore_data_access.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/metastore_data_access) | resource |
92+
| [databricks_catalog.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/catalog) | resource |
93+
| [databricks_grants.catalog](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grants) | resource |
94+
| [databricks_schema.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/schema) | resource |
95+
| [databricks_grants.schema](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grants) | resource |
96+
97+
## Inputs
98+
99+
| Name | Description | Type | Default | Required |
100+
| -------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | -------------- | ------- | :------: |
101+
| <a name="input_project"></a> [project](#input\_project) | Project name | `string` | n/a | yes |
102+
| <a name="input_env"></a> [env](#input\_env) | Environment name | `string` | n/a | yes |
103+
| <a name="input_location"></a> [location](#input\_location) | Azure location | `string` | n/a | yes |
104+
| <a name="input_suffix"></a> [suffix](#input\_suffix) | Optional suffix that would be added to the end of resources names. | `string` | "" | no |
105+
| <a name="input_create_metastore"></a> [create\_metastore](#input\_create\_metastore) | Boolean flag for Unity Catalog Metastore current in this environment. One Metastore per region | `bool` | true | no |
106+
| <a name="input_access_connector_id"></a> [access\_connector\_id](#input\_access\_connector\_id) | Databricks Access Connector Id that lets you connect managed identities to an Azure Databricks account. Provides an ability to access Unity Catalog with assigned identity | `string` | "" | no |
107+
| <a name="input_storage_account_id"></a> [storage\_account\_id](#input\_storage\_account\_id) | Storage Account Id where Unity Catalog Metastore would be provisioned | `string` | "" | no |
108+
| <a name="input_storage_account_name"></a> [storage\_account\_name](#input\_storage\_account\_name) | Storage Account Name where Unity Catalog Metastore would be provisioned | `string` | "" | no |
109+
| <a name="input_external_metastore_id"></a> [external\_metastore\_id](#input\_external\_metastore\_id) | Unity Catalog Metastore Id that is located in separate environment. Provide this value to associate Databricks Workspace with target Metastore | `string` | "" | no |
110+
| <a name="input_catalog"></a> [catalog](#input\_catalog) | Map of objects which parameters refers to certain catalog and schema attributes | <pre> map(object({ <br> catalog_grants = optional(map(list(string))) <br> catalog_comment = optional(string) <br> catalog_properties = optional(map(string)) <br> schema_name = optional(list(string)) <br> schema_grants = optional(map(list(string))) <br> schema_comment = optional(string) <br> schema_properties = optional(map(string))<br>})) </pre> | {} | no |
111+
| <a name="input_metastore_grants"></a> [metastore\_grants](#input\_metastore\_grants) | Permissions to give on metastore to group | `map(list(string))` | {} | no |
112+
| <a name="input_custom_databricks_metastore_name"></a> [custom\_databricks\_metastore\_name](#input\_custom\_databricks\_metastore\_name) | The name to provide for your Databricks Metastore | `string` | null | no |
113+
114+
## Outputs
7115

116+
| Name | Description |
117+
| -------------------------------------------------------------------------- | -------------------------------------- |
118+
| <a name="output_metastore_id"></a> [metastore\_id](#output\_metastore\_id) | Unity Catalog Metastore Id. |
119+
| <a name="output_data_lake_gen2_file_system_id"></a> [data\_lake\_gen2\_file\_system\_id](#output\_data\_lake\_gen2\_file\_system\_id) | The ID of the Data Lake Gen2 File System. |
8120
<!-- END_TF_DOCS -->
9121

10122
## License
11123

12-
Apache 2 Licensed. For more information please see [LICENSE](https://github.com/data-platform-hq/terraform-azurerm<>/tree/master/LICENSE)
124+
Apache 2 Licensed. For more information please see [LICENSE](https://github.com/data-platform-hq/terraform-databricks-unity-catalog/tree/master/LICENSE)

main.tf

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
locals {
  # Dash-prefixed form of var.suffix for appending to resource names;
  # stays empty when no suffix was supplied.
  suffix = length(var.suffix) > 0 ? "-${var.suffix}" : ""

  # Metastore name: the caller-provided custom name when set, otherwise a
  # name generated from project, environment, location and the suffix.
  databricks_metastore_name = var.custom_databricks_metastore_name != null ? var.custom_databricks_metastore_name : "meta-${var.project}-${var.env}-${var.location}${local.suffix}"
}
6+
7+
# Data Lake Gen2 filesystem referenced by the metastore storage root.
# Created only when this environment provisions its own metastore.
resource "azurerm_storage_data_lake_gen2_filesystem" "this" {
  count = var.create_metastore ? 1 : 0

  name               = "meta-${var.project}-${var.env}"
  storage_account_id = var.storage_account_id

  lifecycle {
    precondition {
      # Holds only when none of the three inputs is an empty string.
      condition = alltrue([
        for value in [var.storage_account_id, var.access_connector_id, var.storage_account_name] : length(value) > 0
      ])
      error_message = "To create Metastore in a Region it is required to provide proper values for these variables: access_connector_id, storage_account_id, storage_account_name"
    }
  }
}
23+
24+
# Unity Catalog metastore, rooted at the filesystem created in this module.
# Skipped when var.create_metastore is false.
resource "databricks_metastore" "this" {
  count = var.create_metastore ? 1 : 0

  name          = local.databricks_metastore_name
  storage_root  = "abfss://${azurerm_storage_data_lake_gen2_filesystem.this[0].name}@${var.storage_account_name}.dfs.core.windows.net/"
  force_destroy = true
}
31+
32+
# Metastore grants.
#
# databricks_grants is authoritative for the securable it targets, so all
# principals of the metastore must be declared in ONE resource. Creating one
# resource per principal (for_each over var.metastore_grants) makes the
# instances overwrite each other's grants on every apply.
#
# The resource is created only when a metastore is available (created here or
# supplied through var.external_metastore_id) and at least one non-null grant
# entry exists.
#
# NOTE(review): the resource address changes from
# databricks_grants.metastore["<principal>"] to databricks_grants.metastore[0];
# existing state will be planned as destroy/create of the grants.
resource "databricks_grants" "metastore" {
  count = (
    (var.create_metastore || length(var.external_metastore_id) > 0) &&
    length([for privileges in values(var.metastore_grants) : privileges if privileges != null]) > 0
  ) ? 1 : 0

  metastore = length(var.external_metastore_id) == 0 ? databricks_metastore.this[0].id : var.external_metastore_id

  dynamic "grant" {
    # Entries whose privilege list is null are skipped.
    for_each = {
      for principal, privileges in var.metastore_grants : principal => privileges
      if privileges != null
    }
    content {
      principal  = grant.key
      privileges = grant.value
    }
  }
}
44+
45+
# Default data-access configuration of the metastore, backed by the managed
# identity of the supplied Databricks Access Connector.
resource "databricks_metastore_data_access" "this" {
  count = var.create_metastore ? 1 : 0

  name         = "data-access-${var.project}-${var.env}-${var.location}${local.suffix}"
  metastore_id = databricks_metastore.this[0].id
  is_default   = true

  azure_managed_identity {
    access_connector_id = var.access_connector_id
  }
}
55+
56+
# Catalog
# One catalog per key of var.catalog. Nothing is created when no metastore is
# available (neither created here nor supplied via var.external_metastore_id).
resource "databricks_catalog" "this" {
  for_each = alltrue([!var.create_metastore, length(var.external_metastore_id) == 0]) ? {} : var.catalog

  metastore_id = length(var.external_metastore_id) == 0 ? databricks_metastore.this[0].id : var.external_metastore_id
  name         = each.key

  # catalog_comment is declared with optional(), so the attribute always
  # exists and is null when omitted; lookup() would therefore never return its
  # default. coalesce() applies the intended fallback.
  comment = coalesce(each.value.catalog_comment, "default comment")

  # Caller-supplied properties (may be null) plus the environment tag.
  properties    = merge(lookup(each.value, "catalog_properties", {}), { env = var.env })
  force_destroy = true
}
66+
67+
# Catalog grants
# One grants resource per catalog that defines catalog_grants; every principal
# of that catalog becomes a grant block inside it.
resource "databricks_grants" "catalog" {
  for_each = !var.create_metastore && length(var.external_metastore_id) == 0 ? {} : {
    for catalog_name, settings in var.catalog : catalog_name => settings.catalog_grants
    if settings.catalog_grants != null
  }

  catalog = databricks_catalog.this[each.key].name

  dynamic "grant" {
    for_each = each.value
    iterator = permission
    content {
      principal  = permission.key
      privileges = permission.value
    }
  }
}
83+
84+
# Schema
locals {
  # Flat list with one object per (catalog, schema) pair. Catalogs without
  # schema_name contribute nothing.
  schema = flatten([
    for catalog, params in var.catalog : [
      for schema in params.schema_name : {
        catalog = catalog,
        schema  = schema,
        # schema_comment is an optional() attribute: it is always present and
        # null when omitted, so lookup() never falls back to its default.
        # coalesce() applies the intended fallback text.
        comment    = coalesce(params.schema_comment, "default comment"),
        properties = lookup(params, "schema_properties", {})
      }
    ] if params.schema_name != null
  ])
}
97+
98+
# Schemas, keyed "<catalog>.<schema>" from local.schema. Nothing is created
# when no metastore is available.
resource "databricks_schema" "this" {
  for_each = !var.create_metastore && length(var.external_metastore_id) == 0 ? {} : {
    for item in local.schema : "${item.catalog}.${item.schema}" => item
  }

  name          = each.value.schema
  catalog_name  = databricks_catalog.this[each.value.catalog].name
  comment       = each.value.comment
  properties    = merge(each.value.properties, { env = var.env })
  force_destroy = true
}
109+
110+
# Schema grants
locals {
  # One entry per (catalog, schema) pair, carrying that catalog's complete
  # principal => privileges map.
  #
  # Catalogs are skipped when schema_grants is null or empty, or when
  # schema_name is null (iterating a null schema_name would fail).
  schema_grants = flatten([
    for catalog, params in var.catalog : [
      for schema in params.schema_name : {
        catalog = catalog,
        schema  = schema,
        grants  = params.schema_grants,
      }
    ] if try(length(params.schema_grants), 0) > 0 && params.schema_name != null
  ])
}

# databricks_grants is authoritative for its securable, so each schema gets
# exactly ONE grants resource holding all of its principals. Each principal
# receives only its own privilege list (previously every principal received
# the union of all principals' privileges).
#
# NOTE(review): instance keys change from "<catalog>.<schema>.<principal>" to
# "<catalog>.<schema>"; existing state will be planned as destroy/create.
resource "databricks_grants" "schema" {
  for_each = alltrue([!var.create_metastore, length(var.external_metastore_id) == 0]) ? {} : {
    for entry in local.schema_grants : "${entry.catalog}.${entry.schema}" => entry
  }

  schema = databricks_schema.this[each.key].id

  dynamic "grant" {
    for_each = each.value.grants
    content {
      principal  = grant.key
      privileges = grant.value
    }
  }
}

outputs.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Id of the metastore created by this module; empty string when
# var.create_metastore is false and no metastore instance exists.
output "metastore_id" {
  description = "Unity Catalog Metastore Id"
  value       = join("", databricks_metastore.this[*].id)
}
5+
6+
# Id of the filesystem created by this module; empty string when
# var.create_metastore is false and no filesystem instance exists.
output "data_lake_gen2_file_system_id" {
  description = "The ID of the Data Lake Gen2 File System."
  value       = join("", azurerm_storage_data_lake_gen2_filesystem.this[*].id)
}

variables.tf

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
variable "project" {
2+
type = string
3+
description = "Project name"
4+
}
5+
6+
variable "env" {
7+
type = string
8+
description = "Environment name"
9+
}
10+
11+
variable "location" {
12+
type = string
13+
description = "Azure location"
14+
}
15+
16+
variable "suffix" {
17+
type = string
18+
description = "Optional suffix that would be added to the end of resources names."
19+
default = ""
20+
}
21+
22+
# Unity Catalog variables
23+
variable "create_metastore" {
24+
type = bool
25+
description = "Boolean flag for Unity Catalog Metastore current in this environment. One Metastore per region"
26+
default = true
27+
}
28+
29+
variable "access_connector_id" {
30+
type = string
31+
description = "Databricks Access Connector Id that lets you to connect managed identities to an Azure Databricks account. Provides an ability to access Unity Catalog with assigned identity"
32+
default = ""
33+
}
34+
35+
variable "storage_account_id" {
36+
type = string
37+
description = "Storage Account Id where Unity Catalog Metastore would be provisioned"
38+
default = ""
39+
}
40+
41+
variable "storage_account_name" {
42+
type = string
43+
description = "Storage Account Name where Unity Catalog Metastore would be provisioned"
44+
default = ""
45+
}
46+
47+
# Id of a metastore that lives in another environment. Leave empty to use the
# metastore created by this module.
variable "external_metastore_id" {
  type        = string
  description = "Unity Catalog Metastore Id that is located in separate environment. Provide this value to associate Databricks Workspace with target Metastore"
  default     = ""

  validation {
    # Accept the empty string or a value in the 8-4-4-4-12 hexadecimal UUID
    # layout. A bare length check would also accept any 36-character string.
    condition = (
      var.external_metastore_id == "" ||
      can(regex("^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$", var.external_metastore_id))
    )
    error_message = "UUID has to be either in nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn format or empty string"
  }
}
56+
57+
variable "catalog" {
58+
type = map(object({
59+
catalog_grants = optional(map(list(string)))
60+
catalog_comment = optional(string)
61+
catalog_properties = optional(map(string))
62+
schema_name = optional(list(string))
63+
schema_grants = optional(map(list(string)))
64+
schema_comment = optional(string)
65+
schema_properties = optional(map(string))
66+
}))
67+
description = "Map of catalog name and its parameters"
68+
default = {}
69+
}
70+
71+
# Metastore grants
# Map of principal => list of metastore-level privileges.
variable "metastore_grants" {
  type        = map(list(string))
  description = "Permissions to give on metastore to group"
  default     = {}

  validation {
    # Every privilege across all principals (null lists skipped) must be one
    # of the permitted metastore privileges.
    condition = alltrue([
      for privilege in distinct(flatten([
        for principal, privileges in var.metastore_grants : privileges if privileges != null
      ])) : contains(
        ["CREATE_CATALOG", "CREATE_EXTERNAL_LOCATION", "CREATE_SHARE", "CREATE_RECIPIENT", "CREATE_PROVIDER"],
        privilege
      )
    ])
    error_message = "Metastore permission validation. The only possible values for permissions are: CREATE_CATALOG, CREATE_EXTERNAL_LOCATION, CREATE_SHARE, CREATE_RECIPIENT, CREATE_PROVIDER"
  }
}
85+
86+
variable "custom_databricks_metastore_name" {
87+
type = string
88+
description = "The name to provide for your Databricks Metastore"
89+
default = null
90+
}

versions.tf

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
terraform {
  # The module relies on optional() object attributes (stable since
  # Terraform 1.3) and lifecycle precondition blocks (since 1.2), so older
  # releases cannot load it.
  required_version = ">=1.3.0"

  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = ">=3.40.0"
    }
    databricks = {
      source  = "databricks/databricks"
      version = ">=1.14.2"
    }
  }
}

0 commit comments

Comments
 (0)