Skip to content

Commit 2f6a5cc

Browse files
Merge pull request #179 from databrickslabs/feature/v0.0.10
merging Feature/v0.0.10 branch
2 parents db158c2 + cfd8b1d commit 2f6a5cc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1406
-206
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ In practice, a single generic DLT pipeline reads the Dataflowspec and uses it to
8282
| Liquid cluster support | Bronze, Bronze Quarantine, Silver tables|
8383
| [DLT-META CLI](https://databrickslabs.github.io/dlt-meta/getting_started/dltmeta_cli/) | ```databricks labs dlt-meta onboard```, ```databricks labs dlt-meta deploy``` |
8484
| Bronze and Silver pipeline chaining | Deploy dlt-meta pipeline with ```layer=bronze_silver``` option using Direct publishing mode |
85+
| [DLT Sinks](https://docs.databricks.com/aws/en/delta-live-tables/dlt-sinks) |Supported formats:external ```delta table```, ```kafka```.Bronze, Silver layers|
8586

8687
## Getting Started
8788

demo/README.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
4. [Append FLOW Eventhub Demo](#append-flow-eventhub-demo): Write to same target from multiple sources using [dlt.append_flow](https://docs.databricks.com/en/delta-live-tables/flows.html#append-flows) and adding [File metadata column](https://docs.databricks.com/en/ingestion/file-metadata-column.html)
66
5. [Silver Fanout Demo](#silver-fanout-demo): This demo showcases the implementation of fanout architecture in the silver layer.
77
6. [Apply Changes From Snapshot Demo](#Apply-changes-from-snapshot-demo): This demo showcases the implementation of ingesting from snapshots in bronze layer
8+
7. [DLT Sink Demo](#dlt-sink-demo): This demo showcases the implementation of write to external sinks like delta and kafka
89

910
The source argument is optional for the demos.
1011

@@ -257,3 +258,53 @@ This demo will perform following tasks:
257258
python demo/launch_acfs_demo.py --uc_catalog_name=<<uc catalog name>> --profile=<<DEFAULT>>
258259
```
259260
![acfs.png](../docs/static/images/acfs.png)
261+
262+
# DLT Sink Demo
263+
- This demo will perform following steps
264+
- Showcase onboarding process for dlt writing to external sink pattern
265+
- Run onboarding for the bronze iot events.
266+
- Publish test events to kafka topic
267+
- Run Bronze DLT which will read from kafka source topic and write to
268+
- events delta table into uc
269+
- create quarantine table as per data quality expectations
270+
- writes to external kafka topics
271+
- writes to external dbfs location as external delta sink
272+
### Steps:
273+
1. Launch Command Prompt
274+
275+
2. Install [Databricks CLI](https://docs.databricks.com/dev-tools/cli/index.html)
276+
277+
3. ```commandline
278+
git clone https://github.com/databrickslabs/dlt-meta.git
279+
```
280+
281+
4. ```commandline
282+
cd dlt-meta
283+
```
284+
5. Set python environment variable into terminal
285+
```commandline
286+
dlt_meta_home=$(pwd)
287+
```
288+
```commandline
289+
export PYTHONPATH=$dlt_meta_home
290+
```
291+
292+
6. Optional: if you are using secrets for kafka. Create databricks secrets scope for source and sink kafka using below command
293+
```commandline
294+
databricks secrets create-scope <<name>>
295+
```
296+
```commandline
297+
databricks secrets put-secret --json '{
298+
"scope": "<<name>>",
299+
"key": "<<keyname>>",
300+
"string_value": "<<value>>"
301+
}'
302+
```
303+
304+
7. Run the command
305+
```commandline
306+
python demo/launch_dlt_sink_demo.py --uc_catalog_name=<<uc_catalog_name>> --source=kafka --kafka_source_topic=<<kafka source topic name>>>> --kafka_sink_topic=<<kafka sink topic name>> --kafka_source_servers_secrets_scope_name=<<kafka source servers secret name>> --kafka_source_servers_secrets_scope_key=<<kafka source server secret scope key name>> --kafka_sink_servers_secret_scope_name=<<kafka sink server secret scope key name>> --kafka_sink_servers_secret_scope_key=<<kafka sink servers secret scope key name>> --profile=<<DEFAULT>>
307+
```
308+
![dlt_demo_sink.png](../docs/static/images/dlt_demo_sink.png)
309+
![dlt_delta_sink.png](../docs/static/images/dlt_delta_sink.png)
310+
![dlt_kafka_sink.png](../docs/static/images/dlt_kafka_sink.png)

demo/conf/cloudfiles-onboarding.template

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
},
1919
"source_schema_path": "{uc_volume_path}/demo/resources/data/afam/ddl/customers.ddl"
2020
},
21+
"bronze_catalog_demo": "{uc_catalog_name}",
2122
"bronze_database_demo": "{uc_catalog_name}.{bronze_schema}",
2223
"bronze_table": "customers",
2324
"bronze_reader_options": {
@@ -30,7 +31,8 @@
3031
"pipelines.autoOptimize.zOrderCols": "id, email"
3132
},
3233
"bronze_data_quality_expectations_json_demo": "{uc_volume_path}/demo/conf/dqe/customers/bronze_data_quality_expectations.json",
33-
"bronze_database_quarantine_demo": "{uc_catalog_name}.{bronze_schema}",
34+
"bronze_catalog_quarantine_demo": "{uc_catalog_name}",
35+
"bronze_database_quarantine_demo": "{bronze_schema}",
3436
"bronze_quarantine_table": "customers_quarantine",
3537
"bronze_quarantine_table_properties": {
3638
"pipelines.reset.allowed": "false",
@@ -53,7 +55,8 @@
5355
"once": false
5456
}
5557
],
56-
"silver_database_demo": "{uc_catalog_name}.{silver_schema}",
58+
"silver_catalog_demo": "{uc_catalog_name}",
59+
"silver_database_demo": "{silver_schema}",
5760
"silver_table": "customers",
5861
"silver_transformation_json_demo": "{uc_volume_path}/demo/conf/afam_silver_transformations.json",
5962
"silver_table_properties": {
@@ -91,8 +94,9 @@
9194
}
9295
},
9396
"source_schema_path": "{uc_volume_path}/demo/resources/data/afam/ddl/transactions.ddl"
94-
},
95-
"bronze_database_demo": "{uc_catalog_name}.{bronze_schema}",
97+
},
98+
"bronze_catalog_demo": "{uc_catalog_name}",
99+
"bronze_database_demo": "{bronze_schema}",
96100
"bronze_table": "transactions",
97101
"bronze_reader_options": {
98102
"cloudFiles.format": "json",
@@ -104,7 +108,8 @@
104108
"pipelines.autoOptimize.zOrderCols": "id, customer_id"
105109
},
106110
"bronze_data_quality_expectations_json_demo": "{uc_volume_path}/demo/conf/dqe/transactions/bronze_data_quality_expectations.json",
107-
"bronze_database_quarantine_demo": "{uc_catalog_name}.{bronze_schema}",
111+
"bronze_catalog_quarantine_demo": "{uc_catalog_name}",
112+
"bronze_database_quarantine_demo": "{bronze_schema}",
108113
"bronze_quarantine_table": "transactions_quarantine",
109114
"bronze_quarantine_table_properties": {
110115
"pipelines.reset.allowed": "true",
@@ -128,7 +133,8 @@
128133
"once": false
129134
}
130135
],
131-
"silver_database_demo": "{uc_catalog_name}.{silver_schema}",
136+
"silver_catalog_demo": "{uc_catalog_name}",
137+
"silver_database_demo": "{silver_schema}",
132138
"silver_table": "transactions",
133139
"silver_cdc_apply_changes": {
134140
"keys": [

demo/conf/cloudfiles-onboarding_A2.template

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
},
1717
"source_schema_path": "{uc_volume_path}/demo/resources/data/afam/ddl/customers.ddl"
1818
},
19-
"bronze_database_demo": "{uc_catalog_name}.{bronze_schema}",
19+
"bronze_catalog_demo": "{uc_catalog_name}",
20+
"bronze_database_demo": "{bronze_schema}",
2021
"bronze_table": "customers_delta",
2122
"bronze_reader_options": {
2223
"cloudFiles.format": "json",
@@ -28,7 +29,8 @@
2829
"pipelines.autoOptimize.zOrderCols": "id, email"
2930
},
3031
"bronze_data_quality_expectations_json_demo": "{uc_volume_path}/demo/conf/dqe/customers/bronze_data_quality_expectations.json",
31-
"bronze_database_quarantine_demo": "{uc_catalog_name}.{bronze_schema}",
32+
"bronze_catalog_quarantine_demo": "{uc_catalog_name}",
33+
"bronze_database_quarantine_demo": "{bronze_schema}",
3234
"bronze_quarantine_table": "customers_delta_quarantine",
3335
"bronze_quarantine_table_properties": {
3436
"pipelines.reset.allowed": "false",

demo/conf/eventhub-onboarding.template

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@
2222
"kafka.request.timeout.ms": "60000",
2323
"kafka.session.timeout.ms": "60000"
2424
},
25-
"bronze_database_demo": "{uc_catalog_name}.{bronze_schema}",
25+
"bronze_catalog_demo": "{uc_catalog_name}",
26+
"bronze_database_demo": "{bronze_schema}",
2627
"bronze_table": "bronze_{run_id}_iot",
2728
"bronze_partition_columns": "date",
2829
"bronze_data_quality_expectations_json_demo": "{uc_volume_path}/demo/conf/dqe/iot/bronze_data_quality_expectations.json",
29-
"bronze_database_quarantine_demo": "{uc_catalog_name}.{bronze_schema}",
30+
"bronze_catalog_quarantine_demo": "{uc_catalog_name}",
31+
"bronze_database_quarantine_demo": "{bronze_schema}",
3032
"bronze_quarantine_table": "bronze_{run_id}_iot_quarantine",
3133
"bronze_quarantine_table_path_demo": "{uc_volume_path}/data/bronze/iot_quarantine",
3234
"bronze_append_flows": [
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
[
2+
{
3+
"data_flow_id": "103",
4+
"data_flow_group": "A1",
5+
"source_system": "Sensor Device",
6+
"source_format": "kafka",
7+
"source_details": {
8+
"source_schema_path": "{uc_volume_path}/demo/resources/ddl/eventhub_iot_schema.ddl",
9+
"subscribe": "{kafka_source_topic}",
10+
"kafka.security.protocol": "PLAINTEXT",
11+
"kafka_source_servers_secrets_scope_name":"{kafka_source_servers_secrets_scope_name}",
12+
"kafka_source_servers_secrets_scope_key":"{kafka_source_servers_secrets_scope_key}"
13+
},
14+
"bronze_reader_options": {
15+
"startingOffsets": "earliest",
16+
"kafka.request.timeout.ms": "60000",
17+
"kafka.session.timeout.ms": "60000"
18+
},
19+
"bronze_catalog_demo": "{uc_catalog_name}",
20+
"bronze_database_demo": "{bronze_schema}",
21+
"bronze_table": "bronze_{run_id}_iot",
22+
"bronze_partition_columns": "date",
23+
"bronze_table_path_demo": "{uc_volume_path}/data/bronze/iot",
24+
"bronze_data_quality_expectations_json_demo": "{uc_volume_path}/demo/conf/dqe/iot/bronze_data_quality_expectations.json",
25+
"bronze_catalog_quarantine_demo": "{uc_catalog_name}",
26+
"bronze_database_quarantine_demo": "{bronze_schema}",
27+
"bronze_quarantine_table": "bronze_{run_id}_iot_quarantine",
28+
"bronze_quarantine_table_path_demo": "{uc_volume_path}/data/bronze/iot_quarantine",
29+
"bronze_sinks": [
30+
{
31+
"name": "bronze_customer_kafka_sink",
32+
"format": "kafka",
33+
"options": {
34+
"kafka_sink_servers_secret_scope_name":"{kafka_sink_servers_secret_scope_name}",
35+
"kafka_sink_servers_secret_scope_key":"{kafka_sink_servers_secret_scope_key}",
36+
"kafka.security.protocol":"PLAINTEXT",
37+
"kafka.security.protocol":"PLAINTEXT",
38+
"topic":"{kafka_sink_topic}"
39+
},
40+
"select_exp":["value"],
41+
"where_clause":"value is not null"
42+
},
43+
{
44+
"name": "bronze_customer_delta_sink",
45+
"format": "delta",
46+
"options": {
47+
"path":"dbfs:/mnt/dltmeta_sink/iot"
48+
},
49+
"select_exp":["value"],
50+
"where_clause":"value is not null"
51+
}
52+
]
53+
}
54+
]

demo/conf/onboarding.template

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
"source_path_prod": "{uc_volume_path}/demo/resources/data/customers",
1111
"source_schema_path": "{uc_volume_path}/demo/resources/ddl/customers.ddl"
1212
},
13-
"bronze_database_prod": "{uc_catalog_name}.{bronze_schema}",
13+
"bronze_catalog_prod": "{uc_catalog_name}",
14+
"bronze_database_prod": "{bronze_schema}",
1415
"bronze_table": "customers",
16+
"bronze_table_comment": "customers bronze table",
1517
"bronze_table_path_prod": "{uc_volume_path}/data/bronze/customers",
1618
"bronze_reader_options": {
1719
"cloudFiles.format": "csv",
@@ -20,11 +22,15 @@
2022
},
2123
"bronze_cluster_by":["customer_id"],
2224
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/customers.json",
23-
"bronze_database_quarantine_prod": "{uc_catalog_name}.{bronze_schema}",
25+
"bronze_catalog_quarantine_prod": "{uc_catalog_name}",
26+
"bronze_database_quarantine_prod": "{bronze_schema}",
2427
"bronze_quarantine_table": "customers_quarantine",
28+
"bronze_quarantine_table_comment": "customers quarantine table",
2529
"bronze_quarantine_table_path_prod": "{uc_volume_path}/data/bronze/customers_quarantine",
26-
"silver_database_prod": "{uc_catalog_name}.{silver_schema}",
30+
"silver_catalog_prod": "{uc_catalog_name}",
31+
"silver_database_prod": "{silver_schema}",
2732
"silver_table": "customers",
33+
"silver_table_comment": "customers silver table",
2834
"silver_table_path_prod": "{uc_volume_path}/data/silver/customers",
2935
"silver_cdc_apply_changes": {
3036
"keys": [
@@ -55,8 +61,10 @@
5561
"source_path_prod": "{uc_volume_path}/demo/resources/data/transactions",
5662
"source_schema_path": "{uc_volume_path}/demo/resources/ddl/transactions.ddl"
5763
},
58-
"bronze_database_prod": "{uc_catalog_name}.{bronze_schema}",
64+
"bronze_catalog_prod": "{uc_catalog_name}",
65+
"bronze_database_prod": "{bronze_schema}",
5966
"bronze_table": "transactions",
67+
"bronze_table_comment": "transactions bronze table",
6068
"bronze_table_path_prod": "{uc_volume_path}/data/bronze/transactions",
6169
"bronze_reader_options": {
6270
"cloudFiles.format": "csv",
@@ -66,9 +74,12 @@
6674
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/transactions.json",
6775
"bronze_database_quarantine_prod": "{uc_catalog_name}.{bronze_schema}",
6876
"bronze_quarantine_table": "transactions_quarantine",
77+
"bronze_quarantine_table_comment": "transactions bronze quarantine table",
6978
"bronze_quarantine_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/transactions_quarantine",
70-
"silver_database_prod": "{uc_catalog_name}.{silver_schema}",
79+
"silver_catalog_prod": "{uc_catalog_name}",
80+
"silver_database_prod": "{silver_schema}",
7181
"silver_table": "transactions",
82+
"silver_table_comment": "transactions silver table",
7283
"silver_table_path_prod": "{uc_volume_path}/data/silver/transactions",
7384
"silver_cdc_apply_changes": {
7485
"keys": [
@@ -98,8 +109,10 @@
98109
"source_path_prod": "{uc_volume_path}/demo/resources/data/products",
99110
"source_schema_path": "{uc_volume_path}/demo/resources/ddl/products.ddl"
100111
},
101-
"bronze_database_prod": "{uc_catalog_name}.{bronze_schema}",
112+
"bronze_catalog_prod": "{uc_catalog_name}",
113+
"bronze_database_prod": "{bronze_schema}",
102114
"bronze_table": "products",
115+
"bronze_table_comment": "products bronze table",
103116
"bronze_table_path_prod": "{uc_volume_path}/data/bronze/products",
104117
"bronze_reader_options": {
105118
"cloudFiles.format": "csv",
@@ -110,9 +123,12 @@
110123
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/products.json",
111124
"bronze_database_quarantine_prod": "{uc_catalog_name}.{bronze_schema}",
112125
"bronze_quarantine_table": "products_quarantine",
126+
"bronze_quarantine_table_comment": "products quarantine bronze table",
113127
"bronze_quarantine_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/products_quarantine",
114-
"silver_database_prod": "{uc_catalog_name}.{silver_schema}",
128+
"silver_catalog_prod": "{uc_catalog_name}",
129+
"silver_database_prod": "{silver_schema}",
115130
"silver_table": "products",
131+
"silver_table_comment": "products silver table",
116132
"silver_table_path_prod": "{uc_volume_path}/data/silver/products",
117133
"silver_cdc_apply_changes": {
118134
"keys": [
@@ -142,8 +158,10 @@
142158
"source_path_prod": "{uc_volume_path}/demo/resources/data/stores",
143159
"source_schema_path": "{uc_volume_path}/demo/resources/ddl/stores.ddl"
144160
},
145-
"bronze_database_prod": "{uc_catalog_name}.{bronze_schema}",
161+
"bronze_catalog_prod": "{uc_catalog_name}",
162+
"bronze_database_prod": "{bronze_schema}",
146163
"bronze_table": "stores",
164+
"bronze_table_comment": "stores bronze table",
147165
"bronze_table_path_prod": "{uc_volume_path}/data/bronze/stores",
148166
"bronze_reader_options": {
149167
"cloudFiles.format": "csv",
@@ -152,11 +170,15 @@
152170
},
153171
"bronze_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/stores",
154172
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/stores.json",
155-
"bronze_database_quarantine_prod": "{uc_catalog_name}.{bronze_schema}",
173+
"bronze_catalog_quarantine_prod": "{uc_catalog_name}",
174+
"bronze_database_quarantine_prod": "{bronze_schema}",
156175
"bronze_quarantine_table": "stores_quarantine",
176+
"bronze_quarantine_table_comment": "stores quarantine bronze table",
157177
"bronze_quarantine_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/stores_quarantine",
158-
"silver_database_prod": "{uc_catalog_name}.{silver_schema}",
178+
"silver_catalog_prod": "{uc_catalog_name}",
179+
"silver_database_prod": "{silver_schema}",
159180
"silver_table": "stores",
181+
"silver_table_comment": "stores silver table",
160182
"silver_table_path_prod": "{uc_volume_path}/data/silver/stores",
161183
"silver_cdc_apply_changes": {
162184
"keys": [

demo/conf/onboarding_cars.template

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,19 @@
77
"source_details": {
88
"source_path_demo": "{uc_volume_path}/demo/resources/data/cars"
99
},
10-
"bronze_database_demo": "{uc_catalog_name}.{bronze_schema}",
10+
"bronze_catalog_demo": "{uc_catalog_name}",
11+
"bronze_database_demo": "{bronze_schema}",
1112
"bronze_table": "cars",
13+
"bronze_table_comment": "bronze cars table",
1214
"bronze_reader_options": {
1315
"cloudFiles.format": "csv",
1416
"cloudFiles.rescuedDataColumn": "_rescued_data",
1517
"header": "true"
1618
},
17-
"silver_database_demo": "{uc_catalog_name}.{silver_schema}",
19+
"silver_catalog_demo": "{uc_catalog_name}",
20+
"silver_database_demo": "{silver_schema}",
1821
"silver_table": "cars_usa",
22+
"silver_table_comment": "cars_usa silver table",
1923
"silver_transformation_json_demo": "{uc_volume_path}/demo/conf/silver_transformations_cars.json"
2024
}
2125
]

0 commit comments

Comments
 (0)