"""
databricks-sql-connector includes a SQLAlchemy 2.0 dialect compatible with Databricks SQL. To install
its dependencies you can run `pip install databricks-sql-connector[sqlalchemy]`.

The expected connection string format which you can pass to create_engine() is:

databricks://token:dapi***@***.cloud.databricks.com?http_path=/sql/***&catalog=**&schema=**

Our dialect implements the majority of SQLAlchemy 2.0's API. Because of the extent of SQLAlchemy's
capabilities it isn't feasible to provide examples of every usage in a single script, so we only
provide a basic one here. Learn more about usage in README.sqlalchemy.md in this repo.
"""

# fmt: off

import os
from datetime import date, datetime, time, timedelta, timezone
from decimal import Decimal
from uuid import UUID

# By convention, backend-specific SQLA types are defined in uppercase
# This dialect exposes Databricks SQL's TIMESTAMP and TINYINT types
# as these are not covered by the generic, camelcase types shown below
from databricks.sqlalchemy import TIMESTAMP, TINYINT

# Beside the CamelCase types shown below, line comments reflect
# the underlying Databricks SQL / Delta table type
from sqlalchemy import (
    BigInteger,  # BIGINT
    Boolean,  # BOOLEAN
    Column,
    Date,  # DATE
    DateTime,  # TIMESTAMP_NTZ
    Integer,  # INTEGER
    Numeric,  # DECIMAL
    String,  # STRING
    Time,  # STRING
    Uuid,  # STRING
    create_engine,
    select,
)
from sqlalchemy.orm import DeclarativeBase, Session

# Connection coordinates for the demo warehouse. The access token is read
# from the environment so a real credential is never hard-coded in source;
# it falls back to an empty string, matching the original placeholder.
host = "e2-dogfood.staging.cloud.databricks.com"
http_path = "/sql/1.0/warehouses/58aa1b363649e722"
access_token = os.getenv("DATABRICKS_TOKEN", "")
catalog = "___________________first"
schema = "jprakash-test"


# Extra arguments are passed untouched to databricks-sql-connector
# See src/databricks/sql/thrift_backend.py for complete list
extra_connect_args = {
    "_tls_verify_hostname": True,
    "_user_agent_entry": "PySQL Example Script",
}


# echo=True makes SQLAlchemy log every emitted statement, which is how the
# "Output SQL is:" comments below were captured.
engine = create_engine(
    f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
    connect_args=extra_connect_args,
    echo=True,
)

class Base(DeclarativeBase):
    """Declarative base shared by the example ORM models in this script."""


# This object gives a usage example for each supported type
# for more details on these, see README.sqlalchemy.md
class SampleObject(Base):
    """One column per supported SQLAlchemy type, mapped to a Delta table.

    Column declaration order is preserved because it determines the column
    order in the emitted CREATE TABLE statement.
    """

    __tablename__ = "pysql_sqlalchemy_example_table"

    bigint_col = Column(BigInteger, primary_key=True)
    string_col = Column(String)
    tinyint_col = Column(TINYINT)  # Databricks-specific uppercase type
    int_col = Column(Integer)
    numeric_col = Column(Numeric(10, 2))
    boolean_col = Column(Boolean)
    date_col = Column(Date)
    datetime_col = Column(TIMESTAMP)  # Databricks-specific uppercase type
    datetime_col_ntz = Column(DateTime)
    time_col = Column(Time)
    uuid_col = Column(Uuid)

# This generates a CREATE TABLE statement against the catalog and schema
# specified in the connection string
Base.metadata.create_all(engine)

# Output SQL is:
# CREATE TABLE pysql_sqlalchemy_example_table (
#   bigint_col BIGINT NOT NULL,
#   string_col STRING,
#   tinyint_col SMALLINT,
#   int_col INT,
#   numeric_col DECIMAL(10, 2),
#   boolean_col BOOLEAN,
#   date_col DATE,
#   datetime_col TIMESTAMP,
#   datetime_col_ntz TIMESTAMP_NTZ,
#   time_col STRING,
#   uuid_col STRING,
#   PRIMARY KEY (bigint_col)
# ) USING DELTA

# The code that follows will INSERT a record using SQLAlchemy ORM containing these values
# and then SELECT it back out. The output is compared to the input to demonstrate that
# all type information is preserved.
sample_object = {
    "bigint_col": 1234567890123456789,
    "string_col": "foo",
    "tinyint_col": -100,
    "int_col": 5280,
    "numeric_col": Decimal("525600.01"),
    "boolean_col": True,
    "date_col": date(2020, 12, 25),
    # Timezone-aware timestamp (UTC-8) exercising the TIMESTAMP column
    "datetime_col": datetime(
        1991, 8, 3, 21, 30, 5, tzinfo=timezone(timedelta(hours=-8))
    ),
    # Naive timestamp exercising the TIMESTAMP_NTZ column
    "datetime_col_ntz": datetime(1990, 12, 4, 6, 33, 41),
    "time_col": time(23, 59, 59),
    "uuid_col": UUID(int=255),
}
sa_obj = SampleObject(**sample_object)

session = Session(engine)
session.add(sa_obj)
session.commit()

# Output SQL is:
# INSERT INTO
#   pysql_sqlalchemy_example_table (
#     bigint_col,
#     string_col,
#     tinyint_col,
#     int_col,
#     numeric_col,
#     boolean_col,
#     date_col,
#     datetime_col,
#     datetime_col_ntz,
#     time_col,
#     uuid_col
#   )
# VALUES
#   (
#     :bigint_col,
#     :string_col,
#     :tinyint_col,
#     :int_col,
#     :numeric_col,
#     :boolean_col,
#     :date_col,
#     :datetime_col,
#     :datetime_col_ntz,
#     :time_col,
#     :uuid_col
#   )

# Here we build a SELECT query using ORM
stmt = select(SampleObject).where(SampleObject.int_col == 5280)

# Then fetch one result with session.scalar()
result = session.scalar(stmt)

# Finally, we read out the input data and compare it to the output
# to demonstrate the round-trip preserved every value and its type.
compare = {key: getattr(result, key) for key in sample_object.keys()}
assert compare == sample_object

# Then we drop the demonstration table
Base.metadata.drop_all(engine)

# Output SQL is:
# DROP TABLE pysql_sqlalchemy_example_table