1
+ from datetime import datetime , time , timezone
2
+ from itertools import product
3
+ from typing import Any , Union , Optional
4
+
1
5
import sqlalchemy
6
+ from sqlalchemy .engine .interfaces import Dialect
2
7
from sqlalchemy .ext .compiler import compiles
3
8
4
- from typing import Union
9
+ from databricks . sql . utils import ParamEscaper
5
10
6
- from datetime import datetime , time
7
11
12
def process_literal_param_hack(value: Any):
    """Return ``value`` unchanged; shared pass-through for ``process_literal_param`` hooks.

    A literal-param hook is supposed to accept a Python value and return its string
    representation. Due to some weirdness in the way SQLAlchemy's literal rendering
    works, the value has already been converted into a string by the time it reaches
    our custom type code, so it is handed back as-is. The underlying reason is not
    understood.

    This only seems to affect the literal rendering of datetime and time objects.
    The following tests all fail without this hack in place:

        TimeTest
        DateTimeTest
        DateTimeTZTest
    """
    return value
10
27
11
28
12
29
@compiles (sqlalchemy .types .Enum , "databricks" )
@@ -64,7 +81,7 @@ def compile_numeric_databricks(type_, compiler, **kw):
64
81
@compiles(sqlalchemy.types.DateTime, "databricks")
def compile_datetime_databricks(type_, compiler, **kw):
    """Render sqlalchemy.types.DateTime for the databricks dialect.

    The default DateTime compilation is overridden because Databricks uses
    "TIMESTAMP_NTZ" instead of "DATETIME". None of the arguments are consulted.
    """
    return "TIMESTAMP_NTZ"
70
87
@@ -87,13 +104,15 @@ def compile_array_databricks(type_, compiler, **kw):
87
104
return f"ARRAY<{ inner } >"
88
105
89
106
90
- class DatabricksDateTimeNoTimezoneType (sqlalchemy .types .TypeDecorator ):
91
- """The decimal that pysql creates when it receives the contents of a TIMESTAMP_NTZ
92
- includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that
93
- the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any
94
- timezone set, we need to strip the timezone off the value received from pysql.
107
+ class TIMESTAMP_NTZ (sqlalchemy .types .TypeDecorator ):
108
+ """Represents values comprising values of fields year, month, day, hour, minute, and second.
109
+ All operations are performed without taking any time zone into account.
110
+
111
+ Our dialect maps sqlalchemy.types.DateTime() to this type, which means that all DateTime()
112
+ objects are stored without tzinfo. To read and write timezone-aware datetimes use
113
+ databricks.sql.TIMESTAMP instead.
95
114
96
- It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug.
115
+ https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html
97
116
"""
98
117
99
118
impl = sqlalchemy .types .DateTime
@@ -106,36 +125,115 @@ def process_result_value(self, value: Union[None, datetime], dialect):
106
125
return value .replace (tzinfo = None )
107
126
108
127
128
class TIMESTAMP(sqlalchemy.types.TypeDecorator):
    """Represents values comprising the fields year, month, day, hour, minute, and second,
    with the session local time-zone.

    Our dialect maps sqlalchemy.types.DateTime() to TIMESTAMP_NTZ, which means that all
    DateTime() objects are stored without tzinfo. To read and write timezone-aware
    datetimes use this type instead.

    ```python
    # This won't work
    Column(sqlalchemy.DateTime(timezone=True))

    # But this does
    Column(TIMESTAMP)
    ```

    https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html
    """

    impl = sqlalchemy.types.DateTime

    cache_ok = True

    def process_result_value(self, value: Union[None, datetime], dialect):
        """Return the value read from the database, tagging naive datetimes as UTC.

        None passes through, and a value that already carries tzinfo is returned
        unchanged.
        """
        if value is not None and not value.tzinfo:
            return value.replace(tzinfo=timezone.utc)
        return value

    def process_bind_param(
        self, value: Union[datetime, None], dialect
    ) -> Optional[datetime]:
        """pysql can pass datetime.datetime() objects directly to DBR, so send ``value`` as-is."""
        return value

    def process_literal_param(
        self, value: Union[datetime, None], dialect: Dialect
    ) -> str:
        """Delegate to process_literal_param_hack, which hands ``value`` back unchanged."""
        return process_literal_param_hack(value)
170
+
171
+
172
@compiles(TIMESTAMP, "databricks")
def compile_timestamp_databricks(type_, compiler, **kw):
    """Render the TIMESTAMP type for the databricks dialect as "TIMESTAMP".

    None of the arguments are consulted; the type name is constant.
    """
    return "TIMESTAMP"
178
+
179
+
109
180
class DatabricksTimeType (sqlalchemy .types .TypeDecorator ):
110
181
"""Databricks has no native TIME type. So we store it as a string."""
111
182
112
183
impl = sqlalchemy .types .Time
113
184
cache_ok = True
114
185
115
- TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f"
116
- TIME_NO_MICROSECONDS_FMT = "%H:%M:%S"
186
+ BASE_FMT = "%H:%M:%S"
187
+ MICROSEC_PART = ".%f"
188
+ TIMEZONE_PART = "%z"
189
+
190
def _generate_fmt_string(self, ms: bool, tz: bool) -> str:
    """Build a time format string: BASE_FMT plus the optional parts requested.

    ``ms`` appends MICROSEC_PART and ``tz`` appends TIMEZONE_PART, in that order.
    """
    pieces = [self.BASE_FMT]
    if ms:
        pieces.append(self.MICROSEC_PART)
    if tz:
        pieces.append(self.TIMEZONE_PART)
    return "".join(pieces)
194
+
195
@property
def allowed_fmt_strings(self):
    """Format strings accepted when reading a time string, built once per instance.

    Time strings can be read with or without microseconds and with or without a
    timezone. The list is ordered: both parts, microseconds only, timezone only,
    neither.
    """
    if hasattr(self, "_allowed_fmt_strings"):
        return self._allowed_fmt_strings

    switches = (True, False)
    self._allowed_fmt_strings = [
        self._generate_fmt_string(with_ms, with_tz)
        for with_ms in switches
        for with_tz in switches
    ]
    return self._allowed_fmt_strings
207
+
208
def _parse_result_string(self, value: str) -> time:
    """Parse a string into a time object, trying each allowed format in order.

    Returns the result of the first format that parses; raises ValueError when
    none of the allowed formats match.
    """
    for candidate in self.allowed_fmt_strings:
        try:
            parsed = datetime.strptime(value, candidate)
        except ValueError:
            continue
        # timetz() preserves the timezone information; .time() would strip it.
        return parsed.timetz()

    raise ValueError(f"Could not parse time string {value}")
219
+
220
def _determine_fmt_string(self, value: time) -> str:
    """Pick the format string used to render ``value`` as a string.

    Microseconds are included only when non-zero, and the timezone part only
    when ``value`` carries tzinfo.
    """
    return self._generate_fmt_string(
        value.microsecond > 0, value.tzinfo is not None
    )
117
225
118
226
def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]:
    """Convert a time into the string sent to the database; None passes through.

    The string is rendered with the format chosen by _determine_fmt_string:
    %H:%M:%S, plus the microsecond and/or timezone parts when ``value`` has them.
    """
    return None if value is None else value.strftime(self._determine_fmt_string(value))
123
232
124
233
# mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string
125
234
def process_literal_param(self, value, dialect) -> time:  # type: ignore
    """Hand ``value`` back unchanged by delegating to process_literal_param_hack.

    The return annotation is the time object rather than a string, which is why
    the signature carries a type-ignore marker.
    """
    return process_literal_param_hack(value)
139
237
140
238
def process_result_value (
141
239
self , value : Union [None , str ], dialect
@@ -144,13 +242,7 @@ def process_result_value(
144
242
if value is None :
145
243
return None
146
244
147
- try :
148
- _parsed = datetime .strptime (value , self .TIME_WITH_MICROSECONDS_FMT )
149
- except ValueError :
150
- # If the string doesn't have microseconds, try parsing it without them
151
- _parsed = datetime .strptime (value , self .TIME_NO_MICROSECONDS_FMT )
152
-
153
- return _parsed .time ()
245
+ return self ._parse_result_string (value )
154
246
155
247
156
248
class DatabricksStringType (sqlalchemy .types .TypeDecorator ):
0 commit comments