Skip to content

Commit 1f4714b

Browse files
author
Jesse
authored
SQLAlchemy 2: Expose TIMESTAMP and TIMESTAMP_NTZ types to users (databricks#268)
Signed-off-by: Jesse Whitehouse <jesse.whitehouse@databricks.com>
1 parent abe6891 commit 1f4714b

File tree

10 files changed

+185
-62
lines changed

10 files changed

+185
-62
lines changed

src/databricks/sqlalchemy/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from databricks.sqlalchemy.base import DatabricksDialect
2-
from databricks.sqlalchemy._types import TINYINT
2+
from databricks.sqlalchemy._types import TINYINT, TIMESTAMP, TIMESTAMP_NTZ
33

4-
__all__ = ["TINYINT"]
4+
__all__ = ["TINYINT", "TIMESTAMP", "TIMESTAMP_NTZ"]

src/databricks/sqlalchemy/_types.py

Lines changed: 125 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,29 @@
1+
from datetime import datetime, time, timezone
2+
from itertools import product
3+
from typing import Any, Union, Optional
4+
15
import sqlalchemy
6+
from sqlalchemy.engine.interfaces import Dialect
27
from sqlalchemy.ext.compiler import compiles
38

4-
from typing import Union
9+
from databricks.sql.utils import ParamEscaper
510

6-
from datetime import datetime, time
711

12+
def process_literal_param_hack(value: Any):
13+
"""This method is supposed to accept a Python type and return a string representation of that type.
14+
But due to some weirdness in the way SQLAlchemy's literal rendering works, we have to return
15+
the value itself because, by the time it reaches our custom type code, it's already been converted
16+
into a string.
817
9-
from databricks.sql.utils import ParamEscaper
18+
TimeTest
19+
DateTimeTest
20+
DateTimeTZTest
21+
22+
This dynamic only seems to affect the literal rendering of datetime and time objects.
23+
24+
All fail without this hack in-place. I'm not sure why. But it works.
25+
"""
26+
return value
1027

1128

1229
@compiles(sqlalchemy.types.Enum, "databricks")
@@ -64,7 +81,7 @@ def compile_numeric_databricks(type_, compiler, **kw):
6481
@compiles(sqlalchemy.types.DateTime, "databricks")
6582
def compile_datetime_databricks(type_, compiler, **kw):
6683
"""
67-
We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP" instead of "DATETIME"
84+
We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP_NTZ" instead of "DATETIME"
6885
"""
6986
return "TIMESTAMP_NTZ"
7087

@@ -87,13 +104,15 @@ def compile_array_databricks(type_, compiler, **kw):
87104
return f"ARRAY<{inner}>"
88105

89106

90-
class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator):
91-
"""The decimal that pysql creates when it receives the contents of a TIMESTAMP_NTZ
92-
includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that
93-
the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any
94-
timezone set, we need to strip the timezone off the value received from pysql.
107+
class TIMESTAMP_NTZ(sqlalchemy.types.TypeDecorator):
108+
"""Represents values comprising values of fields year, month, day, hour, minute, and second.
109+
All operations are performed without taking any time zone into account.
110+
111+
Our dialect maps sqlalchemy.types.DateTime() to this type, which means that all DateTime()
112+
objects are stored without tzinfo. To read and write timezone-aware datetimes use
113+
databricks.sql.TIMESTAMP instead.
95114
96-
It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug.
115+
https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html
97116
"""
98117

99118
impl = sqlalchemy.types.DateTime
@@ -106,36 +125,115 @@ def process_result_value(self, value: Union[None, datetime], dialect):
106125
return value.replace(tzinfo=None)
107126

108127

128+
class TIMESTAMP(sqlalchemy.types.TypeDecorator):
129+
"""Represents values comprising values of fields year, month, day, hour, minute, and second,
130+
with the session local time-zone.
131+
132+
Our dialect maps sqlalchemy.types.DateTime() to TIMESTAMP_NTZ, which means that all DateTime()
133+
objects are stored without tzinfo. To read and write timezone-aware datetimes use
134+
this type instead.
135+
136+
```python
137+
# This won't work
138+
`Column(sqlalchemy.DateTime(timezone=True))`
139+
140+
# But this does
141+
`Column(TIMESTAMP)`
142+
````
143+
144+
https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html
145+
"""
146+
147+
impl = sqlalchemy.types.DateTime
148+
149+
cache_ok = True
150+
151+
def process_result_value(self, value: Union[None, datetime], dialect):
152+
if value is None:
153+
return None
154+
155+
if not value.tzinfo:
156+
return value.replace(tzinfo=timezone.utc)
157+
return value
158+
159+
def process_bind_param(
160+
self, value: Union[datetime, None], dialect
161+
) -> Optional[datetime]:
162+
"""pysql can pass datetime.datetime() objects directly to DBR"""
163+
return value
164+
165+
def process_literal_param(
166+
self, value: Union[datetime, None], dialect: Dialect
167+
) -> str:
168+
""" """
169+
return process_literal_param_hack(value)
170+
171+
172+
@compiles(TIMESTAMP, "databricks")
173+
def compile_timestamp_databricks(type_, compiler, **kw):
174+
"""
175+
We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP_NTZ" instead of "DATETIME"
176+
"""
177+
return "TIMESTAMP"
178+
179+
109180
class DatabricksTimeType(sqlalchemy.types.TypeDecorator):
110181
"""Databricks has no native TIME type. So we store it as a string."""
111182

112183
impl = sqlalchemy.types.Time
113184
cache_ok = True
114185

115-
TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f"
116-
TIME_NO_MICROSECONDS_FMT = "%H:%M:%S"
186+
BASE_FMT = "%H:%M:%S"
187+
MICROSEC_PART = ".%f"
188+
TIMEZONE_PART = "%z"
189+
190+
def _generate_fmt_string(self, ms: bool, tz: bool) -> str:
191+
"""Return a format string for datetime.strptime() that includes or excludes microseconds and timezone."""
192+
_ = lambda x, y: x if y else ""
193+
return f"{self.BASE_FMT}{_(self.MICROSEC_PART,ms)}{_(self.TIMEZONE_PART,tz)}"
194+
195+
@property
196+
def allowed_fmt_strings(self):
197+
"""Time strings can be read with or without microseconds and with or without a timezone."""
198+
199+
if not hasattr(self, "_allowed_fmt_strings"):
200+
ms_switch = tz_switch = [True, False]
201+
self._allowed_fmt_strings = [
202+
self._generate_fmt_string(x, y)
203+
for x, y in product(ms_switch, tz_switch)
204+
]
205+
206+
return self._allowed_fmt_strings
207+
208+
def _parse_result_string(self, value: str) -> time:
209+
"""Parse a string into a time object. Try all allowed formats until one works."""
210+
for fmt in self.allowed_fmt_strings:
211+
try:
212+
# We use timetz() here because we want to preserve the timezone information
213+
# Calling .time() will strip the timezone information
214+
return datetime.strptime(value, fmt).timetz()
215+
except ValueError:
216+
pass
217+
218+
raise ValueError(f"Could not parse time string {value}")
219+
220+
def _determine_fmt_string(self, value: time) -> str:
221+
"""Determine which format string to use to render a time object as a string."""
222+
ms_bool = value.microsecond > 0
223+
tz_bool = value.tzinfo is not None
224+
return self._generate_fmt_string(ms_bool, tz_bool)
117225

118226
def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]:
119227
"""Values sent to the database are converted to %:H:%M:%S strings."""
120228
if value is None:
121229
return None
122-
return value.strftime(self.TIME_WITH_MICROSECONDS_FMT)
230+
fmt_string = self._determine_fmt_string(value)
231+
return value.strftime(fmt_string)
123232

124233
# mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string
125234
def process_literal_param(self, value, dialect) -> time: # type: ignore
126-
"""It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails
127-
because the string literal renderer receives a str() object and calls .isoformat() on it.
128-
129-
Whereas this method receives a datetime.time() object which is subsequently passed to that
130-
same renderer. And that works.
131-
132-
UPDATE: After coping with the literal_processor override in DatabricksStringType, I suspect a similar
133-
mechanism is at play. Two different processors are are called in sequence. This is likely a byproduct
134-
of Databricks not having a true TIME type. I think the string representation of Time() types is
135-
somehow affecting the literal rendering process. But as long as this passes the tests, I'm not
136-
worried about it.
137-
"""
138-
return value
235+
""" """
236+
return process_literal_param_hack(value)
139237

140238
def process_result_value(
141239
self, value: Union[None, str], dialect
@@ -144,13 +242,7 @@ def process_result_value(
144242
if value is None:
145243
return None
146244

147-
try:
148-
_parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT)
149-
except ValueError:
150-
# If the string doesn't have microseconds, try parsing it without them
151-
_parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT)
152-
153-
return _parsed.time()
245+
return self._parse_result_string(value)
154246

155247

156248
class DatabricksStringType(sqlalchemy.types.TypeDecorator):

src/databricks/sqlalchemy/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class DatabricksDialect(default.DefaultDialect):
6666
supports_sequences: bool = False
6767

6868
colspecs = {
69-
sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType,
69+
sqlalchemy.types.DateTime: dialect_type_impl.TIMESTAMP_NTZ,
7070
sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType,
7171
sqlalchemy.types.String: dialect_type_impl.DatabricksStringType,
7272
}

src/databricks/sqlalchemy/requirements.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,10 @@ def denormalized_names(self):
228228
UPPERCASE as case insensitive names."""
229229

230230
return sqlalchemy.testing.exclusions.open()
231+
232+
@property
233+
def time_timezone(self):
234+
"""target dialect supports representation of Python
235+
datetime.time() with tzinfo with Time(timezone=True)."""
236+
237+
return sqlalchemy.testing.exclusions.open()

src/databricks/sqlalchemy/test/_extra.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Additional tests authored by Databricks that use SQLAlchemy's test fixtures
22
"""
33

4+
import datetime
5+
46
from sqlalchemy.testing.suite.test_types import (
57
_LiteralRoundTripFixture,
68
fixtures,
@@ -10,8 +12,10 @@
1012
Table,
1113
Column,
1214
config,
15+
_DateFixture,
16+
literal,
1317
)
14-
from databricks.sqlalchemy import TINYINT
18+
from databricks.sqlalchemy import TINYINT, TIMESTAMP
1519

1620

1721
class TinyIntegerTest(_LiteralRoundTripFixture, fixtures.TestBase):
@@ -46,3 +50,21 @@ def run(datatype, data):
4650
assert isinstance(row[0], int)
4751

4852
return run
53+
54+
55+
class DateTimeTZTestCustom(_DateFixture, fixtures.TablesTest):
56+
"""This test confirms that when a user uses the TIMESTAMP
57+
type to store a datetime object, it retains its timezone
58+
"""
59+
60+
__backend__ = True
61+
datatype = TIMESTAMP
62+
data = datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=datetime.timezone.utc)
63+
64+
@testing.requires.datetime_implicit_bound
65+
def test_select_direct(self, connection):
66+
67+
# We need to pass the TIMESTAMP type to the literal function
68+
# so that the value is processed correctly.
69+
result = connection.scalar(select(literal(self.data, TIMESTAMP)))
70+
eq_(result, self.data)

src/databricks/sqlalchemy/test/_future.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
CollateTest,
2525
ComputedColumnTest,
2626
ComputedReflectionTest,
27-
DateTimeTZTest,
2827
DifficultParametersTest,
2928
FutureWeCanSetDefaultSchemaWEventsTest,
3029
IdentityColumnTest,
@@ -35,7 +34,6 @@
3534
QuotedNameArgumentTest,
3635
RowCountTest,
3736
SimpleUpdateDeleteTest,
38-
TimeTZTest,
3937
WeCanSetDefaultSchemaWEventsTest,
4038
)
4139

@@ -58,7 +56,6 @@ class FutureFeature(Enum):
5856
TBL_COMMENTS = "table comment reflection"
5957
TBL_OPTS = "get_table_options method"
6058
TEST_DESIGN = "required test-fixture overrides"
61-
TIMEZONE = "timezone handling for DateTime() or Time() types"
6259
TUPLE_LITERAL = "tuple-like IN markers completely"
6360
UUID = "native Uuid() type"
6461
VIEW_DEF = "get_view_definition method"
@@ -202,26 +199,6 @@ def test_regexp_match(self):
202199
pass
203200

204201

205-
@pytest.mark.reviewed
206-
@pytest.mark.skip(render_future_feature(FutureFeature.TIMEZONE, True))
207-
class DateTimeTZTest(DateTimeTZTest):
208-
"""When I initially implemented DateTime type handling, I started using TIMESTAMP_NTZ because
209-
that's the default behaviour of the DateTime() type and the other tests passed. I simply missed
210-
this group of tests. Will need to modify the compilation and result_processor for our type override
211-
so that we can pass both DateTimeTZTest and DateTimeTest. Currently, only DateTimeTest passes.
212-
"""
213-
214-
pass
215-
216-
217-
@pytest.mark.reviewed
218-
@pytest.mark.skip(render_future_feature(FutureFeature.TIMEZONE, True))
219-
class TimeTZTest(TimeTZTest):
220-
"""Similar to DateTimeTZTest, this should be possible for the dialect since we can override type compilation
221-
and processing in _types.py. Implementation has been deferred.
222-
"""
223-
224-
225202
@pytest.mark.reviewed
226203
@pytest.mark.skip(render_future_feature(FutureFeature.COLLATE))
227204
class CollateTest(CollateTest):

src/databricks/sqlalchemy/test/_regression.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
TimeMicrosecondsTest,
4646
TimestampMicrosecondsTest,
4747
TimeTest,
48+
TimeTZTest,
4849
TrueDivTest,
4950
UnicodeTextTest,
5051
UnicodeVarcharTest,
@@ -300,3 +301,8 @@ class IdentityAutoincrementTest(IdentityAutoincrementTest):
300301
@pytest.mark.reviewed
301302
class LikeFunctionsTest(LikeFunctionsTest):
302303
pass
304+
305+
306+
@pytest.mark.reviewed
307+
class TimeTZTest(TimeTZTest):
308+
pass

src/databricks/sqlalchemy/test/_unsupported.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# These are test suites that are fully skipped with a SkipReason
2020
from sqlalchemy.testing.suite import (
2121
AutocommitIsolationTest,
22+
DateTimeTZTest,
2223
ExceptionTest,
2324
HasIndexTest,
2425
HasSequenceTest,
@@ -51,6 +52,7 @@ class SkipReason(Enum):
5152
STRING_FEAT = "required STRING type features"
5253
SYMBOL_CHARSET = "symbols expected by test"
5354
TEMP_TBL = "temporary tables"
55+
TIMEZONE_OPT = "timezone-optional TIMESTAMP fields"
5456
TRANSACTIONS = "transactions"
5557
UNIQUE = "UNIQUE constraints"
5658

@@ -415,3 +417,18 @@ def test_delete_scalar_subq_round_trip(self):
415417
This suggests a limitation of the platform. But a workaround may be possible if customers require it.
416418
"""
417419
pass
420+
421+
422+
@pytest.mark.reviewed
423+
@pytest.mark.skip(render_skip_reason(SkipReason.TIMEZONE_OPT, True))
424+
class DateTimeTZTest(DateTimeTZTest):
425+
"""Test whether the sqlalchemy.DateTime() type can _optionally_ include timezone info.
426+
This dialect maps DateTime() → TIMESTAMP, which _always_ includes tzinfo.
427+
428+
Users can use databricks.sqlalchemy.TIMESTAMP_NTZ for a tzinfo-less timestamp. The SQLA docs
429+
acknowledge this is expected for some dialects.
430+
431+
https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime
432+
"""
433+
434+
pass

src/databricks/sqlalchemy/test/test_suite.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@ def start_protocol_patch():
2323
from databricks.sqlalchemy.test._regression import *
2424
from databricks.sqlalchemy.test._unsupported import *
2525
from databricks.sqlalchemy.test._future import *
26-
from databricks.sqlalchemy.test._extra import TinyIntegerTest
26+
from databricks.sqlalchemy.test._extra import TinyIntegerTest, DateTimeTZTestCustom

0 commit comments

Comments
 (0)