Commit 21c389d

introduce type conversion for primitive types for JSON + INLINE
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 5e01e7b commit 21c389d

3 files changed: +392 -2 lines changed


src/databricks/sql/conversion.py

Lines changed: 201 additions & 0 deletions
@@ -0,0 +1,201 @@
"""
Type conversion utilities for the Databricks SQL Connector.

This module provides functionality to convert string values from SEA Inline results
to appropriate Python types based on column metadata.
"""

import datetime
import decimal
import logging
from dateutil import parser
from typing import Any, Callable, Dict, Optional, Union

logger = logging.getLogger(__name__)


class SqlType:
    """SQL type constants for improved maintainability."""

    # Numeric types
    TINYINT = "tinyint"
    SMALLINT = "smallint"
    INT = "int"
    INTEGER = "integer"
    BIGINT = "bigint"
    FLOAT = "float"
    REAL = "real"
    DOUBLE = "double"
    DECIMAL = "decimal"
    NUMERIC = "numeric"

    # Boolean types
    BOOLEAN = "boolean"
    BIT = "bit"

    # Date/Time types
    DATE = "date"
    TIME = "time"
    TIMESTAMP = "timestamp"
    TIMESTAMP_NTZ = "timestamp_ntz"
    TIMESTAMP_LTZ = "timestamp_ltz"
    TIMESTAMP_TZ = "timestamp_tz"

    # String types
    CHAR = "char"
    VARCHAR = "varchar"
    STRING = "string"
    TEXT = "text"

    # Binary types
    BINARY = "binary"
    VARBINARY = "varbinary"

    # Complex types
    ARRAY = "array"
    MAP = "map"
    STRUCT = "struct"

    @classmethod
    def is_numeric(cls, sql_type: str) -> bool:
        """Check if the SQL type is a numeric type."""
        return sql_type.lower() in (
            cls.TINYINT,
            cls.SMALLINT,
            cls.INT,
            cls.INTEGER,
            cls.BIGINT,
            cls.FLOAT,
            cls.REAL,
            cls.DOUBLE,
            cls.DECIMAL,
            cls.NUMERIC,
        )

    @classmethod
    def is_boolean(cls, sql_type: str) -> bool:
        """Check if the SQL type is a boolean type."""
        return sql_type.lower() in (cls.BOOLEAN, cls.BIT)

    @classmethod
    def is_datetime(cls, sql_type: str) -> bool:
        """Check if the SQL type is a date/time type."""
        return sql_type.lower() in (
            cls.DATE,
            cls.TIME,
            cls.TIMESTAMP,
            cls.TIMESTAMP_NTZ,
            cls.TIMESTAMP_LTZ,
            cls.TIMESTAMP_TZ,
        )

    @classmethod
    def is_string(cls, sql_type: str) -> bool:
        """Check if the SQL type is a string type."""
        return sql_type.lower() in (cls.CHAR, cls.VARCHAR, cls.STRING, cls.TEXT)

    @classmethod
    def is_binary(cls, sql_type: str) -> bool:
        """Check if the SQL type is a binary type."""
        return sql_type.lower() in (cls.BINARY, cls.VARBINARY)

    @classmethod
    def is_complex(cls, sql_type: str) -> bool:
        """Check if the SQL type is a complex type."""
        sql_type = sql_type.lower()
        return (
            sql_type.startswith(cls.ARRAY)
            or sql_type.startswith(cls.MAP)
            or sql_type.startswith(cls.STRUCT)
        )


class SqlTypeConverter:
    """
    Utility class for converting SQL types to Python types.
    Based on the JDBC ConverterHelper implementation.
    """

    # SQL type to conversion function mapping
    TYPE_MAPPING: Dict[str, Callable] = {
        # Numeric types
        SqlType.TINYINT: lambda v: int(v),
        SqlType.SMALLINT: lambda v: int(v),
        SqlType.INT: lambda v: int(v),
        SqlType.INTEGER: lambda v: int(v),
        SqlType.BIGINT: lambda v: int(v),
        SqlType.FLOAT: lambda v: float(v),
        SqlType.REAL: lambda v: float(v),
        SqlType.DOUBLE: lambda v: float(v),
        SqlType.DECIMAL: lambda v, p=None, s=None: (
            decimal.Decimal(v).quantize(
                decimal.Decimal(f'0.{"0" * s}'), context=decimal.Context(prec=p)
            )
            if p is not None and s is not None
            else decimal.Decimal(v)
        ),
        SqlType.NUMERIC: lambda v, p=None, s=None: (
            decimal.Decimal(v).quantize(
                decimal.Decimal(f'0.{"0" * s}'), context=decimal.Context(prec=p)
            )
            if p is not None and s is not None
            else decimal.Decimal(v)
        ),
        # Boolean types
        SqlType.BOOLEAN: lambda v: v.lower() in ("true", "t", "1", "yes", "y"),
        SqlType.BIT: lambda v: v.lower() in ("true", "t", "1", "yes", "y"),
        # Date/Time types
        SqlType.DATE: lambda v: datetime.date.fromisoformat(v),
        SqlType.TIME: lambda v: datetime.time.fromisoformat(v),
        SqlType.TIMESTAMP: lambda v: parser.parse(v),
        SqlType.TIMESTAMP_NTZ: lambda v: parser.parse(v).replace(tzinfo=None),
        SqlType.TIMESTAMP_LTZ: lambda v: parser.parse(v).astimezone(tz=None),
        SqlType.TIMESTAMP_TZ: lambda v: parser.parse(v),
        # String types - no conversion needed
        SqlType.CHAR: lambda v: v,
        SqlType.VARCHAR: lambda v: v,
        SqlType.STRING: lambda v: v,
        SqlType.TEXT: lambda v: v,
        # Binary types
        SqlType.BINARY: lambda v: bytes.fromhex(v),
        SqlType.VARBINARY: lambda v: bytes.fromhex(v),
    }

    @staticmethod
    def convert_value(
        value: Any,
        sql_type: str,
        precision: Optional[int] = None,
        scale: Optional[int] = None,
    ) -> Any:
        """
        Convert a string value to the appropriate Python type based on SQL type.

        Args:
            value: The string value to convert
            sql_type: The SQL type (e.g., 'int', 'decimal')
            precision: Optional precision for decimal types
            scale: Optional scale for decimal types

        Returns:
            The converted value in the appropriate Python type
        """
        if value is None:
            return None

        # Normalize SQL type
        sql_type = sql_type.lower().strip()

        # Handle primitive types using the mapping
        if sql_type not in SqlTypeConverter.TYPE_MAPPING:
            return value

        converter_func = SqlTypeConverter.TYPE_MAPPING[sql_type]
        try:
            if sql_type in (SqlType.DECIMAL, SqlType.NUMERIC):
                return converter_func(value, precision, scale)
            else:
                return converter_func(value)
        except (ValueError, TypeError, decimal.InvalidOperation) as e:
            logger.warning(f"Error converting value '{value}' to {sql_type}: {e}")
            return value
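
A minimal usage sketch of the converter added above. The import path and class names come from this commit; the sample values and column types are illustrative only, and the behaviour shown follows directly from the TYPE_MAPPING table:

from databricks.sql.conversion import SqlType, SqlTypeConverter

# Primitive conversions are driven by TYPE_MAPPING
assert SqlTypeConverter.convert_value("42", "int") == 42
assert SqlTypeConverter.convert_value("true", "boolean") is True
assert SqlTypeConverter.convert_value("2024-01-31", "date").year == 2024

# DECIMAL/NUMERIC honour precision and scale when both are supplied
d = SqlTypeConverter.convert_value("123.456", "decimal", precision=5, scale=2)
# -> Decimal('123.46'), quantized to two decimal places

# Unknown or complex types fall through unchanged
assert SqlTypeConverter.convert_value("[1,2]", "array<int>") == "[1,2]"
assert SqlType.is_complex("array<int>") is True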

src/databricks/sql/result_set.py

Lines changed: 30 additions & 2 deletions
@@ -6,6 +6,7 @@
 
 from databricks.sql.backend.sea.backend import SeaDatabricksClient
 from databricks.sql.backend.sea.models.base import ResultData, ResultManifest
+from databricks.sql.conversion import SqlTypeConverter
 
 try:
     import pyarrow
@@ -503,17 +504,44 @@ def __init__(
     def _convert_json_table(self, rows):
         """
         Convert raw data rows to Row objects with named columns based on description.
+        Also converts string values to appropriate Python types based on column metadata.
+
         Args:
             rows: List of raw data rows
         Returns:
-            List of Row objects with named columns
+            List of Row objects with named columns and converted values
         """
         if not self.description or not rows:
             return rows
 
         column_names = [col[0] for col in self.description]
         ResultRow = Row(*column_names)
-        return [ResultRow(*row) for row in rows]
+
+        # JSON + INLINE gives us string values, so we convert them to appropriate
+        # types based on column metadata
+        converted_rows = []
+        for row in rows:
+            converted_row = []
+
+            for i, value in enumerate(row):
+                column_type = self.description[i][1]
+                precision = self.description[i][4]
+                scale = self.description[i][5]
+
+                try:
+                    converted_value = SqlTypeConverter.convert_value(
+                        value, column_type, precision=precision, scale=scale
+                    )
+                    converted_row.append(converted_value)
+                except Exception as e:
+                    logger.warning(
+                        f"Error converting value '{value}' to {column_type}: {e}"
+                    )
+                    converted_row.append(value)
+
+            converted_rows.append(ResultRow(*converted_row))
+
+        return converted_rows
 
     def fetchmany_json(self, size: int):
         """
