Skip to content

Commit f90b4d4

Browse files
reduce code repetition
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 13e6346 commit f90b4d4

File tree

1 file changed

+38
-101
lines changed

1 file changed

+38
-101
lines changed

src/databricks/sql/result_set.py

Lines changed: 38 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,44 @@ def __iter__(self):
9393
else:
9494
break
9595

96+
def _convert_arrow_table(self, table):
97+
column_names = [c[0] for c in self.description]
98+
ResultRow = Row(*column_names)
99+
100+
if self.connection.disable_pandas is True:
101+
return [
102+
ResultRow(*[v.as_py() for v in r]) for r in zip(*table.itercolumns())
103+
]
104+
105+
# Need to use nullable types, as otherwise type can change when there are missing values.
106+
# See https://arrow.apache.org/docs/python/pandas.html#nullable-types
107+
# NOTE: This api is experimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
108+
dtype_mapping = {
109+
pyarrow.int8(): pandas.Int8Dtype(),
110+
pyarrow.int16(): pandas.Int16Dtype(),
111+
pyarrow.int32(): pandas.Int32Dtype(),
112+
pyarrow.int64(): pandas.Int64Dtype(),
113+
pyarrow.uint8(): pandas.UInt8Dtype(),
114+
pyarrow.uint16(): pandas.UInt16Dtype(),
115+
pyarrow.uint32(): pandas.UInt32Dtype(),
116+
pyarrow.uint64(): pandas.UInt64Dtype(),
117+
pyarrow.bool_(): pandas.BooleanDtype(),
118+
pyarrow.float32(): pandas.Float32Dtype(),
119+
pyarrow.float64(): pandas.Float64Dtype(),
120+
pyarrow.string(): pandas.StringDtype(),
121+
}
122+
123+
# Need to rename columns, as the to_pandas function cannot handle duplicate column names
124+
table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
125+
df = table_renamed.to_pandas(
126+
types_mapper=dtype_mapping.get,
127+
date_as_object=True,
128+
timestamp_as_object=True,
129+
)
130+
131+
res = df.to_numpy(na_value=None, dtype="object")
132+
return [ResultRow(*v) for v in res]
133+
96134
@property
97135
def rownumber(self):
98136
return self._next_row_index
@@ -234,44 +272,6 @@ def _fill_results_buffer(self):
234272
self.results = results
235273
self.has_more_rows = has_more_rows
236274

237-
def _convert_arrow_table(self, table):
238-
column_names = [c[0] for c in self.description]
239-
ResultRow = Row(*column_names)
240-
241-
if self.connection.disable_pandas is True:
242-
return [
243-
ResultRow(*[v.as_py() for v in r]) for r in zip(*table.itercolumns())
244-
]
245-
246-
# Need to use nullable types, as otherwise type can change when there are missing values.
247-
# See https://arrow.apache.org/docs/python/pandas.html#nullable-types
248-
# NOTE: This api is experimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
249-
dtype_mapping = {
250-
pyarrow.int8(): pandas.Int8Dtype(),
251-
pyarrow.int16(): pandas.Int16Dtype(),
252-
pyarrow.int32(): pandas.Int32Dtype(),
253-
pyarrow.int64(): pandas.Int64Dtype(),
254-
pyarrow.uint8(): pandas.UInt8Dtype(),
255-
pyarrow.uint16(): pandas.UInt16Dtype(),
256-
pyarrow.uint32(): pandas.UInt32Dtype(),
257-
pyarrow.uint64(): pandas.UInt64Dtype(),
258-
pyarrow.bool_(): pandas.BooleanDtype(),
259-
pyarrow.float32(): pandas.Float32Dtype(),
260-
pyarrow.float64(): pandas.Float64Dtype(),
261-
pyarrow.string(): pandas.StringDtype(),
262-
}
263-
264-
# Need to rename columns, as the to_pandas function cannot handle duplicate column names
265-
table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
266-
df = table_renamed.to_pandas(
267-
types_mapper=dtype_mapping.get,
268-
date_as_object=True,
269-
timestamp_as_object=True,
270-
)
271-
272-
res = df.to_numpy(na_value=None, dtype="object")
273-
return [ResultRow(*v) for v in res]
274-
275275
def merge_columnar(self, result1, result2) -> "ColumnTable":
276276
"""
277277
Function to merge / combining the columnar results into a single result
@@ -503,69 +503,6 @@ def __init__(
503503
# Initialize queue for result data if not provided
504504
self.results = results_queue or JsonQueue([])
505505

506-
def _convert_arrow_table(self, table):
507-
"""
508-
Convert an Arrow table to a list of Row objects.
509-
510-
Args:
511-
table: PyArrow Table to convert
512-
513-
Returns:
514-
List of Row objects
515-
"""
516-
if table.num_rows == 0:
517-
return []
518-
519-
column_names = [c[0] for c in self.description]
520-
ResultRow = Row(*column_names)
521-
522-
if self.connection.disable_pandas is True:
523-
return [
524-
ResultRow(*[v.as_py() for v in r]) for r in zip(*table.itercolumns())
525-
]
526-
527-
# Need to use nullable types, as otherwise type can change when there are missing values.
528-
# See https://arrow.apache.org/docs/python/pandas.html#nullable-types
529-
# NOTE: This api is experimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
530-
dtype_mapping = {
531-
pyarrow.int8(): pandas.Int8Dtype(),
532-
pyarrow.int16(): pandas.Int16Dtype(),
533-
pyarrow.int32(): pandas.Int32Dtype(),
534-
pyarrow.int64(): pandas.Int64Dtype(),
535-
pyarrow.uint8(): pandas.UInt8Dtype(),
536-
pyarrow.uint16(): pandas.UInt16Dtype(),
537-
pyarrow.uint32(): pandas.UInt32Dtype(),
538-
pyarrow.uint64(): pandas.UInt64Dtype(),
539-
pyarrow.bool_(): pandas.BooleanDtype(),
540-
pyarrow.float32(): pandas.Float32Dtype(),
541-
pyarrow.float64(): pandas.Float64Dtype(),
542-
pyarrow.string(): pandas.StringDtype(),
543-
}
544-
545-
# Need to rename columns, as the to_pandas function cannot handle duplicate column names
546-
table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
547-
df = table_renamed.to_pandas(
548-
types_mapper=dtype_mapping.get,
549-
date_as_object=True,
550-
timestamp_as_object=True,
551-
)
552-
553-
res = df.to_numpy(na_value=None, dtype="object")
554-
return [ResultRow(*v) for v in res]
555-
556-
def _create_empty_arrow_table(self):
557-
"""
558-
Create an empty Arrow table with the correct schema.
559-
560-
Returns:
561-
Empty PyArrow Table with the schema from description
562-
"""
563-
if not self.description:
564-
return pyarrow.Table.from_pylist([])
565-
566-
column_names = [col[0] for col in self.description]
567-
return pyarrow.Table.from_pydict({name: [] for name in column_names})
568-
569506
def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
570507
"""
571508
Fetch the next set of rows as an Arrow table.

0 commit comments

Comments
 (0)