databricks · jprakash-db · Mar 18, 2025 · Mar 7, 2025 · Mar 7, 2025 · Mar 8, 2025
diff --git a/src/databricks/sql/utils.py b/src/databricks/sql/utils.py
@@ -628,6 +628,26 @@ def convert_decimals_in_arrow_table(table, description) -> "pyarrow.Table":
     return table
 
 
+def datetime_parser(datetime_string):
+    """Parse a timestamp string with the first matching layout, else ValueError."""
+    candidate_layouts = (
+        "%Y-%m-%d %H:%M:%S.%f",  # MySQL and PostgreSQL format
+        "%Y-%m-%d %H:%M:%S",  # Common SQL format
+        "%Y-%m-%d",  # MySQL Date only
+        "%m/%d/%Y %I:%M:%S %p",  # US format MSSQL Server
+        "%d-%b-%Y %H:%M:%S",  # Oracle format
+    )
+    for layout in candidate_layouts:
+        try:
+            return datetime.datetime.strptime(datetime_string, layout)
+        except ValueError:
+            pass  # this layout did not match; try the next one
+    # Every known layout was rejected, so surface the offending input.
+    raise ValueError(
+        f"Datetime string '{datetime_string}' does not match any expected formats"
+    )
+
+
 def convert_to_assigned_datatypes_in_column_table(column_table, description):
 
     converted_column_table = []
@@ -642,16 +662,7 @@ def convert_to_assigned_datatypes_in_column_table(column_table, description):
             )
         elif description[i][1] == "timestamp":
             converted_column_table.append(
-                tuple(
-                    (
-                        v
-                        if v is None
-                        else datetime.datetime.strptime(
-                            v, "%Y-%m-%d %H:%M:%S.%f"
-                        ).replace(tzinfo=pytz.UTC)
-                    )
-                    for v in col
-                )
+                tuple((v if v is None else datetime_parser(v)) for v in col)
             )
         else:
             converted_column_table.append(col)

diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
@@ -0,0 +1,92 @@
+import decimal
+import datetime
+
+from databricks.sql.utils import convert_to_assigned_datatypes_in_column_table
+
+
+class TestUtils:
+    """Unit tests for convert_to_assigned_datatypes_in_column_table."""
+
+    def get_column_table_and_description(self):
+        """Return a one-row column table and the matching column descriptions."""
+        table_description = [
+            ("id", "int", None, None, None, None, None),
+            ("varchar_column", "string", None, None, None, None, None),
+            ("boolean_column", "boolean", None, None, None, None, None),
+            ("integer_column", "int", None, None, None, None, None),
+            ("bigint_column", "bigint", None, None, None, None, None),
+            ("smallint_column", "smallint", None, None, None, None, None),
+            ("tinyint_column", "tinyint", None, None, None, None, None),
+            ("float_column", "float", None, None, None, None, None),
+            ("double_column", "double", None, None, None, None, None),
+            ("decimal_column", "decimal", None, None, 10, 2, None),
+            ("date_column", "date", None, None, None, None, None),
+            ("timestamp_column", "timestamp", None, None, None, None, None),
+            ("timestamp_ntz_column", "timestamp", None, None, None, None, None),
+            ("timestamp_column_2", "timestamp", None, None, None, None, None),
+            ("timestamp_column_3", "timestamp", None, None, None, None, None),
+            ("timestamp_column_4", "timestamp", None, None, None, None, None),
+            ("binary_column", "binary", None, None, None, None, None),
+            ("array_column", "array", None, None, None, None, None),
+            ("map_column", "map", None, None, None, None, None),
+            ("struct_column", "struct", None, None, None, None, None),
+            ("variant_column", "string", None, None, None, None, None),
+        ]
+        column_table = [
+            (9,),
+            ("Test Varchar",),
+            (True,),
+            (123,),
+            (9876543210,),
+            (32000,),
+            (120,),
+            (1.23,),
+            (4.56,),
+            ("7890.12",),
+            ("2023-12-31",),
+            ("2023-12-31 12:30:00",),
+            ("2023-12-31 12:30:00",),
+            ("2021-09-30 11:27:35.123",),
+            ("03/08/2024 02:30:15 PM",),
+            ("08-Mar-2024 14:30:15",),
+            (b"\xde\xad\xbe\xef",),
+            ('["item1","item2"]',),
+            ('{"key1":"value1","key2":"value2"}',),
+            ('{"name":"John","age":30}',),
+            ('"semi-structured data"',),
+        ]
+        return column_table, table_description
+
+    def test_convert_to_assigned_datatypes_in_column_table(self):
+        """Check the converted value and Python type of every column."""
+        converted = convert_to_assigned_datatypes_in_column_table(
+            *self.get_column_table_and_description()
+        )
+        expected = [  # (value, type) expected for each column, in order
+            (9, int),
+            ("Test Varchar", str),
+            (True, bool),
+            (123, int),
+            (9876543210, int),
+            (32000, int),
+            (120, int),
+            (1.23, float),
+            (4.56, float),
+            (decimal.Decimal("7890.12"), decimal.Decimal),
+            (datetime.date(2023, 12, 31), datetime.date),
+            (datetime.datetime(2023, 12, 31, 12, 30, 0), datetime.datetime),
+            (datetime.datetime(2023, 12, 31, 12, 30, 0), datetime.datetime),
+            (datetime.datetime(2021, 9, 30, 11, 27, 35, 123000), datetime.datetime),
+            (datetime.datetime(2024, 3, 8, 14, 30, 15), datetime.datetime),
+            (datetime.datetime(2024, 3, 8, 14, 30, 15), datetime.datetime),
+            (b"\xde\xad\xbe\xef", bytes),
+            ('["item1","item2"]', str),
+            ('{"key1":"value1","key2":"value2"}', str),
+            ('{"name":"John","age":30}', str),
+            ('"semi-structured data"', str),
+        ]
+        # Iterating only the result would pass vacuously if columns were dropped.
+        assert len(converted) == len(expected)
+        for column, (value, value_type) in zip(converted, expected):
+            assert column[0] == value
+            assert isinstance(column[0], value_type)