From 548e395fa98799de308f4c39155292035e1a87d9 Mon Sep 17 00:00:00 2001 From: varun-edachali-dbx Date: Mon, 7 Jul 2025 04:17:31 +0000 Subject: [PATCH 1/5] introduce column normalisation for SEA metadata queries Signed-off-by: varun-edachali-dbx --- src/databricks/sql/backend/column_mapping.py | 105 ++++++++++ src/databricks/sql/backend/sea/backend.py | 14 ++ tests/unit/backend/test_column_mapping.py | 200 +++++++++++++++++++ tests/unit/test_sea_backend.py | 57 ++++++ 4 files changed, 376 insertions(+) create mode 100644 src/databricks/sql/backend/column_mapping.py create mode 100644 tests/unit/backend/test_column_mapping.py diff --git a/src/databricks/sql/backend/column_mapping.py b/src/databricks/sql/backend/column_mapping.py new file mode 100644 index 00000000..c2e80233 --- /dev/null +++ b/src/databricks/sql/backend/column_mapping.py @@ -0,0 +1,105 @@ +""" +Column name mappings between different backend protocols. + +This module provides mappings between column names returned by different backends + to ensure a consistent interface for metadata operations. 
+""" + +from enum import Enum + + +class MetadataOp(Enum): + """Enum for metadata operations.""" + + CATALOGS = "catalogs" + SCHEMAS = "schemas" + TABLES = "tables" + COLUMNS = "columns" + + +# Mappings from column names to standard column names +CATALOG_OP = { + "catalog": "TABLE_CAT", +} + +SCHEMA_OP = { + "databaseName": "TABLE_SCHEM", + "catalogName": "TABLE_CATALOG", +} + +TABLE_OP = { + "catalogName": "TABLE_CAT", + "namespace": "TABLE_SCHEM", + "tableName": "TABLE_NAME", + "tableType": "TABLE_TYPE", + "remarks": "REMARKS", + "TYPE_CATALOG_COLUMN": "TYPE_CAT", + "TYPE_SCHEMA_COLUMN": "TYPE_SCHEM", + "TYPE_NAME": "TYPE_NAME", + "SELF_REFERENCING_COLUMN_NAME": "SELF_REFERENCING_COL_NAME", + "REF_GENERATION_COLUMN": "REF_GENERATION", +} + +COLUMN_OP = { + "catalogName": "TABLE_CAT", + "namespace": "TABLE_SCHEM", + "tableName": "TABLE_NAME", + "columnName": "COLUMN_NAME", + "dataType": "DATA_TYPE", + "columnType": "TYPE_NAME", + "columnSize": "COLUMN_SIZE", + "bufferLength": "BUFFER_LENGTH", + "decimalDigits": "DECIMAL_DIGITS", + "radix": "NUM_PREC_RADIX", + "nullable": "NULLABLE", + "remarks": "REMARKS", + "columnDef": "COLUMN_DEF", + "sqlDataType": "SQL_DATA_TYPE", + "sqlDatetimeSub": "SQL_DATETIME_SUB", + "charOctetLength": "CHAR_OCTET_LENGTH", + "ordinalPosition": "ORDINAL_POSITION", + "isNullable": "IS_NULLABLE", + "scopeCatalog": "SCOPE_CATALOG", + "scopeSchema": "SCOPE_SCHEMA", + "scopeTable": "SCOPE_TABLE", + "sourceDataType": "SOURCE_DATA_TYPE", + "isAutoIncrement": "IS_AUTOINCREMENT", + "isGenerated": "IS_GENERATEDCOLUMN", +} + + +def normalise_metadata_result(result_set, operation: MetadataOp): + """ + Normalise column names in a result set based on the operation type. + This function modifies the result set in place. 
+ + Args: + result_set: The result set object to normalise + operation: The metadata operation (from MetadataOp enum) + """ + + # Select the appropriate mapping based on the operation + mapping = None + if operation == MetadataOp.CATALOGS: + mapping = CATALOG_OP + elif operation == MetadataOp.SCHEMAS: + mapping = SCHEMA_OP + elif operation == MetadataOp.TABLES: + mapping = TABLE_OP + elif operation == MetadataOp.COLUMNS: + mapping = COLUMN_OP + + if mapping is None: + return + + # Normalize column names in the description + new_description = [] + for col_desc in result_set.description: + col_name = col_desc[0] + if col_name in mapping: + # Create a new column description tuple with the normalized name + new_col_desc = (mapping[col_name],) + col_desc[1:] + new_description.append(new_col_desc) + else: + new_description.append(col_desc) + result_set.description = new_description diff --git a/src/databricks/sql/backend/sea/backend.py b/src/databricks/sql/backend/sea/backend.py index f729e8b8..e488ecad 100644 --- a/src/databricks/sql/backend/sea/backend.py +++ b/src/databricks/sql/backend/sea/backend.py @@ -31,6 +31,9 @@ from databricks.sql.backend.sea.utils.http_client import SeaHttpClient from databricks.sql.types import SSLOptions +# Import the column mapping module +from databricks.sql.backend.column_mapping import normalise_metadata_result, MetadataOp + from databricks.sql.backend.sea.models import ( ExecuteStatementRequest, GetStatementRequest, @@ -681,6 +684,9 @@ def get_catalogs( enforce_embedded_schema_correctness=False, ) assert result is not None, "execute_command returned None in synchronous mode" + + normalise_metadata_result(result, MetadataOp.CATALOGS) + return result def get_schemas( @@ -714,6 +720,9 @@ def get_schemas( enforce_embedded_schema_correctness=False, ) assert result is not None, "execute_command returned None in synchronous mode" + + normalise_metadata_result(result, MetadataOp.SCHEMAS) + return result def get_tables( @@ -761,6 +770,8 @@ def 
get_tables( result = ResultSetFilter.filter_tables_by_type(result, table_types) + normalise_metadata_result(result, MetadataOp.TABLES) + return result def get_columns( @@ -802,4 +813,7 @@ def get_columns( enforce_embedded_schema_correctness=False, ) assert result is not None, "execute_command returned None in synchronous mode" + + normalise_metadata_result(result, MetadataOp.COLUMNS) + return result diff --git a/tests/unit/backend/test_column_mapping.py b/tests/unit/backend/test_column_mapping.py new file mode 100644 index 00000000..627e77fa --- /dev/null +++ b/tests/unit/backend/test_column_mapping.py @@ -0,0 +1,200 @@ +""" +Tests for the column mapping module. +""" + +import pytest +from unittest.mock import MagicMock +from enum import Enum + +from databricks.sql.backend.column_mapping import ( + normalise_metadata_result, + MetadataOp, + CATALOG_OP, + SCHEMA_OP, + TABLE_OP, + COLUMN_OP, +) + + +class TestColumnMapping: + """Tests for the column mapping module.""" + + def test_normalize_metadata_result_catalogs(self): + """Test normalizing catalog column names.""" + # Create a mock result set with a description + mock_result = MagicMock() + mock_result.description = [ + ("catalog", "string", None, None, None, None, True), + ("other_column", "string", None, None, None, None, True), + ] + + # Normalize the result set + normalise_metadata_result(mock_result, MetadataOp.CATALOGS) + + # Check that the column names were normalized + assert mock_result.description[0][0] == "TABLE_CAT" + assert mock_result.description[1][0] == "other_column" + + def test_normalize_metadata_result_schemas(self): + """Test normalizing schema column names.""" + # Create a mock result set with a description + mock_result = MagicMock() + mock_result.description = [ + ("databaseName", "string", None, None, None, None, True), + ("catalogName", "string", None, None, None, None, True), + ("other_column", "string", None, None, None, None, True), + ] + + # Normalize the result set + 
normalise_metadata_result(mock_result, MetadataOp.SCHEMAS) + + # Check that the column names were normalized + assert mock_result.description[0][0] == "TABLE_SCHEM" + assert mock_result.description[1][0] == "TABLE_CATALOG" + assert mock_result.description[2][0] == "other_column" + + def test_normalize_metadata_result_tables(self): + """Test normalizing table column names.""" + # Create a mock result set with a description + mock_result = MagicMock() + mock_result.description = [ + ("catalogName", "string", None, None, None, None, True), + ("namespace", "string", None, None, None, None, True), + ("tableName", "string", None, None, None, None, True), + ("tableType", "string", None, None, None, None, True), + ("remarks", "string", None, None, None, None, True), + ("TYPE_CATALOG_COLUMN", "string", None, None, None, None, True), + ("TYPE_SCHEMA_COLUMN", "string", None, None, None, None, True), + ("TYPE_NAME", "string", None, None, None, None, True), + ("SELF_REFERENCING_COLUMN_NAME", "string", None, None, None, None, True), + ("REF_GENERATION_COLUMN", "string", None, None, None, None, True), + ("other_column", "string", None, None, None, None, True), + ] + + # Normalize the result set + normalise_metadata_result(mock_result, MetadataOp.TABLES) + + # Check that the column names were normalized + assert mock_result.description[0][0] == "TABLE_CAT" + assert mock_result.description[1][0] == "TABLE_SCHEM" + assert mock_result.description[2][0] == "TABLE_NAME" + assert mock_result.description[3][0] == "TABLE_TYPE" + assert mock_result.description[4][0] == "REMARKS" + assert mock_result.description[5][0] == "TYPE_CAT" + assert mock_result.description[6][0] == "TYPE_SCHEM" + assert mock_result.description[7][0] == "TYPE_NAME" + assert mock_result.description[8][0] == "SELF_REFERENCING_COL_NAME" + assert mock_result.description[9][0] == "REF_GENERATION" + assert mock_result.description[10][0] == "other_column" + + def test_normalize_metadata_result_columns(self): + """Test 
normalizing column column names.""" + # Create a mock result set with a description + mock_result = MagicMock() + mock_result.description = [ + ("catalogName", "string", None, None, None, None, True), + ("namespace", "string", None, None, None, None, True), + ("tableName", "string", None, None, None, None, True), + ("columnName", "string", None, None, None, None, True), + ("dataType", "string", None, None, None, None, True), + ("columnType", "string", None, None, None, None, True), + ("columnSize", "string", None, None, None, None, True), + ("bufferLength", "string", None, None, None, None, True), + ("decimalDigits", "string", None, None, None, None, True), + ("radix", "string", None, None, None, None, True), + ("nullable", "string", None, None, None, None, True), + ("remarks", "string", None, None, None, None, True), + ("columnDef", "string", None, None, None, None, True), + ("sqlDataType", "string", None, None, None, None, True), + ("sqlDatetimeSub", "string", None, None, None, None, True), + ("charOctetLength", "string", None, None, None, None, True), + ("ordinalPosition", "string", None, None, None, None, True), + ("isNullable", "string", None, None, None, None, True), + ("scopeCatalog", "string", None, None, None, None, True), + ("scopeSchema", "string", None, None, None, None, True), + ("scopeTable", "string", None, None, None, None, True), + ("sourceDataType", "string", None, None, None, None, True), + ("isAutoIncrement", "string", None, None, None, None, True), + ("isGenerated", "string", None, None, None, None, True), + ("other_column", "string", None, None, None, None, True), + ] + + # Normalize the result set + normalise_metadata_result(mock_result, MetadataOp.COLUMNS) + + # Check that the column names were normalized + assert mock_result.description[0][0] == "TABLE_CAT" + assert mock_result.description[1][0] == "TABLE_SCHEM" + assert mock_result.description[2][0] == "TABLE_NAME" + assert mock_result.description[3][0] == "COLUMN_NAME" + assert 
mock_result.description[4][0] == "DATA_TYPE" + assert mock_result.description[5][0] == "TYPE_NAME" + assert mock_result.description[6][0] == "COLUMN_SIZE" + assert mock_result.description[7][0] == "BUFFER_LENGTH" + assert mock_result.description[8][0] == "DECIMAL_DIGITS" + assert mock_result.description[9][0] == "NUM_PREC_RADIX" + assert mock_result.description[10][0] == "NULLABLE" + assert mock_result.description[11][0] == "REMARKS" + assert mock_result.description[12][0] == "COLUMN_DEF" + assert mock_result.description[13][0] == "SQL_DATA_TYPE" + assert mock_result.description[14][0] == "SQL_DATETIME_SUB" + assert mock_result.description[15][0] == "CHAR_OCTET_LENGTH" + assert mock_result.description[16][0] == "ORDINAL_POSITION" + assert mock_result.description[17][0] == "IS_NULLABLE" + assert mock_result.description[18][0] == "SCOPE_CATALOG" + assert mock_result.description[19][0] == "SCOPE_SCHEMA" + assert mock_result.description[20][0] == "SCOPE_TABLE" + assert mock_result.description[21][0] == "SOURCE_DATA_TYPE" + assert mock_result.description[22][0] == "IS_AUTOINCREMENT" + assert mock_result.description[23][0] == "IS_GENERATEDCOLUMN" + assert mock_result.description[24][0] == "other_column" + + def test_normalize_metadata_result_unknown_operation(self): + """Test normalizing with an unknown operation type.""" + # Create a mock result set with a description + mock_result = MagicMock() + mock_result.description = [ + ("column1", "string", None, None, None, None, True), + ("column2", "string", None, None, None, None, True), + ] + + # Save the original description + original_description = mock_result.description.copy() + + # Create a separate enum for testing + class TestOp(Enum): + UNKNOWN = "unknown" + + # Normalize the result set with an unknown operation + normalise_metadata_result(mock_result, TestOp.UNKNOWN) + + # Check that the description was not modified + assert mock_result.description == original_description + + def 
test_normalize_metadata_result_preserves_other_fields(self): + """Test that normalization preserves other fields in the description.""" + # Create a mock result set with a description + mock_result = MagicMock() + mock_result.description = [ + ( + "catalog", + "string", + "display_size", + "internal_size", + "precision", + "scale", + True, + ), + ] + + # Normalize the result set + normalise_metadata_result(mock_result, MetadataOp.CATALOGS) + + # Check that the column name was normalized but other fields preserved + assert mock_result.description[0][0] == "TABLE_CAT" + assert mock_result.description[0][1] == "string" + assert mock_result.description[0][2] == "display_size" + assert mock_result.description[0][3] == "internal_size" + assert mock_result.description[0][4] == "precision" + assert mock_result.description[0][5] == "scale" + assert mock_result.description[0][6] == True diff --git a/tests/unit/test_sea_backend.py b/tests/unit/test_sea_backend.py index 67c202bc..d91bce82 100644 --- a/tests/unit/test_sea_backend.py +++ b/tests/unit/test_sea_backend.py @@ -650,6 +650,11 @@ def test_get_catalogs(self, sea_client, sea_session_id, mock_cursor): """Test the get_catalogs method.""" # Mock the execute_command method mock_result_set = Mock() + # Add description attribute to the mock result set + mock_result_set.description = [ + ("catalog", "string", None, None, None, None, True), + ] + with patch.object( sea_client, "execute_command", return_value=mock_result_set ) as mock_execute: @@ -678,10 +683,19 @@ def test_get_catalogs(self, sea_client, sea_session_id, mock_cursor): # Verify the result is correct assert result == mock_result_set + # Verify that column normalization was applied + assert result.description[0][0] == "TABLE_CAT" + def test_get_schemas(self, sea_client, sea_session_id, mock_cursor): """Test the get_schemas method with various parameter combinations.""" # Mock the execute_command method mock_result_set = Mock() + # Add description attribute to the 
mock result set + mock_result_set.description = [ + ("databaseName", "string", None, None, None, None, True), + ("catalogName", "string", None, None, None, None, True), + ] + with patch.object( sea_client, "execute_command", return_value=mock_result_set ) as mock_execute: @@ -707,6 +721,10 @@ def test_get_schemas(self, sea_client, sea_session_id, mock_cursor): enforce_embedded_schema_correctness=False, ) + # Verify that column normalization was applied + assert result.description[0][0] == "TABLE_SCHEM" + assert result.description[1][0] == "TABLE_CATALOG" + # Case 2: With catalog and schema names result = sea_client.get_schemas( session_id=sea_session_id, @@ -746,6 +764,14 @@ def test_get_tables(self, sea_client, sea_session_id, mock_cursor): from databricks.sql.backend.sea.result_set import SeaResultSet mock_result_set = Mock(spec=SeaResultSet) + # Add description attribute to the mock result set + mock_result_set.description = [ + ("catalogName", "string", None, None, None, None, True), + ("namespace", "string", None, None, None, None, True), + ("tableName", "string", None, None, None, None, True), + ("tableType", "string", None, None, None, None, True), + ("remarks", "string", None, None, None, None, True), + ] with patch.object( sea_client, "execute_command", return_value=mock_result_set @@ -778,6 +804,13 @@ def test_get_tables(self, sea_client, sea_session_id, mock_cursor): ) mock_filter.assert_called_with(mock_result_set, None) + # Verify that column normalization was applied + assert result.description[0][0] == "TABLE_CAT" + assert result.description[1][0] == "TABLE_SCHEM" + assert result.description[2][0] == "TABLE_NAME" + assert result.description[3][0] == "TABLE_TYPE" + assert result.description[4][0] == "REMARKS" + # Case 2: With all parameters table_types = ["TABLE", "VIEW"] result = sea_client.get_tables( @@ -831,6 +864,19 @@ def test_get_columns(self, sea_client, sea_session_id, mock_cursor): """Test the get_columns method with various parameter 
combinations.""" # Mock the execute_command method mock_result_set = Mock() + # Add description attribute to the mock result set + mock_result_set.description = [ + ("catalogName", "string", None, None, None, None, True), + ("namespace", "string", None, None, None, None, True), + ("tableName", "string", None, None, None, None, True), + ("columnName", "string", None, None, None, None, True), + ("columnType", "string", None, None, None, None, True), + ("dataType", "string", None, None, None, None, True), + ("nullable", "string", None, None, None, None, True), + ("isNullable", "string", None, None, None, None, True), + ("ordinalPosition", "string", None, None, None, None, True), + ] + with patch.object( sea_client, "execute_command", return_value=mock_result_set ) as mock_execute: @@ -856,6 +902,17 @@ def test_get_columns(self, sea_client, sea_session_id, mock_cursor): enforce_embedded_schema_correctness=False, ) + # Verify that column normalization was applied + assert result.description[0][0] == "TABLE_CAT" + assert result.description[1][0] == "TABLE_SCHEM" + assert result.description[2][0] == "TABLE_NAME" + assert result.description[3][0] == "COLUMN_NAME" + assert result.description[4][0] == "TYPE_NAME" + assert result.description[5][0] == "DATA_TYPE" + assert result.description[6][0] == "NULLABLE" + assert result.description[7][0] == "IS_NULLABLE" + assert result.description[8][0] == "ORDINAL_POSITION" + # Case 2: With all parameters result = sea_client.get_columns( session_id=sea_session_id, From 61a920065b38db54647d9896c743d564c437b267 Mon Sep 17 00:00:00 2001 From: varun-edachali-dbx Date: Mon, 7 Jul 2025 04:27:02 +0000 Subject: [PATCH 2/5] add refs, correct COLUMN_COLUMNS mappings Signed-off-by: varun-edachali-dbx --- src/databricks/sql/backend/column_mapping.py | 40 +++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/databricks/sql/backend/column_mapping.py b/src/databricks/sql/backend/column_mapping.py index 
c2e80233..f1b31c4d 100644 --- a/src/databricks/sql/backend/column_mapping.py +++ b/src/databricks/sql/backend/column_mapping.py @@ -18,16 +18,19 @@ class MetadataOp(Enum): # Mappings from column names to standard column names -CATALOG_OP = { +# ref: CATALOG_COLUMNS in JDBC: https://github.com/databricks/databricks-jdbc/blob/e3d0d8dad683146a3afc3d501ddf0864ba086309/src/main/java/com/databricks/jdbc/common/MetadataResultConstants.java#L219 +CATALOG_COLUMNS = { "catalog": "TABLE_CAT", } -SCHEMA_OP = { +# ref: SCHEMA_COLUMNS in JDBC: https://github.com/databricks/databricks-jdbc/blob/e3d0d8dad683146a3afc3d501ddf0864ba086309/src/main/java/com/databricks/jdbc/common/MetadataResultConstants.java#L221 +SCHEMA_COLUMNS = { "databaseName": "TABLE_SCHEM", "catalogName": "TABLE_CATALOG", } -TABLE_OP = { +# ref: TABLE_COLUMNS in JDBC: https://github.com/databricks/databricks-jdbc/blob/e3d0d8dad683146a3afc3d501ddf0864ba086309/src/main/java/com/databricks/jdbc/common/MetadataResultConstants.java#L224 +TABLE_COLUMNS = { "catalogName": "TABLE_CAT", "namespace": "TABLE_SCHEM", "tableName": "TABLE_NAME", @@ -40,29 +43,30 @@ class MetadataOp(Enum): "REF_GENERATION_COLUMN": "REF_GENERATION", } -COLUMN_OP = { +# ref: COLUMN_COLUMNS in JDBC: https://github.com/databricks/databricks-jdbc/blob/e3d0d8dad683146a3afc3d501ddf0864ba086309/src/main/java/com/databricks/jdbc/common/MetadataResultConstants.java#L192 +COLUMN_COLUMNS = { "catalogName": "TABLE_CAT", "namespace": "TABLE_SCHEM", "tableName": "TABLE_NAME", - "columnName": "COLUMN_NAME", + "col_name": "COLUMN_NAME", "dataType": "DATA_TYPE", "columnType": "TYPE_NAME", "columnSize": "COLUMN_SIZE", "bufferLength": "BUFFER_LENGTH", "decimalDigits": "DECIMAL_DIGITS", "radix": "NUM_PREC_RADIX", - "nullable": "NULLABLE", + "Nullable": "NULLABLE", "remarks": "REMARKS", - "columnDef": "COLUMN_DEF", - "sqlDataType": "SQL_DATA_TYPE", - "sqlDatetimeSub": "SQL_DATETIME_SUB", - "charOctetLength": "CHAR_OCTET_LENGTH", + "columnType": "COLUMN_DEF", + 
"SQLDataType": "SQL_DATA_TYPE", + "SQLDatetimeSub": "SQL_DATETIME_SUB", + "CharOctetLength": "CHAR_OCTET_LENGTH", "ordinalPosition": "ORDINAL_POSITION", "isNullable": "IS_NULLABLE", - "scopeCatalog": "SCOPE_CATALOG", - "scopeSchema": "SCOPE_SCHEMA", - "scopeTable": "SCOPE_TABLE", - "sourceDataType": "SOURCE_DATA_TYPE", + "ScopeCatalog": "SCOPE_CATALOG", + "ScopeSchema": "SCOPE_SCHEMA", + "ScopeTable": "SCOPE_TABLE", + "SourceDataType": "SOURCE_DATA_TYPE", "isAutoIncrement": "IS_AUTOINCREMENT", "isGenerated": "IS_GENERATEDCOLUMN", } @@ -81,13 +85,13 @@ def normalise_metadata_result(result_set, operation: MetadataOp): # Select the appropriate mapping based on the operation mapping = None if operation == MetadataOp.CATALOGS: - mapping = CATALOG_OP + mapping = CATALOG_COLUMNS elif operation == MetadataOp.SCHEMAS: - mapping = SCHEMA_OP + mapping = SCHEMA_COLUMNS elif operation == MetadataOp.TABLES: - mapping = TABLE_OP + mapping = TABLE_COLUMNS elif operation == MetadataOp.COLUMNS: - mapping = COLUMN_OP + mapping = COLUMN_COLUMNS if mapping is None: return From c5d97bdeb55c1ab698ff7b331184df50143b171f Mon Sep 17 00:00:00 2001 From: varun-edachali-dbx Date: Mon, 7 Jul 2025 05:01:32 +0000 Subject: [PATCH 3/5] remove TYPE_NAME mapping Signed-off-by: varun-edachali-dbx --- src/databricks/sql/backend/column_mapping.py | 3 +- tests/unit/backend/test_column_mapping.py | 64 +++++++++----------- tests/unit/test_sea_backend.py | 6 +- 3 files changed, 34 insertions(+), 39 deletions(-) diff --git a/src/databricks/sql/backend/column_mapping.py b/src/databricks/sql/backend/column_mapping.py index f1b31c4d..c463f3ad 100644 --- a/src/databricks/sql/backend/column_mapping.py +++ b/src/databricks/sql/backend/column_mapping.py @@ -44,13 +44,14 @@ class MetadataOp(Enum): } # ref: COLUMN_COLUMNS in JDBC: https://github.com/databricks/databricks-jdbc/blob/e3d0d8dad683146a3afc3d501ddf0864ba086309/src/main/java/com/databricks/jdbc/common/MetadataResultConstants.java#L192 +# TYPE_NAME is not 
included because it is a duplicate target for columnType, and COLUMN_DEF is known to be returned by Thrift. +# TODO: check if TYPE_NAME is to be returned / also used by Thrift. COLUMN_COLUMNS = { "catalogName": "TABLE_CAT", "namespace": "TABLE_SCHEM", "tableName": "TABLE_NAME", "col_name": "COLUMN_NAME", "dataType": "DATA_TYPE", - "columnType": "TYPE_NAME", "columnSize": "COLUMN_SIZE", "bufferLength": "BUFFER_LENGTH", "decimalDigits": "DECIMAL_DIGITS", diff --git a/tests/unit/backend/test_column_mapping.py b/tests/unit/backend/test_column_mapping.py index 627e77fa..a5f4712b 100644 --- a/tests/unit/backend/test_column_mapping.py +++ b/tests/unit/backend/test_column_mapping.py @@ -9,10 +9,6 @@ from databricks.sql.backend.column_mapping import ( normalise_metadata_result, MetadataOp, - CATALOG_OP, - SCHEMA_OP, - TABLE_OP, - COLUMN_OP, ) @@ -95,25 +91,24 @@ def test_normalize_metadata_result_columns(self): ("catalogName", "string", None, None, None, None, True), ("namespace", "string", None, None, None, None, True), ("tableName", "string", None, None, None, None, True), - ("columnName", "string", None, None, None, None, True), + ("col_name", "string", None, None, None, None, True), ("dataType", "string", None, None, None, None, True), - ("columnType", "string", None, None, None, None, True), ("columnSize", "string", None, None, None, None, True), ("bufferLength", "string", None, None, None, None, True), ("decimalDigits", "string", None, None, None, None, True), ("radix", "string", None, None, None, None, True), - ("nullable", "string", None, None, None, None, True), + ("Nullable", "string", None, None, None, None, True), ("remarks", "string", None, None, None, None, True), - ("columnDef", "string", None, None, None, None, True), - ("sqlDataType", "string", None, None, None, None, True), - ("sqlDatetimeSub", "string", None, None, None, None, True), - ("charOctetLength", "string", None, None, None, None, True), + ("columnType", "string", None, None, None, None, True), + 
("SQLDataType", "string", None, None, None, None, True), + ("SQLDatetimeSub", "string", None, None, None, None, True), + ("CharOctetLength", "string", None, None, None, None, True), ("ordinalPosition", "string", None, None, None, None, True), ("isNullable", "string", None, None, None, None, True), - ("scopeCatalog", "string", None, None, None, None, True), - ("scopeSchema", "string", None, None, None, None, True), - ("scopeTable", "string", None, None, None, None, True), - ("sourceDataType", "string", None, None, None, None, True), + ("ScopeCatalog", "string", None, None, None, None, True), + ("ScopeSchema", "string", None, None, None, None, True), + ("ScopeTable", "string", None, None, None, None, True), + ("SourceDataType", "string", None, None, None, None, True), ("isAutoIncrement", "string", None, None, None, None, True), ("isGenerated", "string", None, None, None, None, True), ("other_column", "string", None, None, None, None, True), @@ -128,26 +123,25 @@ def test_normalize_metadata_result_columns(self): assert mock_result.description[2][0] == "TABLE_NAME" assert mock_result.description[3][0] == "COLUMN_NAME" assert mock_result.description[4][0] == "DATA_TYPE" - assert mock_result.description[5][0] == "TYPE_NAME" - assert mock_result.description[6][0] == "COLUMN_SIZE" - assert mock_result.description[7][0] == "BUFFER_LENGTH" - assert mock_result.description[8][0] == "DECIMAL_DIGITS" - assert mock_result.description[9][0] == "NUM_PREC_RADIX" - assert mock_result.description[10][0] == "NULLABLE" - assert mock_result.description[11][0] == "REMARKS" - assert mock_result.description[12][0] == "COLUMN_DEF" - assert mock_result.description[13][0] == "SQL_DATA_TYPE" - assert mock_result.description[14][0] == "SQL_DATETIME_SUB" - assert mock_result.description[15][0] == "CHAR_OCTET_LENGTH" - assert mock_result.description[16][0] == "ORDINAL_POSITION" - assert mock_result.description[17][0] == "IS_NULLABLE" - assert mock_result.description[18][0] == "SCOPE_CATALOG" - 
assert mock_result.description[19][0] == "SCOPE_SCHEMA" - assert mock_result.description[20][0] == "SCOPE_TABLE" - assert mock_result.description[21][0] == "SOURCE_DATA_TYPE" - assert mock_result.description[22][0] == "IS_AUTOINCREMENT" - assert mock_result.description[23][0] == "IS_GENERATEDCOLUMN" - assert mock_result.description[24][0] == "other_column" + assert mock_result.description[5][0] == "COLUMN_SIZE" + assert mock_result.description[6][0] == "BUFFER_LENGTH" + assert mock_result.description[7][0] == "DECIMAL_DIGITS" + assert mock_result.description[8][0] == "NUM_PREC_RADIX" + assert mock_result.description[9][0] == "NULLABLE" + assert mock_result.description[10][0] == "REMARKS" + assert mock_result.description[11][0] == "COLUMN_DEF" + assert mock_result.description[12][0] == "SQL_DATA_TYPE" + assert mock_result.description[13][0] == "SQL_DATETIME_SUB" + assert mock_result.description[14][0] == "CHAR_OCTET_LENGTH" + assert mock_result.description[15][0] == "ORDINAL_POSITION" + assert mock_result.description[16][0] == "IS_NULLABLE" + assert mock_result.description[17][0] == "SCOPE_CATALOG" + assert mock_result.description[18][0] == "SCOPE_SCHEMA" + assert mock_result.description[19][0] == "SCOPE_TABLE" + assert mock_result.description[20][0] == "SOURCE_DATA_TYPE" + assert mock_result.description[21][0] == "IS_AUTOINCREMENT" + assert mock_result.description[22][0] == "IS_GENERATEDCOLUMN" + assert mock_result.description[23][0] == "other_column" def test_normalize_metadata_result_unknown_operation(self): """Test normalizing with an unknown operation type.""" diff --git a/tests/unit/test_sea_backend.py b/tests/unit/test_sea_backend.py index d91bce82..93f53288 100644 --- a/tests/unit/test_sea_backend.py +++ b/tests/unit/test_sea_backend.py @@ -869,10 +869,10 @@ def test_get_columns(self, sea_client, sea_session_id, mock_cursor): ("catalogName", "string", None, None, None, None, True), ("namespace", "string", None, None, None, None, True), ("tableName", 
"string", None, None, None, None, True), - ("columnName", "string", None, None, None, None, True), + ("col_name", "string", None, None, None, None, True), ("columnType", "string", None, None, None, None, True), ("dataType", "string", None, None, None, None, True), - ("nullable", "string", None, None, None, None, True), + ("Nullable", "string", None, None, None, None, True), ("isNullable", "string", None, None, None, None, True), ("ordinalPosition", "string", None, None, None, None, True), ] @@ -907,7 +907,7 @@ def test_get_columns(self, sea_client, sea_session_id, mock_cursor): assert result.description[1][0] == "TABLE_SCHEM" assert result.description[2][0] == "TABLE_NAME" assert result.description[3][0] == "COLUMN_NAME" - assert result.description[4][0] == "TYPE_NAME" + assert result.description[4][0] == "COLUMN_DEF" assert result.description[5][0] == "DATA_TYPE" assert result.description[6][0] == "NULLABLE" assert result.description[7][0] == "IS_NULLABLE" From 48bb680ec6f0d769f4ead6cc2f6ad4e822731608 Mon Sep 17 00:00:00 2001 From: varun-edachali-dbx Date: Mon, 7 Jul 2025 05:02:27 +0000 Subject: [PATCH 4/5] move test-column-mapping into tests/unit/ Signed-off-by: varun-edachali-dbx --- tests/unit/{backend => }/test_column_mapping.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/unit/{backend => }/test_column_mapping.py (100%) diff --git a/tests/unit/backend/test_column_mapping.py b/tests/unit/test_column_mapping.py similarity index 100% rename from tests/unit/backend/test_column_mapping.py rename to tests/unit/test_column_mapping.py From 8f12c33e8f89842448d0d0da494b6e70139f7eed Mon Sep 17 00:00:00 2001 From: varun-edachali-dbx Date: Mon, 7 Jul 2025 05:13:16 +0000 Subject: [PATCH 5/5] formatting (black) Signed-off-by: varun-edachali-dbx --- src/databricks/sql/backend/column_mapping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/sql/backend/column_mapping.py b/src/databricks/sql/backend/column_mapping.py 
index c463f3ad..6305d3b5 100644 --- a/src/databricks/sql/backend/column_mapping.py +++ b/src/databricks/sql/backend/column_mapping.py @@ -44,8 +44,8 @@ class MetadataOp(Enum): } # ref: COLUMN_COLUMNS in JDBC: https://github.com/databricks/databricks-jdbc/blob/e3d0d8dad683146a3afc3d501ddf0864ba086309/src/main/java/com/databricks/jdbc/common/MetadataResultConstants.java#L192 -# TYPE_NAME is not included because it is a duplicate target for columnType, and COLUMN_DEF is known to be returned by Thrift. -# TODO: check if TYPE_NAME is to be returned / also used by Thrift. +# TYPE_NAME is not included because it is a duplicate target for columnType, and COLUMN_DEF is known to be returned by Thrift. +# TODO: check if TYPE_NAME is to be returned / also used by Thrift. COLUMN_COLUMNS = { "catalogName": "TABLE_CAT", "namespace": "TABLE_SCHEM",