Skip to content

Commit 4f551ce

Browse files
Merge branch 'main' into feature/llm-transpile
2 parents 7a50a6c + e19d9b3 commit 4f551ce

40 files changed

+698
-643
lines changed

.github/workflows/acceptance.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ jobs:
3838
python-version: '3.10'
3939

4040
- name: Install hatch
41-
run: pip install hatch==1.14.1 'click<8.3.0' # https://github.com/pallets/click/issues/3065
41+
run: pip install hatch==1.14.2
4242

4343
- name: Install MSSQL ODBC Driver
4444
run: |
4545
chmod +x $GITHUB_WORKSPACE/.github/scripts/setup_mssql_odbc.sh
4646
$GITHUB_WORKSPACE/.github/scripts/setup_mssql_odbc.sh
4747
4848
- name: Run integration tests
49-
uses: databrickslabs/sandbox/acceptance@acceptance/v0.4.2
49+
uses: databrickslabs/sandbox/acceptance@acceptance/v0.4.4
5050
with:
5151
vault_uri: ${{ secrets.VAULT_URI }}
5252
directory: ${{ github.workspace }}

.github/workflows/push.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ on:
1515
- main
1616

1717
env:
18-
HATCH_VERSION: 1.14.1
18+
HATCH_VERSION: 1.14.2
1919

2020
jobs:
2121
test-python:
@@ -34,7 +34,7 @@ jobs:
3434
python-version: '3.10'
3535

3636
- name: Install hatch
37-
run: pip install hatch==$HATCH_VERSION 'click<8.3.0' # https://github.com/pallets/click/issues/3065
37+
run: pip install hatch==$HATCH_VERSION
3838

3939
- name: Run unit tests
4040
run: hatch run test
@@ -61,7 +61,7 @@ jobs:
6161
python-version: '3.10'
6262

6363
- name: Install hatch
64-
run: pip install hatch==$HATCH_VERSION 'click<8.3.0' # https://github.com/pallets/click/issues/3065
64+
run: pip install hatch==$HATCH_VERSION
6565

6666
- name: Setup Spark Remote
6767
run: |
@@ -94,7 +94,7 @@ jobs:
9494
python-version: 3.10.x
9595

9696
- name: Install hatch
97-
run: pip install hatch==$HATCH_VERSION 'click<8.3.0' # https://github.com/pallets/click/issues/3065
97+
run: pip install hatch==$HATCH_VERSION
9898

9999
- name: Reformat code
100100
run: make fmt
@@ -147,7 +147,7 @@ jobs:
147147
python-version: 3.10.x
148148

149149
- name: Install hatch
150-
run: pip install hatch==$HATCH_VERSION 'click<8.3.0' # https://github.com/pallets/click/issues/3065
150+
run: pip install hatch==$HATCH_VERSION
151151

152152
- name: Install Databricks CLI
153153
uses: databricks/setup-cli@main
@@ -199,11 +199,11 @@ jobs:
199199

200200
- name: Install hatch (Windows)
201201
if: runner.os == 'Windows'
202-
run: pip install hatch==$env:HATCH_VERSION 'click<8.3.0' # https://github.com/pallets/click/issues/3065
202+
run: pip install hatch==$env:HATCH_VERSION
203203

204204
- name: Install hatch (Non-Windows)
205205
if: runner.os != 'Windows'
206-
run: pip install hatch==$HATCH_VERSION 'click<8.3.0' # https://github.com/pallets/click/issues/3065
206+
run: pip install hatch==$HATCH_VERSION
207207

208208
- name: Install Databricks CLI
209209
uses: databricks/setup-cli@main

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727

2828
- name: Build wheels
2929
run: |
30-
pip install hatch==1.14.1 'click<8.3.0' # https://github.com/pallets/click/issues/3065
30+
pip install hatch==1.14.2
3131
hatch build
3232
3333
- name: Draft release
@@ -41,7 +41,7 @@ jobs:
4141
name: Publish package distributions to PyPI
4242

4343
- name: Sign artifacts with Sigstore
44-
uses: sigstore/gh-action-sigstore-python@v3.0.0
44+
uses: sigstore/gh-action-sigstore-python@v3.0.1
4545
with:
4646
inputs: |
4747
dist/databricks_*.whl

CHANGELOG.md

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,78 @@
11
# Version changelog
22

3+
## Lakebridge v0.10.11 Release Notes
4+
5+
## Analyzer
6+
No updates in this release
7+
8+
## Converters
9+
10+
### General
11+
- Fixed special character handling in filenames by introducing from_uri() helper function for safer URI handling
12+
- Ensured SQL converter returns UTF-8 encoded files for proper character encoding
13+
- Fixed filename to correctly output databricks_conversion_supplements.py supplemental file
14+
- Fixed broken splitter URL by updating directory naming conventions from "Downloads" to "downloads"
15+
- Improved handling of encoding-related errors by catching UnicodeDecodeError and LookupError exceptions during file processing, creating TranspileError with specific encoding-error codes instead of stopping
16+
17+
### Morpheus
18+
19+
#### Snowflake
20+
- Added support for TRUNCATE TABLE statements with proper IR and translation support
21+
- Correctly support session and system variables
22+
- Refactored and extended grammar and AST support for SQL procedure creation with improved handling of raw string literals
23+
- Enhanced schema reconciliation functionality to support Snowflake arrays, addressing the corner case where Databricks arrays are typed and Snowflake arrays are untyped
24+
25+
#### TSQL
26+
- Added support for TRUNCATE TABLE statements with proper IR and translation support
27+
- Support full CREATE and ALTER INDEX statements in TSQL parsing, rejecting INDEX CREATE/ALTER statements sensibly instead of raising syntax errors
28+
- Fixed implementation of IF scripting blocks with improvements to SQL parser, grammar enhancements, and enhanced scripting grammar for more robust handling of block statements and conditional branches
29+
- Allow CLUSTERED to be an identifier to improve CREATE TABLE syntax as a CONSTRAINT qualifier
30+
- Support percentage expressions in TSQL options (e.g., OPT = 42%) instead of raising parsing errors
31+
- Added support for REVOKE statements, similar to existing GRANT statement implementation
32+
- Ensure that ROWS and OBJECTS can be used as identifiers even with Jinja templates
33+
- Correctly support `@` local and `@@` system variables
34+
35+
#### General (Multiple Dialects)
36+
- Support comments on column declarations when generating SQL and renamed legacy builders for consistency
37+
- Refactored IR around CREATE FUNCTION and CREATE PROCEDURE, unifying all ways to create stored procedures under a single CreateStoredProcedure IR node and all ways to create user defined functions under a single CreateUDF IR node
38+
- Implemented grammar and IR placeholders for named windows, introducing initial support for the SQL standard WINDOW clause in parser grammar
39+
40+
### BladeBridge
41+
42+
#### Oracle
43+
- Removed unsupported Oracle DDL constraints (add/create constraint unique) and extraneous TBLPROPERTIES from converted output
44+
45+
#### MSSQL
46+
- Added handle_xml_nodes function for MS SQL processing
47+
- Fixed multiple MSSQL issues including CTEs in views/stored procedures, ADD CONSTRAINT problems, DEFAULT value handling, and parameter data types
48+
49+
#### Synapse
50+
- Fixed multiple Synapse issues including CTEs in views/stored procedures, ADD CONSTRAINT problems, DEFAULT value handling, parameter data types, error handling in stored procedures, and Synapse-specific features (e.g., table distribution)
51+
52+
#### Teradata
53+
- Added Teradata function mappings including ZEROIFNULL, TEMPORAL_TIMESTAMP, TRYCAST, ANY, FIRST, NULLIFZERO, DECODE with different parameter counts, and HASHAMP
54+
- Removed collect statistics and lock table statements
55+
56+
#### DataStage
57+
- Implemented DataStage Checksum component translation to SparkSQL equivalent and fixed Pyspark checksum translation to use MD5() instead of SHA2()
58+
59+
## Reconcile
60+
- Added handling for special characters in reconcile aggregate, enhancing the library to handle special characters in column names by properly delimiting identifiers in SQL queries
61+
- Fixed deploy reconcile jobs by updating wheel file handling, simplifying deployment process to use single wheel path, and fixing broken documentation links
62+
63+
## Documentation
64+
- Fixed download link in docs (reconcile automation) by replacing broken markdown link with JSX link utilizing useBaseUrl hook
65+
66+
## General
67+
- Implemented new describe-transpile CLI subcommand that describes installed transpilers, including their versions, configuration paths, and supported source dialects
68+
- Switched from urllib to requests library for making HTTP calls to PyPI and Maven Central, with default 60-second timeout and improved error handling
69+
- Work around DATABRICKS_HOST normalization issue during install and uninstall by introducing new Lakebridge subclass with appropriate workspace client
70+
71+
## Dependency updates
72+
* Bump Databricks SDK Version to 0.67.0 by @goodwillpunning in https://github.com/databrickslabs/lakebridge/pull/2062
73+
* Bump sigstore/gh-action-sigstore-python from 3.0.0 to 3.0.1 by @dependabot[bot] in https://github.com/databrickslabs/lakebridge/pull/1753
74+
75+
Special thanks to @BrianDeacon for his contribution to fix https://github.com/databrickslabs/lakebridge/issues/1858
376
## 0.10.10
477

578
## Analyzer

pyproject.toml

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,16 @@ classifiers = [
1515
"Programming Language :: Python",
1616
"Programming Language :: Python :: 3.10",
1717
"Programming Language :: Python :: 3.11",
18+
"Programming Language :: Python :: 3.12",
19+
"Programming Language :: Python :: 3.13",
1820
"Programming Language :: Python :: Implementation :: CPython",
1921
"Environment :: Console",
2022
"Framework :: Pytest",
2123
"Intended Audience :: Developers",
2224
"Intended Audience :: System Administrators",
2325
"Operating System :: MacOS",
2426
"Operating System :: Microsoft :: Windows",
25-
"Topic :: Software Development :: Libraries",
27+
"Operating System :: POSIX :: Linux",
2628
"Topic :: Utilities",
2729
]
2830

@@ -31,9 +33,9 @@ dependencies = [
3133
"standard-distutils~=3.11.9; python_version>='3.11'",
3234
"databricks-bb-analyzer~=0.1.9",
3335
"sqlglot==26.1.3",
34-
"databricks-labs-blueprint[yaml]>=0.11.3,<0.12.0",
36+
"databricks-labs-blueprint[yaml]>=0.11.4,<0.12.0",
3537
"databricks-labs-lsql==0.16.0",
36-
"cryptography>=44.0.2,<45.1.0",
38+
"cryptography>=44.0.2,<46.1.0",
3739
"pyodbc~=5.2.0",
3840
"SQLAlchemy~=2.0.40",
3941
"pygls~=2.0.0a2",
@@ -66,26 +68,25 @@ path = ".venv"
6668
dependencies = [
6769
"pylint~=3.2.2",
6870
"pylint-pytest==2.0.0a0",
69-
"coverage[toml]~=7.8.0",
70-
"pytest~=8.3.5",
71-
"pytest-cov>=5.0.0,<6.0.0",
72-
"pytest-asyncio~=0.26.0",
73-
"pytest-xdist~=3.5.0",
71+
"coverage[toml]~=7.10.7",
72+
"pytest~=8.4.2",
73+
"pytest-cov~=7.0.0",
74+
"pytest-asyncio~=1.2.0",
75+
"pytest-xdist~=3.8.0",
7476
"pytest-timeout~=2.4.0",
75-
"black~=25.1.0",
76-
"ruff~=0.11.6",
77+
"black~=25.9.0",
78+
"ruff~=0.13.2",
7779
"databricks-connect==15.1",
7880
"types-requests>=2.28.1,<3", # Matches the 'requests' above.
7981
"types-pyYAML~=6.0.12",
8082
"types-pytz~=2025.2",
8183
"databricks-labs-pylint~=0.4.0",
8284
"databricks-labs-pytester>=0.3.0",
83-
"mypy~=1.10.0",
85+
"mypy~=1.18.2",
8486
"numpy~=1.26.4",
8587
"pandas~=2.3.1",
8688
"pandas-stubs~=2.3.0.250703",
8789
"cattrs>=25.2.0",
88-
"click<8.3.0", # https://github.com/pallets/click/issues/3065 for ruff
8990
"faker"
9091
]
9192

@@ -151,7 +152,6 @@ lint.ignore = [
151152
# Ignore Exception must not use a string literal, assign to variable first
152153
"EM101",
153154
"PLR2004",
154-
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)`
155155
]
156156
extend-exclude = [
157157
"notebooks/*.py"
@@ -459,7 +459,7 @@ max-bool-expr = 5
459459
max-branches = 20
460460

461461
# Maximum number of locals for function / method body.
462-
max-locals = 19
462+
max-locals = 20
463463

464464
# Maximum number of parents for a class (see R0901).
465465
max-parents = 7
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# DO NOT MODIFY THIS FILE
2-
__version__ = "0.10.10"
2+
__version__ = "0.10.11"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from ._constants import (
2+
PRODUCT_NAME,
3+
PRODUCT_PATH_PREFIX,
4+
PROFILER_SOURCE_SYSTEM,
5+
PLATFORM_TO_SOURCE_TECHNOLOGY,
6+
CONNECTOR_REQUIRED,
7+
)
8+
9+
__all__ = [
10+
"PRODUCT_NAME",
11+
"PRODUCT_PATH_PREFIX",
12+
"PROFILER_SOURCE_SYSTEM",
13+
"PLATFORM_TO_SOURCE_TECHNOLOGY",
14+
"CONNECTOR_REQUIRED",
15+
]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from pathlib import Path
2+
3+
PRODUCT_NAME = "lakebridge"
4+
PRODUCT_PATH_PREFIX = Path.home() / ".databricks" / "labs" / PRODUCT_NAME / "lib"
5+
6+
PLATFORM_TO_SOURCE_TECHNOLOGY = {
7+
"synapse": "src/databricks/labs/lakebridge/resources/assessments/synapse/pipeline_config.yml",
8+
}
9+
10+
# TODO: replace this list with PLATFORM_TO_SOURCE_TECHNOLOGY.keys() once all platforms are supported
11+
PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]
12+
13+
# This flag indicates whether a connector is required for the source system when the pipeline is triggered
14+
# For example, in the case of Synapse no connector is required and the Python scripts
15+
# manage the connection by directly reading the credential files
16+
# Revisit this when more source systems are added to standardize the approach
17+
CONNECTOR_REQUIRED = {
18+
"synapse": False,
19+
"mssql": True,
20+
}

0 commit comments

Comments
 (0)