
Commit 71c1022

feat(databricks-jdbc-driver): Support M2M OAuth Authentication (#9651)
* remove databrickAcceptPolicy env
* fix databricks env var names
* prepare CI
* feat(databricks-jdbc-driver): Support M2M OAuth Authentication
* implement cache for access token
* update authProps
* fix
* update OSS_DRIVER_VERSION
1 parent ae10a76 commit 71c1022

File tree

8 files changed: +172 −67 lines changed


.github/workflows/drivers-tests.yml

Lines changed: 2 additions & 0 deletions
@@ -355,6 +355,8 @@ jobs:
DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_TOKEN: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_TOKEN }}
DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_KEY }}
DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_EXPORT_BUCKET_AWS_SECRET }}
+ DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID }}
+ DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET }}

# Redshift
DRIVERS_TESTS_CUBEJS_DB_REDSHIFT_HOST: ${{ secrets.DRIVERS_TESTS_CUBEJS_DB_REDSHIFT_HOST }}

packages/cubejs-backend-shared/src/env.ts

Lines changed: 28 additions & 17 deletions
@@ -951,25 +951,10 @@ const variables: Record<string, (...args: any) => any> = {
* Databricks Driver *
***************************************************************** */

- /**
- * Accept Databricks policy flag. This environment variable doesn't
- * need to be split by the data source.
- * TODO: Tech-debt: Remove totally someday
- */
- databrickAcceptPolicy: () => {
- const val = get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict();
-
- if (val !== undefined) {
- console.warn(
- 'The CUBEJS_DB_DATABRICKS_ACCEPT_POLICY is not needed anymore. Please, remove it'
- );
- }
- },
-
/**
* Databricks jdbc-connection url.
*/
- databrickUrl: ({
+ databricksUrl: ({
dataSource,
}: {
dataSource: string,
@@ -990,7 +975,7 @@ const variables: Record<string, (...args: any) => any> = {
/**
* Databricks jdbc-connection token.
*/
- databrickToken: ({
+ databricksToken: ({
dataSource,
}: {
dataSource: string,
@@ -1012,6 +997,32 @@ const variables: Record<string, (...args: any) => any> = {
keyByDataSource('CUBEJS_DB_DATABRICKS_CATALOG', dataSource)
],

+ /**
+ * Databricks OAuth Client ID (Same as the service principal UUID)
+ */
+ databricksOAuthClientId: ({
+ dataSource,
+ }: {
+ dataSource: string,
+ }) => (
+ process.env[
+ keyByDataSource('CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID', dataSource)
+ ]
+ ),
+
+ /**
+ * Databricks OAuth Client Secret.
+ */
+ databricksOAuthClientSecret: ({
+ dataSource,
+ }: {
+ dataSource: string,
+ }) => (
+ process.env[
+ keyByDataSource('CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_SECRET', dataSource)
+ ]
+ ),
+
/** ****************************************************************
* Athena Driver *
***************************************************************** */
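For context, the renamed accessors (databricksUrl, databricksToken) and the two new OAuth accessors are all resolved per data source via keyByDataSource, so prefixed variables such as CUBEJS_DS_POSTGRES_DB_DATABRICKS_OAUTH_CLIENT_ID work the same way as the unprefixed ones. A minimal usage sketch, assuming getEnv is imported from @cubejs-backend/shared (data source names are illustrative):

import { getEnv } from '@cubejs-backend/shared';

// Resolves CUBEJS_DB_DATABRICKS_OAUTH_CLIENT_ID for the default data source.
const clientId = getEnv('databricksOAuthClientId', { dataSource: 'default' });

// Resolves CUBEJS_DS_POSTGRES_DB_DATABRICKS_OAUTH_CLIENT_ID for a named data source.
const postgresClientId = getEnv('databricksOAuthClientId', { dataSource: 'postgres' });

// The secret follows the same pattern.
const clientSecret = getEnv('databricksOAuthClientSecret', { dataSource: 'default' });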

packages/cubejs-backend-shared/test/db_env_multi.test.ts

Lines changed: 18 additions & 18 deletions
@@ -1105,31 +1105,31 @@ describe('Multiple datasources', () => {
process.env.CUBEJS_DB_DATABRICKS_URL = 'default1';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL = 'postgres1';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL = 'wrong1';
- expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default1');
- expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('postgres1');
- expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
+ expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default1');
+ expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('postgres1');
+ expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

process.env.CUBEJS_DB_DATABRICKS_URL = 'default2';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL = 'postgres2';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL = 'wrong2';
- expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default2');
- expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('postgres2');
- expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
+ expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default2');
+ expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('postgres2');
+ expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

delete process.env.CUBEJS_DB_DATABRICKS_URL;
delete process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL;
delete process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_URL;
- expect(() => getEnv('databrickUrl', { dataSource: 'default' })).toThrow(
+ expect(() => getEnv('databricksUrl', { dataSource: 'default' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
- expect(() => getEnv('databrickUrl', { dataSource: 'postgres' })).toThrow(
+ expect(() => getEnv('databricksUrl', { dataSource: 'postgres' })).toThrow(
'The CUBEJS_DS_POSTGRES_DB_DATABRICKS_URL is required and missing.'
);
- expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
+ expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);
});
@@ -1138,27 +1138,27 @@ describe('Multiple datasources', () => {
process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default1';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN = 'postgres1';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN = 'wrong1';
- expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default1');
- expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('postgres1');
- expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow(
+ expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default1');
+ expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('postgres1');
+ expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default2';
process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN = 'postgres2';
process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN = 'wrong2';
- expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default2');
- expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('postgres2');
- expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow(
+ expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default2');
+ expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('postgres2');
+ expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);

delete process.env.CUBEJS_DB_DATABRICKS_TOKEN;
delete process.env.CUBEJS_DS_POSTGRES_DB_DATABRICKS_TOKEN;
delete process.env.CUBEJS_DS_WRONG_DB_DATABRICKS_TOKEN;
- expect(getEnv('databrickToken', { dataSource: 'default' })).toBeUndefined();
- expect(getEnv('databrickToken', { dataSource: 'postgres' })).toBeUndefined();
- expect(() => getEnv('databrickToken', { dataSource: 'wrong' })).toThrow(
+ expect(getEnv('databricksToken', { dataSource: 'default' })).toBeUndefined();
+ expect(getEnv('databricksToken', { dataSource: 'postgres' })).toBeUndefined();
+ expect(() => getEnv('databricksToken', { dataSource: 'wrong' })).toThrow(
'The wrong data source is missing in the declared CUBEJS_DATASOURCES.'
);
});

packages/cubejs-backend-shared/test/db_env_single.test.ts

Lines changed: 18 additions & 18 deletions
@@ -705,42 +705,42 @@ describe('Single datasources', () => {

test('getEnv("databrickUrl")', () => {
process.env.CUBEJS_DB_DATABRICKS_URL = 'default1';
- expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default1');
- expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('default1');
- expect(getEnv('databrickUrl', { dataSource: 'wrong' })).toEqual('default1');
+ expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default1');
+ expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('default1');
+ expect(getEnv('databricksUrl', { dataSource: 'wrong' })).toEqual('default1');

process.env.CUBEJS_DB_DATABRICKS_URL = 'default2';
- expect(getEnv('databrickUrl', { dataSource: 'default' })).toEqual('default2');
- expect(getEnv('databrickUrl', { dataSource: 'postgres' })).toEqual('default2');
- expect(getEnv('databrickUrl', { dataSource: 'wrong' })).toEqual('default2');
+ expect(getEnv('databricksUrl', { dataSource: 'default' })).toEqual('default2');
+ expect(getEnv('databricksUrl', { dataSource: 'postgres' })).toEqual('default2');
+ expect(getEnv('databricksUrl', { dataSource: 'wrong' })).toEqual('default2');

delete process.env.CUBEJS_DB_DATABRICKS_URL;
- expect(() => getEnv('databrickUrl', { dataSource: 'default' })).toThrow(
+ expect(() => getEnv('databricksUrl', { dataSource: 'default' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
- expect(() => getEnv('databrickUrl', { dataSource: 'postgres' })).toThrow(
+ expect(() => getEnv('databricksUrl', { dataSource: 'postgres' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
- expect(() => getEnv('databrickUrl', { dataSource: 'wrong' })).toThrow(
+ expect(() => getEnv('databricksUrl', { dataSource: 'wrong' })).toThrow(
'The CUBEJS_DB_DATABRICKS_URL is required and missing.'
);
});

test('getEnv("databrickToken")', () => {
process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default1';
- expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default1');
- expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('default1');
- expect(getEnv('databrickToken', { dataSource: 'wrong' })).toEqual('default1');
+ expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default1');
+ expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('default1');
+ expect(getEnv('databricksToken', { dataSource: 'wrong' })).toEqual('default1');

process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'default2';
- expect(getEnv('databrickToken', { dataSource: 'default' })).toEqual('default2');
- expect(getEnv('databrickToken', { dataSource: 'postgres' })).toEqual('default2');
- expect(getEnv('databrickToken', { dataSource: 'wrong' })).toEqual('default2');
+ expect(getEnv('databricksToken', { dataSource: 'default' })).toEqual('default2');
+ expect(getEnv('databricksToken', { dataSource: 'postgres' })).toEqual('default2');
+ expect(getEnv('databricksToken', { dataSource: 'wrong' })).toEqual('default2');

delete process.env.CUBEJS_DB_DATABRICKS_TOKEN;
- expect(getEnv('databrickToken', { dataSource: 'default' })).toBeUndefined();
- expect(getEnv('databrickToken', { dataSource: 'postgres' })).toBeUndefined();
- expect(getEnv('databrickToken', { dataSource: 'wrong' })).toBeUndefined();
+ expect(getEnv('databricksToken', { dataSource: 'default' })).toBeUndefined();
+ expect(getEnv('databricksToken', { dataSource: 'postgres' })).toBeUndefined();
+ expect(getEnv('databricksToken', { dataSource: 'wrong' })).toBeUndefined();
});

test('getEnv("databricksCatalog")', () => {

packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts

Lines changed: 97 additions & 7 deletions
@@ -90,6 +90,16 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
*/
token?: string,

+ /**
+ * Databricks OAuth Client ID.
+ */
+ oauthClientId?: string,
+
+ /**
+ * Databricks OAuth Client Secret.
+ */
+ oauthClientSecret?: string,
+
/**
* Azure tenant Id
*/
@@ -152,6 +162,10 @@ export class DatabricksDriver extends JDBCDriver {

private readonly parsedConnectionProperties: ParsedConnectionProperties;

+ private accessToken: string | undefined;
+
+ private accessTokenExpires: number = 0;
+
public static dialectClass() {
return DatabricksQuery;
}
@@ -192,14 +206,47 @@
let showSparkProtocolWarn = false;
let url: string =
conf?.url ||
- getEnv('databrickUrl', { dataSource }) ||
+ getEnv('databricksUrl', { dataSource }) ||
getEnv('jdbcUrl', { dataSource });
if (url.indexOf('jdbc:spark://') !== -1) {
showSparkProtocolWarn = true;
url = url.replace('jdbc:spark://', 'jdbc:databricks://');
}

const [uid, pwd, cleanedUrl] = extractAndRemoveUidPwdFromJdbcUrl(url);
+ const passwd = conf?.token ||
+ getEnv('databricksToken', { dataSource }) ||
+ pwd;
+ const oauthClientId = conf?.oauthClientId || getEnv('databricksOAuthClientId', { dataSource });
+ const oauthClientSecret = conf?.oauthClientSecret || getEnv('databricksOAuthClientSecret', { dataSource });
+
+ if (oauthClientId && !oauthClientSecret) {
+ throw new Error('Invalid credentials: No OAuth Client Secret provided');
+ } else if (!oauthClientId && oauthClientSecret) {
+ throw new Error('Invalid credentials: No OAuth Client ID provided');
+ } else if (!oauthClientId && !oauthClientSecret && !passwd) {
+ throw new Error('No credentials provided');
+ }
+
+ let authProps: Record<string, any> = {};
+
+ // OAuth has an advantage over UID+PWD
+ // For magic numbers below - see Databricks docs:
+ // https://docs.databricks.com/aws/en/integrations/jdbc-oss/configure#authenticate-the-driver
+ if (oauthClientId) {
+ authProps = {
+ OAuth2ClientID: oauthClientId,
+ OAuth2Secret: oauthClientSecret,
+ AuthMech: 11,
+ Auth_Flow: 1,
+ };
+ } else {
+ authProps = {
+ UID: uid,
+ PWD: passwd,
+ AuthMech: 3,
+ };
+ }

const config: DatabricksDriverConfiguration = {
...conf,
@@ -208,11 +255,7 @@
drivername: 'com.databricks.client.jdbc.Driver',
customClassPath: undefined,
properties: {
- UID: uid,
- PWD:
- conf?.token ||
- getEnv('databrickToken', { dataSource }) ||
- pwd,
+ ...authProps,
UserAgentEntry: 'CubeDev_Cube',
},
catalog:
@@ -291,8 +334,55 @@
this.showDeprecations();
}

+ private async fetchAccessToken(): Promise<void> {
+ // Need to exchange client ID + Secret => Access token
+
+ const basicAuth = Buffer.from(`${this.config.properties.OAuth2ClientID}:${this.config.properties.OAuth2Secret}`).toString('base64');
+
+ const res = await fetch(`https://${this.parsedConnectionProperties.host}/oidc/v1/token`, {
+ method: 'POST',
+ headers: {
+ Authorization: `Basic ${basicAuth}`,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ },
+ body: new URLSearchParams({
+ grant_type: 'client_credentials',
+ scope: 'all-apis',
+ }),
+ });
+
+ if (!res.ok) {
+ throw new Error(`Failed to get access token: ${res.statusText}`);
+ }
+
+ const resp = await res.json();
+
+ this.accessToken = resp.access_token;
+ this.accessTokenExpires = Date.now() + resp.expires_in * 1000 - 60_000;
+ }
+
+ private async getValidAccessToken(): Promise<string> {
+ if (
+ !this.accessToken ||
+ !this.accessTokenExpires ||
+ Date.now() >= this.accessTokenExpires
+ ) {
+ await this.fetchAccessToken();
+ }
+ return this.accessToken!;
+ }
+
public override async testConnection() {
- const token = `Bearer ${this.config.properties.PWD}`;
+ let token: string;
+
+ // Databricks docs on accessing REST API
+ // https://docs.databricks.com/aws/en/dev-tools/auth/oauth-m2m
+ if (this.config.properties.OAuth2Secret) {
+ const at = await this.getValidAccessToken();
+ token = `Bearer ${at}`;
+ } else {
+ token = `Bearer ${this.config.properties.PWD}`;
+ }

const res = await fetch(`https://${this.parsedConnectionProperties.host}/api/2.0/sql/warehouses/${this.parsedConnectionProperties.warehouseId}`, {
headers: { Authorization: token },
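In short, the constructor now accepts two credential shapes: M2M OAuth (client ID + secret, mapped to OAuth2ClientID/OAuth2Secret with AuthMech 11 and Auth_Flow 1) or a personal access token (mapped to UID/PWD with AuthMech 3), and testConnection exchanges the OAuth client credentials for a cached bearer token before calling the REST API. A rough configuration sketch, assuming the driver is constructed directly and that the package is published as @cubejs-backend/databricks-jdbc-driver; the URL, ID and secret values below are placeholders, not values from this commit:

import { DatabricksDriver } from '@cubejs-backend/databricks-jdbc-driver';

// M2M OAuth: the driver builds { OAuth2ClientID, OAuth2Secret, AuthMech: 11, Auth_Flow: 1 }
// and fetches a bearer token from /oidc/v1/token when it needs to call the REST API.
const oauthDriver = new DatabricksDriver({
  url: 'jdbc:databricks://<workspace-host>:443;httpPath=/sql/1.0/warehouses/<warehouse-id>',
  oauthClientId: '<service-principal-uuid>',
  oauthClientSecret: '<oauth-client-secret>',
});

// Personal access token: falls back to { UID, PWD, AuthMech: 3 } as before.
const patDriver = new DatabricksDriver({
  url: 'jdbc:databricks://<workspace-host>:443;httpPath=/sql/1.0/warehouses/<warehouse-id>',
  token: '<databricks-personal-access-token>',
});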

packages/cubejs-databricks-jdbc-driver/src/helpers.ts

Lines changed: 3 additions & 2 deletions
@@ -35,7 +35,7 @@ export async function resolveJDBCDriver(): Promise<string> {

/**
* Extract if exist UID and PWD from URL and return UID, PWD and URL without these params.
- * New Databricks OSS driver throws an error if UID and PWD are provided in the URL and as a separate params
+ * New Databricks OSS driver throws an error if any parameter is provided in the URL and as a separate param
* passed to the driver instance. That's why we strip them out from the URL if they exist there.
* @param jdbcUrl
*/
@@ -48,7 +48,8 @@ export function extractAndRemoveUidPwdFromJdbcUrl(jdbcUrl: string): [uid: string

const cleanedUrl = jdbcUrl
.replace(/;?UID=[^;]*/i, '')
- .replace(/;?PWD=[^;]*/i, '');
+ .replace(/;?PWD=[^;]*/i, '')
+ .replace(/;?AuthMech=[^;]*/i, '');

return [uid, pwd, cleanedUrl];
}
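Roughly, the extended cleanup behaves like this; the JDBC URL is hypothetical, and the uid/pwd extraction itself happens in the part of the function not shown in this hunk:

// Hypothetical input carrying inline credentials and an auth mode:
const url = 'jdbc:databricks://host:443;httpPath=/sql/1.0/warehouses/abc;UID=token;PWD=secret;AuthMech=3';

const [uid, pwd, cleanedUrl] = extractAndRemoveUidPwdFromJdbcUrl(url);
// expected: uid === 'token', pwd === 'secret'
// cleanedUrl === 'jdbc:databricks://host:443;httpPath=/sql/1.0/warehouses/abc'
// UID, PWD and now AuthMech are stripped so they reach the driver only as connection properties.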
