diff --git a/docs/pages/product/configuration/data-sources/databricks-jdbc.mdx b/docs/pages/product/configuration/data-sources/databricks-jdbc.mdx
index 40b46fffdc9e2..94e05cef63ce7 100644
--- a/docs/pages/product/configuration/data-sources/databricks-jdbc.mdx
+++ b/docs/pages/product/configuration/data-sources/databricks-jdbc.mdx
@@ -134,6 +134,17 @@ CUBEJS_DB_EXPORT_BUCKET=wasbs://my-bucket@my-account.blob.core.windows.net
 CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY=
 ```
 
+The access key provides full access to the storage account's configuration and data.
+For fine-grained control over access to storage resources, follow the [Azure guide on authorizing access with Azure Active Directory][authorize-with-azure-active-directory].
+
+[Create a service principal][azure-authentication-with-service-principal] and replace the access key as follows:
+
+```dotenv
+CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID=
+CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID=
+CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET=
+```
+
 ## SSL/TLS
 
 Cube does not require any additional configuration to enable SSL/TLS for
@@ -150,6 +161,10 @@ bucket][self-preaggs-export-bucket] **must be** configured.
 
 [azure-bs]: https://azure.microsoft.com/en-gb/services/storage/blobs/
 [azure-bs-docs-get-key]: https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=azure-portal#view-account-access-keys
+[authorize-with-azure-active-directory]:
+  https://learn.microsoft.com/en-us/rest/api/storageservices/authorize-with-azure-active-directory
+[azure-authentication-with-service-principal]:
+  https://learn.microsoft.com/en-us/azure/developer/java/sdk/identity-service-principal-auth
 [databricks]: https://databricks.com/
 [databricks-docs-dbfs]: https://docs.databricks.com/en/dbfs/mounts.html
 [databricks-docs-azure]:
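For orientation, the three service-principal variables above are the standard inputs to an Azure AD client-credentials flow. A minimal sketch of the credential Cube builds from them, assuming the `@azure/identity` package (see the `BaseDriver` changes further down):

```typescript
import { ClientSecretCredential } from '@azure/identity';

// Illustrative only: Cube reads these values from the
// CUBEJS_DB_EXPORT_BUCKET_AZURE_* variables documented above.
const credential = new ClientSecretCredential(
  process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID!,
  process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID!,
  process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET!,
);
```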
diff --git a/docs/pages/reference/configuration/environment-variables.mdx b/docs/pages/reference/configuration/environment-variables.mdx
index 475bb32708be5..1b7ac21aacae2 100644
--- a/docs/pages/reference/configuration/environment-variables.mdx
+++ b/docs/pages/reference/configuration/environment-variables.mdx
@@ -457,6 +457,66 @@ with a data source][ref-config-multiple-ds-decorating-env].
 | -------------------------------------- | ---------------------- | --------------------- |
 | [A valid AWS region][aws-docs-regions] | N/A                    | N/A                   |
 
+## `CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY`
+
+The Azure Access Key to use for the export bucket.
+
+<InfoBox>
+
+When using multiple data sources, this environment variable can be [decorated
+with a data source][ref-config-multiple-ds-decorating-env].
+
+</InfoBox>
+
+| Possible Values          | Default in Development | Default in Production |
+| ------------------------ | ---------------------- | --------------------- |
+| A valid Azure Access Key | N/A                    | N/A                   |
+
+## `CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID`
+
+The Azure tenant ID to use for the export bucket.
+
+<InfoBox>
+
+When using multiple data sources, this environment variable can be [decorated
+with a data source][ref-config-multiple-ds-decorating-env].
+
+</InfoBox>
+
+| Possible Values         | Default in Development | Default in Production |
+| ----------------------- | ---------------------- | --------------------- |
+| A valid Azure Tenant ID | N/A                    | N/A                   |
+
+## `CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID`
+
+The Azure client ID to use for the export bucket.
+
+<InfoBox>
+
+When using multiple data sources, this environment variable can be [decorated
+with a data source][ref-config-multiple-ds-decorating-env].
+
+</InfoBox>
+
+| Possible Values         | Default in Development | Default in Production |
+| ----------------------- | ---------------------- | --------------------- |
+| A valid Azure Client ID | N/A                    | N/A                   |
+
+## `CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET`
+
+The Azure client secret to use for the export bucket.
+
+<InfoBox>
+
+When using multiple data sources, this environment variable can be [decorated
+with a data source][ref-config-multiple-ds-decorating-env].
+
+</InfoBox>
+
+| Possible Values             | Default in Development | Default in Production |
+| --------------------------- | ---------------------- | --------------------- |
+| A valid Azure Client Secret | N/A                    | N/A                   |
+
 ## `CUBEJS_DB_EXPORT_BUCKET_MOUNT_DIR`
 
 The mount path to use for a [Databricks DBFS mount][databricks-docs-dbfs].
diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts
index 3d5a2c0e2e81c..36f31eb5f58a1 100644
--- a/packages/cubejs-backend-shared/src/env.ts
+++ b/packages/cubejs-backend-shared/src/env.ts
@@ -795,6 +795,19 @@ const variables: Record<string, (...args: any) => any> = {
     ]
   ),
 
+  /**
+   * Client Secret for the Azure based export bucket storage.
+   */
+  dbExportBucketAzureClientSecret: ({
+    dataSource,
+  }: {
+    dataSource: string,
+  }) => (
+    process.env[
+      keyByDataSource('CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET', dataSource)
+    ]
+  ),
+
   /**
    * Azure Federated Token File Path for the Azure based export bucket storage.
    */
+ ); + + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID; + delete process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_BUCKET_AZURE_TENANT_ID; + delete process.env.CUBEJS_DS_WRONG_DB_EXPORT_BUCKET_AZURE_TENANT_ID; + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'postgres' })).toBeUndefined(); + expect(() => getEnv('dbExportBucketAzureTenantId', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + }); + + test('getEnv("dbExportBucketAzureClientId")', () => { + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'default1'; + process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'postgres1'; + process.env.CUBEJS_DS_WRONG_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'wrong1'; + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'postgres' })).toEqual('postgres1'); + expect(() => getEnv('dbExportBucketAzureClientId', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'default2'; + process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'postgres2'; + process.env.CUBEJS_DS_WRONG_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'wrong2'; + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'postgres' })).toEqual('postgres2'); + expect(() => getEnv('dbExportBucketAzureClientId', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID; + delete process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_BUCKET_AZURE_CLIENT_ID; + delete process.env.CUBEJS_DS_WRONG_DB_EXPORT_BUCKET_AZURE_CLIENT_ID; + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'postgres' })).toBeUndefined(); + expect(() => getEnv('dbExportBucketAzureClientId', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + }); + + test('getEnv("dbExportBucketAzureClientSecret")', () => { + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'default1'; + process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'postgres1'; + process.env.CUBEJS_DS_WRONG_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'wrong1'; + expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'postgres' })).toEqual('postgres1'); + expect(() => getEnv('dbExportBucketAzureClientSecret', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' 
+ ); + + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'default2'; + process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'postgres2'; + process.env.CUBEJS_DS_WRONG_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'wrong2'; + expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'postgres' })).toEqual('postgres2'); + expect(() => getEnv('dbExportBucketAzureClientSecret', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET; + delete process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET; + delete process.env.CUBEJS_DS_WRONG_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET; + expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'postgres' })).toBeUndefined(); + expect(() => getEnv('dbExportBucketAzureClientSecret', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + }); + test('getEnv("dbExportIntegration")', () => { process.env.CUBEJS_DB_EXPORT_INTEGRATION = 'default1'; process.env.CUBEJS_DS_POSTGRES_DB_EXPORT_INTEGRATION = 'postgres1'; diff --git a/packages/cubejs-backend-shared/test/db_env_single.test.ts b/packages/cubejs-backend-shared/test/db_env_single.test.ts index 6d7b0d801c043..765e94b7c175d 100644 --- a/packages/cubejs-backend-shared/test/db_env_single.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_single.test.ts @@ -618,6 +618,57 @@ describe('Single datasources', () => { expect(getEnv('dbExportBucketAzureKey', { dataSource: 'wrong' })).toBeUndefined(); }); + test('getEnv("dbExportBucketAzureTenantId")', () => { + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID = 'default1'; + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'postgres' })).toEqual('default1'); + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'wrong' })).toEqual('default1'); + + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID = 'default2'; + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'postgres' })).toEqual('default2'); + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'wrong' })).toEqual('default2'); + + delete process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID; + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'postgres' })).toBeUndefined(); + expect(getEnv('dbExportBucketAzureTenantId', { dataSource: 'wrong' })).toBeUndefined(); + }); + + test('getEnv("dbExportBucketAzureClientId")', () => { + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'default1'; + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'postgres' })).toEqual('default1'); + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'wrong' })).toEqual('default1'); + + process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'default2'; + expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'default' })).toEqual('default2'); + 
+    expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'postgres' })).toEqual('default2');
+    expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'wrong' })).toEqual('default2');
+
+    delete process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID;
+    expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'default' })).toBeUndefined();
+    expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'postgres' })).toBeUndefined();
+    expect(getEnv('dbExportBucketAzureClientId', { dataSource: 'wrong' })).toBeUndefined();
+  });
+
+  test('getEnv("dbExportBucketAzureClientSecret")', () => {
+    process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'default1';
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'default' })).toEqual('default1');
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'postgres' })).toEqual('default1');
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'wrong' })).toEqual('default1');
+
+    process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'default2';
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'default' })).toEqual('default2');
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'postgres' })).toEqual('default2');
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'wrong' })).toEqual('default2');
+
+    delete process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET;
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'default' })).toBeUndefined();
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'postgres' })).toBeUndefined();
+    expect(getEnv('dbExportBucketAzureClientSecret', { dataSource: 'wrong' })).toBeUndefined();
+  });
+
   test('getEnv("dbExportIntegration")', () => {
     process.env.CUBEJS_DB_EXPORT_INTEGRATION = 'default1';
     expect(getEnv('dbExportIntegration', { dataSource: 'default' })).toEqual('default1');
diff --git a/packages/cubejs-base-driver/src/BaseDriver.ts b/packages/cubejs-base-driver/src/BaseDriver.ts
index bfd44df407503..fc2890b1ae4e7 100644
--- a/packages/cubejs-base-driver/src/BaseDriver.ts
+++ b/packages/cubejs-base-driver/src/BaseDriver.ts
@@ -27,6 +27,7 @@ import {
 } from '@azure/storage-blob';
 import {
   DefaultAzureCredential,
+  ClientSecretCredential,
 } from '@azure/identity';
 
 import { cancelCombinator } from './utils';
@@ -73,6 +74,15 @@ export type AzureStorageClientConfig = {
    * the Azure library will try to use the AZURE_TENANT_ID env
    */
   tenantId?: string,
+  /**
+   * Azure service principal client secret.
+   * Enables authentication to Microsoft Entra ID using a client secret that was generated
+   * for an App Registration. More information on how to configure a client secret can be found here:
+   * https://learn.microsoft.com/entra/identity-platform/quickstart-configure-app-access-web-apis#add-credentials-to-your-web-application
+   * In case of DefaultAzureCredential flow if it is omitted
+   * the Azure library will try to use the AZURE_CLIENT_SECRET env
+   */
+  clientSecret?: string,
   /**
    * The path to a file containing a Kubernetes service account token that authenticates the identity.
   * In case of DefaultAzureCredential flow if it is omitted
@@ -760,7 +770,7 @@ export abstract class BaseDriver implements DriverInterface {
     const parts = bucketName.split(splitter);
     const account = parts[0];
     const container = parts[1].split('/')[0];
-    let credential: StorageSharedKeyCredential | DefaultAzureCredential;
+    let credential: StorageSharedKeyCredential | ClientSecretCredential | DefaultAzureCredential;
     let blobServiceClient: BlobServiceClient;
     let getSas;
 
@@ -778,6 +788,28 @@ export abstract class BaseDriver implements DriverInterface {
         },
         credential as StorageSharedKeyCredential
       ).toString();
+    } else if (azureConfig.clientSecret && azureConfig.tenantId && azureConfig.clientId) {
+      credential = new ClientSecretCredential(
+        azureConfig.tenantId,
+        azureConfig.clientId,
+        azureConfig.clientSecret,
+      );
+      getSas = async (name: string, startsOn: Date, expiresOn: Date) => {
+        const userDelegationKey = await blobServiceClient.getUserDelegationKey(startsOn, expiresOn);
+        return generateBlobSASQueryParameters(
+          {
+            containerName: container,
+            blobName: name,
+            permissions: ContainerSASPermissions.parse('r'),
+            startsOn,
+            expiresOn,
+            protocol: SASProtocol.Https,
+            version: '2020-08-04',
+          },
+          userDelegationKey,
+          account
+        ).toString();
+      };
     } else {
       const opts = {
         tenantId: azureConfig.tenantId,
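Standalone, the user-delegation flow added to `extractFilesFromAzure` above boils down to the following sketch (the account and container names are hypothetical; in `BaseDriver` they are parsed out of the export bucket URL):

```typescript
import {
  BlobServiceClient,
  ContainerSASPermissions,
  generateBlobSASQueryParameters,
  SASProtocol,
} from '@azure/storage-blob';
import { ClientSecretCredential } from '@azure/identity';

const account = 'myaccount';     // hypothetical storage account
const container = 'cube-export'; // hypothetical container

async function signBlobUrl(
  credential: ClientSecretCredential,
  blobName: string,
): Promise<string> {
  const client = new BlobServiceClient(
    `https://${account}.blob.core.windows.net`,
    credential,
  );
  const startsOn = new Date();
  const expiresOn = new Date(startsOn.valueOf() + 60 * 60 * 1000);
  // An Azure AD identity has no storage account key to sign with, so a
  // short-lived user delegation key is fetched and used for signing instead.
  const key = await client.getUserDelegationKey(startsOn, expiresOn);
  const sas = generateBlobSASQueryParameters(
    {
      containerName: container,
      blobName,
      permissions: ContainerSASPermissions.parse('r'),
      startsOn,
      expiresOn,
      protocol: SASProtocol.Https,
    },
    key,
    account,
  ).toString();
  return `https://${account}.blob.core.windows.net/${container}/${blobName}?${sas}`;
}
```

Note that `getUserDelegationKey` only succeeds if the service principal holds an RBAC role that includes the `generateUserDelegationKey` action, such as Storage Blob Data Reader or Storage Blob Data Contributor.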
diff --git a/packages/cubejs-databricks-jdbc-driver/README.md b/packages/cubejs-databricks-jdbc-driver/README.md
index fe71ef64c31ff..1d07551292bf7 100644
--- a/packages/cubejs-databricks-jdbc-driver/README.md
+++ b/packages/cubejs-databricks-jdbc-driver/README.md
@@ -20,6 +20,8 @@ $ yarn
 $ yarn test
 ```
 
+Note: Unit tests require Java to be installed.
+
 ### License
 
 Cube.js Databricks Database Driver is [Apache 2.0 licensed](./LICENSE).
diff --git a/packages/cubejs-databricks-jdbc-driver/package.json b/packages/cubejs-databricks-jdbc-driver/package.json
index d9bf3370577e2..684639d9b7678 100644
--- a/packages/cubejs-databricks-jdbc-driver/package.json
+++ b/packages/cubejs-databricks-jdbc-driver/package.json
@@ -18,6 +18,8 @@
     "build": "rm -rf dist && npm run tsc",
     "tsc": "tsc",
     "watch": "tsc -w",
+    "test": "npm run unit-tests",
+    "unit-tests": "jest dist/test --forceExit",
     "lint": "eslint src/* --ext .ts",
     "lint:fix": "eslint --fix src/* --ext .ts",
     "postinstall": "node bin/post-install"
diff --git a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts
index f19de53c0fa3a..d3754207a3d07 100644
--- a/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts
+++ b/packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts
@@ -88,6 +88,21 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
    * Databricks security token (PWD).
    */
   token?: string,
+
+  /**
+   * Azure tenant ID.
+   */
+  azureTenantId?: string,
+
+  /**
+   * Azure service principal client ID.
+   */
+  azureClientId?: string,
+
+  /**
+   * Azure service principal client secret.
+   */
+  azureClientSecret?: string,
 };
 
 type ShowTableRow = {
@@ -221,6 +236,16 @@ export class DatabricksDriver extends JDBCDriver {
         getEnv('dbExportBucketAzureKey', { dataSource }),
       exportBucketCsvEscapeSymbol:
         getEnv('dbExportBucketCsvEscapeSymbol', { dataSource }),
+      // Azure service principal
+      azureTenantId:
+        conf?.azureTenantId ||
+        getEnv('dbExportBucketAzureTenantId', { dataSource }),
+      azureClientId:
+        conf?.azureClientId ||
+        getEnv('dbExportBucketAzureClientId', { dataSource }),
+      azureClientSecret:
+        conf?.azureClientSecret ||
+        getEnv('dbExportBucketAzureClientSecret', { dataSource }),
     };
     if (config.readOnly === undefined) {
       // we can set readonly to true if there is no bucket config provided
@@ -679,31 +704,36 @@ export class DatabricksDriver extends JDBCDriver {
     // The extractors in BaseDriver expect just clean bucket name
     const url = new URL(this.config.exportBucket || '');
 
-    switch (this.config.bucketType) {
-      case 'azure':
-        return this.extractFilesFromAzure(
-          { azureKey: this.config.azureKey || '' },
-          // Databricks uses different bucket address form, so we need to transform it
-          // to the one understandable by extractFilesFromAzure implementation
-          `${url.host}/${url.username}`,
-          tableName,
-        );
-      case 's3':
-        return this.extractUnloadedFilesFromS3(
-          {
-            credentials: {
-              accessKeyId: this.config.awsKey || '',
-              secretAccessKey: this.config.awsSecret || '',
-            },
-            region: this.config.awsRegion || '',
+    if (this.config.bucketType === 'azure') {
+      const {
+        azureKey,
+        azureClientId: clientId,
+        azureTenantId: tenantId,
+        azureClientSecret: clientSecret
+      } = this.config;
+      return this.extractFilesFromAzure(
+        { azureKey, clientId, tenantId, clientSecret },
+        // Databricks uses different bucket address form, so we need to transform it
+        // to the one understandable by extractFilesFromAzure implementation
+        `${url.host}/${url.username}`,
+        tableName,
+      );
+    } else if (this.config.bucketType === 's3') {
+      return this.extractUnloadedFilesFromS3(
+        {
+          credentials: {
+            accessKeyId: this.config.awsKey || '',
+            secretAccessKey: this.config.awsSecret || '',
           },
-          url.host,
-          tableName,
-        );
-      default:
-        throw new Error(`Unsupported export bucket type: ${
-          this.config.bucketType
-        }`);
+          region: this.config.awsRegion || '',
+        },
+        url.host,
+        tableName,
+      );
+    } else {
+      throw new Error(`Unsupported export bucket type: ${
+        this.config.bucketType
+      }`);
     }
   }
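The bucket-address transform kept in `unload` above is easiest to see with a concrete value. For the `wasbs://` URL used in the new tests below, Node's WHATWG `URL` parser puts the container into `username` and the storage account host into `host`, so the driver re-joins them into the `account.blob.core.windows.net/container` form that `extractFilesFromAzure` expects:

```typescript
// Worked example of the `${url.host}/${url.username}` transform.
const url = new URL('wasbs://cube-export@mock.blob.core.windows.net');

url.username;                  // 'cube-export'
url.host;                      // 'mock.blob.core.windows.net'
`${url.host}/${url.username}`; // 'mock.blob.core.windows.net/cube-export'
```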
diff --git a/packages/cubejs-databricks-jdbc-driver/test/DatabricksDriver.test.ts b/packages/cubejs-databricks-jdbc-driver/test/DatabricksDriver.test.ts
new file mode 100644
index 0000000000000..275924a184307
--- /dev/null
+++ b/packages/cubejs-databricks-jdbc-driver/test/DatabricksDriver.test.ts
@@ -0,0 +1,65 @@
+import { DatabricksDriver } from '../src/DatabricksDriver';
+import { UnloadOptions } from '@cubejs-backend/base-driver';
+import { ContainerClient, BlobServiceClient } from '@azure/storage-blob';
+
+jest.mock('@azure/storage-blob', () => ({
+  ...jest.requireActual('@azure/storage-blob'),
+  generateBlobSASQueryParameters: jest.fn().mockReturnValue('test')
+}));
+
+jest.spyOn(ContainerClient.prototype, 'listBlobsFlat').mockImplementation(
+  jest.fn().mockReturnValue([{ name: 'product.csv/test.csv' }])
+);
+jest.spyOn(BlobServiceClient.prototype, 'getUserDelegationKey').mockImplementation(
+  jest.fn().mockReturnValue('mockKey')
+);
+
+describe('DatabricksDriver', () => {
+  const mockTableName = 'product';
+  const mockSql = 'SELECT * FROM ' + mockTableName;
+  const mockParams = [1];
+  const mockOptions: UnloadOptions = {
+    maxFileSize: 3,
+    query: {
+      sql: mockSql,
+      params: mockParams,
+    },
+  };
+  let databricksDriver: DatabricksDriver;
+  const mockUnloadWithSql = jest.fn().mockResolvedValue('mockType');
+
+  beforeAll(() => {
+    process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'true';
+    process.env.CUBEJS_DB_DATABRICKS_URL = 'jdbc:databricks://adb-123456789.10.azuredatabricks.net:443';
+    process.env.CUBEJS_DB_EXPORT_BUCKET_TYPE = 'azure';
+    process.env.CUBEJS_DB_EXPORT_BUCKET = 'wasbs://cube-export@mock.blob.core.windows.net';
+    process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY = 'azure-key';
+    process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID = 'azure-tenant-id';
+    process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID = 'azure-client-id';
+    process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_SECRET = 'azure-client-secret';
+    process.env.CUBEJS_DB_DATABRICKS_TOKEN = 'token';
+  });
+
+  afterEach(() => {
+    jest.clearAllMocks();
+  });
+
+  test('should get signed URLs of unloaded csv files by Azure storage shared key', async () => {
+    databricksDriver = new DatabricksDriver();
+    databricksDriver['unloadWithSql'] = mockUnloadWithSql;
+
+    const result = await databricksDriver.unload(mockTableName, mockOptions);
+    expect(mockUnloadWithSql).toHaveBeenCalledWith(mockTableName, mockSql, mockParams);
+    expect(result.csvFile).toBeTruthy();
+  });
+
+  test('should get signed URLs of unloaded csv files by Azure client secret', async () => {
+    process.env.CUBEJS_DB_EXPORT_BUCKET_AZURE_KEY = '';
+    databricksDriver = new DatabricksDriver();
+    databricksDriver['unloadWithSql'] = mockUnloadWithSql;
+
+    const result = await databricksDriver.unload(mockTableName, mockOptions);
+    expect(mockUnloadWithSql).toHaveBeenCalledWith(mockTableName, mockSql, mockParams);
+    expect(result.csvFile).toBeTruthy();
+  });
+});
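Finally, since the constructor resolves each field as `conf?.azureTenantId || getEnv(...)`, the service principal can presumably also be configured programmatically, with explicit options taking precedence over the `CUBEJS_DB_EXPORT_BUCKET_AZURE_*` variables. A sketch with placeholder values:

```typescript
import { DatabricksDriver } from '@cubejs-backend/databricks-jdbc-driver';

// Placeholder credentials for illustration only.
const driver = new DatabricksDriver({
  azureTenantId: '00000000-0000-0000-0000-000000000000',
  azureClientId: '11111111-1111-1111-1111-111111111111',
  azureClientSecret: 'client-secret-value',
});
```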