From 3dbe5c8e8ee0b5659bd59e265e9d4c1994b723c0 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Thu, 25 Apr 2024 14:35:10 -0700 Subject: [PATCH] Add multi-checkpoint support to integrations (#1742) * Refactor addIntegrationRequest params to object Signed-off-by: Simeon Widdis * Move SetupIntegrationFormInputs to own file Signed-off-by: Simeon Widdis * Split form inputs into more sections visually Signed-off-by: Simeon Widdis * Split form inputs into more sections logically Signed-off-by: Simeon Widdis * Minor copy update for checkpoint location Signed-off-by: Simeon Widdis * Add UUID to created checkpoint location Signed-off-by: Simeon Widdis * Use dashes instead of underscores for separating checkpoint parts Signed-off-by: Simeon Widdis * Update toggleworkflow method per Ryan's feedback Signed-off-by: Simeon Widdis --------- Signed-off-by: Simeon Widdis --- .../integrations/components/setup_integration.tsx | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/public/components/integrations/components/setup_integration.tsx b/public/components/integrations/components/setup_integration.tsx index 9608c38d21..e521e3b74b 100644 --- a/public/components/integrations/components/setup_integration.tsx +++ b/public/components/integrations/components/setup_integration.tsx @@ -103,10 +103,21 @@ const makeTableName = (config: IntegrationSetupInputs): string => { }; const prepareQuery = (query: string, config: IntegrationSetupInputs): string => { + // To prevent checkpoint collisions, each query needs a unique checkpoint name, so we use an enriched + // UUID to create subfolders under the given checkpoint location per-query. + const querySpecificUUID = crypto.randomUUID(); + let checkpointLocation = config.checkpointLocation.endsWith('/') + ? 
config.checkpointLocation + : config.checkpointLocation + '/'; + checkpointLocation += `${config.connectionDataSource}-${config.connectionTableName}-${querySpecificUUID}`; + let queryStr = query.replaceAll('{table_name}', makeTableName(config)); queryStr = queryStr.replaceAll('{s3_bucket_location}', config.connectionLocation); - queryStr = queryStr.replaceAll('{s3_checkpoint_location}', config.checkpointLocation); + queryStr = queryStr.replaceAll('{s3_checkpoint_location}', checkpointLocation); queryStr = queryStr.replaceAll('{object_name}', config.connectionTableName); + // TODO: the Spark API only supports single-line queries, but directly collapsing all whitespace + // causes problems with single-line comments and quoted strings that contain runs of whitespace. A more robust + // implementation would remove comments before flattening and leave string literals untouched. queryStr = queryStr.replaceAll(/\s+/g, ' '); return queryStr; };