-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2317/Copy data from grants-db into the opportunity table(s) in the an…
…alytics db (#3228) ## Summary Fixes #2317 ### Time to review: __20 mins__ ## Changes proposed New CLI function to upload opportunity tables into analytics db S3 configuration to read `csv` opportunity tables Add S3 environment variables Mock S3 client for testing Added fixtures for AWS (from API code), test-schema and opportunity-tables to aid in testing, scoped to test session. Added fixture to delete table records after each test Added opportunity table `csv` files for testing Added test that checks files were successfully uploaded and records inserted into test-schema tables ## Context for reviewers > poetry run pytest ./tests/integrations/extracts/test_load_opportunity_data.py when running test locally ## Additional information > Screenshots, GIF demos, code examples or output to help show the changes working as expected.
- Loading branch information
1 parent
2052dba
commit b429083
Showing
17 changed files
with
823 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
""" | ||
We use this package to load opportunity data from s3. | ||
It extracts CSV files from S3 bucket and loads the records into respective | ||
opportunity tables. | ||
""" |
101 changes: 101 additions & 0 deletions
101
analytics/src/analytics/integrations/extracts/constants.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
"""Holds all constant values.""" | ||
|
||
from enum import StrEnum | ||
|
||
|
||
class OpportunityTables(StrEnum): | ||
"""Opportunity tables that are copied over to analytics database.""" | ||
|
||
LK_OPPORTUNITY_STATUS = "lk_opportunity_status" | ||
LK_OPPORTUNITY_CATEGORY = "lk_opportunity_category" | ||
OPPORTUNITY = "opportunity" | ||
OPPORTUNITY_SUMMARY = "opportunity_summary" | ||
CURRENT_OPPORTUNITY_SUMMARY = "current_opportunity_summary" | ||
|
||
|
||
LK_OPPORTUNITY_STATUS_COLS = ( | ||
"OPPORTUNITY_STATUS_ID", | ||
"DESCRIPTION", | ||
"CREATED_AT", | ||
"UPDATED_AT", | ||
) | ||
|
||
LK_OPPORTUNITY_CATEGORY_COLS = ( | ||
"OPPORTUNITY_CATEGORY_ID", | ||
"DESCRIPTION", | ||
"CREATED_AT", | ||
"UPDATED_AT", | ||
) | ||
OPPORTUNITY_COLS = ( | ||
"OPPORTUNITY_ID", | ||
"OPPORTUNITY_NUMBER", | ||
"OPPORTUNITY_TITLE", | ||
"AGENCY_CODE", | ||
"OPPORTUNITY_CATEGORY_ID", | ||
"CATEGORY_EXPLANATION", | ||
"IS_DRAFT", | ||
"REVISION_NUMBER", | ||
"MODIFIED_COMMENTS", | ||
"PUBLISHER_USER_ID", | ||
"PUBLISHER_PROFILE_ID", | ||
"CREATED_AT", | ||
"UPDATED_AT", | ||
) | ||
OPOORTUNITY_SUMMARY_COLS = ( | ||
"OPPORTUNITY_SUMMARY_ID", | ||
"OPPORTUNITY_ID", | ||
"SUMMARY_DESCRIPTION", | ||
"IS_COST_SHARING", | ||
"IS_FORECAST", | ||
"POST_DATE", | ||
"CLOSE_DATE", | ||
"CLOSE_DATE_DESCRIPTION", | ||
"ARCHIVE_DATE", | ||
"UNARCHIVE_DATE", | ||
"EXPECTED_NUMBER_OF_AWARDS", | ||
"ESTIMATED_TOTAL_PROGRAM_FUNDING", | ||
"AWARD_FLOOR", | ||
"AWARD_CEILING", | ||
"ADDITIONAL_INFO_URL", | ||
"ADDITIONAL_INFO_URL_DESCRIPTION", | ||
"FORECASTED_POST_DATE", | ||
"FORECASTED_CLOSE_DATE", | ||
"FORECASTED_CLOSE_DATE_DESCRIPTION", | ||
"FORECASTED_AWARD_DATE", | ||
"FORECASTED_PROJECT_START_DATE", | ||
"FISCAL_YEAR", | ||
"REVISION_NUMBER", | ||
"MODIFICATION_COMMENTS", | ||
"FUNDING_CATEGORY_DESCRIPTION", | ||
"APPLICANT_ELIGIBILITY_DESCRIPTION", | ||
"AGENCY_CODE", | ||
"AGENCY_NAME", | ||
"AGENCY_PHONE_NUMBER", | ||
"AGENCY_CONTACT_DESCRIPTION", | ||
"AGENCY_EMAIL_ADDRESS", | ||
"AGENCY_EMAIL_ADDRESS_DESCRIPTION", | ||
"IS_DELETED", | ||
"CAN_SEND_MAIL", | ||
"PUBLISHER_PROFILE_ID", | ||
"PUBLISHER_USER_ID", | ||
"UPDATED_BY", | ||
"CREATED_BY", | ||
"CREATED_AT", | ||
"UPDATED_AT", | ||
"VERSION_NUMBER", | ||
) | ||
CURRENT_OPPORTUNITY_SUMMARY_COLS = ( | ||
"OPPORTUNITY_ID", | ||
"OPPORTUNITY_SUMMARY_ID", | ||
"OPPORTUNITY_STATUS_ID", | ||
"CREATED_AT", | ||
"UPDATED_AT", | ||
) | ||
|
||
MAP_TABLES_TO_COLS: dict[OpportunityTables, tuple[str, ...]] = { | ||
OpportunityTables.LK_OPPORTUNITY_STATUS: LK_OPPORTUNITY_STATUS_COLS, | ||
OpportunityTables.LK_OPPORTUNITY_CATEGORY: LK_OPPORTUNITY_CATEGORY_COLS, | ||
OpportunityTables.OPPORTUNITY: OPPORTUNITY_COLS, | ||
OpportunityTables.OPPORTUNITY_SUMMARY: OPOORTUNITY_SUMMARY_COLS, | ||
OpportunityTables.CURRENT_OPPORTUNITY_SUMMARY: CURRENT_OPPORTUNITY_SUMMARY_COLS, | ||
} |
79 changes: 79 additions & 0 deletions
79
analytics/src/analytics/integrations/extracts/load_opportunity_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# pylint: disable=invalid-name, line-too-long | ||
"""Loads opportunity tables with opportunity data from S3.""" | ||
|
||
import logging | ||
import os | ||
from contextlib import ExitStack | ||
|
||
import smart_open # type: ignore[import] | ||
from pydantic import Field | ||
from pydantic_settings import BaseSettings | ||
from sqlalchemy import Connection | ||
|
||
from analytics.integrations.etldb.etldb import EtlDb | ||
from analytics.integrations.extracts.constants import ( | ||
MAP_TABLES_TO_COLS, | ||
OpportunityTables, | ||
) | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class LoadOpportunityDataFileConfig(BaseSettings):
    """S3 settings for the opportunity-data extract.

    Populated from the LOAD_OPPORTUNITY_DATA_FILE_PATH environment
    variable (via the pydantic-settings alias); ``None`` when unset.
    """

    # S3 (or local) path prefix under which the per-table CSV files live.
    load_opportunity_data_file_path: str | None = Field(
        alias="LOAD_OPPORTUNITY_DATA_FILE_PATH",
        default=None,
    )
|
||
|
||
def extract_copy_opportunity_data() -> None:
    """Refresh the analytics opportunity tables in a single transaction.

    Opens an EtlDb connection, truncates the existing rows, then streams
    fresh data in from S3. Both steps run under one ``conn.begin()``
    transaction so they commit (or roll back) together.
    """
    etl_db = EtlDb()
    with etl_db.connection() as conn, conn.begin():
        # NOTE(review): helper name carries a typo ("trancate"); kept as-is
        # so this edit doesn't depend on a rename elsewhere.
        _trancate_opportunity_table_records(conn)
        _fetch_insert_opportunity_data(conn)
    logger.info("Extract opportunity data completed successfully")
|
||
|
||
def _trancate_opportunity_table_records(conn: Connection) -> None:
    """Empty every opportunity table ahead of a fresh load.

    Issues ``TRUNCATE ... CASCADE`` per table so dependent rows are
    removed as well. (Function name keeps its original misspelling —
    "trancate" — for compatibility with existing callers.)
    """
    db_schema = os.environ["DB_SCHEMA"]  # KeyError if unset: fail fast
    raw_cursor = conn.connection.cursor()
    for table_name in OpportunityTables:
        # Identifiers come from a trusted enum plus an env var, so
        # f-string interpolation into SQL is acceptable here.
        raw_cursor.execute(f"TRUNCATE TABLE {db_schema}.{table_name} CASCADE")
    logger.info("Truncated all records from all tables")
|
||
|
||
def _fetch_insert_opportunity_data(conn: Connection) -> None:
    """Stream opportunity tables from S3 and insert them into the database.

    For each table, reads ``<prefix>/<table>.csv`` (prefix taken from
    LOAD_OPPORTUNITY_DATA_FILE_PATH) and bulk-loads it with Postgres
    ``COPY ... FROM STDIN``. Assumes each CSV's column order matches the
    table's tuple in ``MAP_TABLES_TO_COLS``.

    Raises:
        KeyError: if DB_SCHEMA is unset, or a table has no column mapping.
    """
    s3_config = LoadOpportunityDataFileConfig()

    # Use os.environ (not os.getenv) so a missing DB_SCHEMA fails fast
    # instead of silently producing a "None.<table>" identifier; this also
    # matches _trancate_opportunity_table_records. Hoisted out of the loop
    # since it's invariant. (The previous nested same-quote f-string was
    # also a SyntaxError on Python < 3.12.)
    schema = os.environ["DB_SCHEMA"]

    cursor = conn.connection.cursor()
    for table in OpportunityTables:
        logger.info("Copying data for table: %s", table)

        # Direct indexing: a table with no registered columns is a bug,
        # and the old .get(table, ()) fallback would build invalid SQL "()".
        columns = MAP_TABLES_TO_COLS[table]
        query = (
            f"COPY {schema}.{table} ({', '.join(columns)}) "
            "FROM STDIN WITH (FORMAT CSV, DELIMITER ',', QUOTE '\"', HEADER)"
        )

        with ExitStack() as stack:
            file = stack.enter_context(
                smart_open.open(
                    f"{s3_config.load_opportunity_data_file_path}/{table}.csv",
                    "r",
                ),
            )
            copy = stack.enter_context(cursor.copy(query))

            # Stream in chunks rather than reading whole files into memory.
            while data := file.read():
                copy.write(data)

        logger.info("Successfully loaded data for table: %s", table)
18 changes: 18 additions & 0 deletions
18
analytics/src/analytics/integrations/extracts/s3_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
"""Configuration for S3.""" | ||
|
||
import boto3 | ||
import botocore | ||
|
||
|
||
def get_s3_client(
    session: boto3.Session | None = None,
    boto_config: botocore.config.Config | None = None,
) -> botocore.client.BaseClient:
    """Build an S3 client, optionally from an existing boto3 session.

    Args:
        session: when provided, the client is created from this session;
            otherwise the default boto3 credential chain is used.
        boto_config: custom client config; defaults to SigV4 signing.
    """
    if boto_config is None:
        boto_config = botocore.config.Config(signature_version="s3v4")

    # Either the caller's session or the boto3 module itself exposes .client().
    client_source = session if session is not None else boto3
    return client_source.client("s3", config=boto_config)
Oops, something went wrong.