Skip to content
This repository has been archived by the owner on Aug 8, 2023. It is now read-only.

fix use of pluto change files #252

Merged
merged 20 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ Best practice to run the app locally is to use the dev container (especially via

2. If in VS Code, a popup should appear with an option to navigate to the site in a browser

3. If an error of `Access to localhost was denied` appears in the browser, try navigating to `http://127.0.0.1:5000/`
3. If an error of `Access to localhost was denied` appears in the browser, try navigating to `127.0.0.1:5000` rather than `localhost:5000`
4 changes: 4 additions & 0 deletions example.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
AWS_S3_ENDPOINT=
AWS_SECRET_ACCESS_KEY=
AWS_ACCESS_KEY_ID=
AWS_S3_BUCKET=
8 changes: 5 additions & 3 deletions src/digital_ocean_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ def get_all_folder_names_in_repo_folder(self):

return all_folders

def get_all_filenames_in_folder(self, folder_path: str) -> set:
    """Return the names of the objects stored under ``folder_path``.

    Args:
        folder_path: Key prefix of the folder, without a trailing slash.

    Returns:
        A set holding the last path segment of every object key that starts
        with ``"<folder_path>/"``. The set is empty when nothing matches.
    """
    # NOTE(review): presumably ``self.bucket`` is a boto3 Bucket, in which case
    # the prefix filter also matches objects in nested sub-folders; those
    # contribute their base name only. Confirm against the client setup.
    names = {
        entry.key.rpartition("/")[2]
        for entry in self.bucket.objects.filter(Prefix=f"{folder_path}/")
    }
    # A key that is exactly the prefix (a "folder/" placeholder object) has an
    # empty last segment; that is not a filename, so drop it.
    names.discard("")
    return names

def unzip_csv(self, csv_filename, zipfile):
try:
Expand Down
59 changes: 43 additions & 16 deletions src/pluto/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
REPO_NAME = "db-pluto"


def get_data(branch) -> Dict[str, pd.DataFrame]:
def get_output_folder_path(branch: str) -> str:
    """Build the path of the latest output folder for ``branch``.

    The path has the form ``<REPO_NAME>/<branch>/latest/output`` and carries
    no leading or trailing slash.
    """
    output_folder_path = f"{REPO_NAME}/{branch}/latest/output"
    return output_folder_path


def get_data(branch: str) -> Dict[str, pd.DataFrame]:
rv = {}
url = f"https://edm-publishing.nyc3.digitaloceanspaces.com/db-pluto/{branch}/latest/output"
url = f"https://edm-publishing.nyc3.digitaloceanspaces.com/{get_output_folder_path(branch)}"

client = DigitalOceanClient(bucket_name=BUCKET_NAME, repo_name=REPO_NAME)
kwargs = {"true_values": ["t"], "false_values": ["f"]}
Expand Down Expand Up @@ -46,24 +50,47 @@ def get_data(branch) -> Dict[str, pd.DataFrame]:
return rv


def get_changes(client: DigitalOceanClient, branch: str) -> Dict[str, pd.DataFrame]:
    """Load the applied / not-applied PLUTO change tables for ``branch``.

    The output folder may hold the tables in one of two archives,
    ``pluto_changes.zip`` or ``pluto_corrections.zip``, each with its own CSV
    naming scheme. The candidates are tried in that order and the first
    archive present in the folder is used.

    Args:
        client: Client used to list the output folder, download the zip
            archive and extract CSVs from it.
        branch: Branch whose ``latest/output`` folder is inspected.

    Returns:
        A dict with the keys ``"pluto_corrections_applied"`` and
        ``"pluto_corrections_not_applied"``. The keys are the same whichever
        archive was found.

    Raises:
        FileNotFoundError: If neither archive exists in the output folder.
    """
    valid_changes_files_group = [
        {
            "zip_filename": "pluto_changes.zip",
            "applied_filename": "pluto_changes_applied.csv",
            "not_applied_filename": "pluto_changes_not_applied.csv",
        },
        {
            "zip_filename": "pluto_corrections.zip",
            "applied_filename": "pluto_corrections_applied.csv",
            "not_applied_filename": "pluto_corrections_not_applied.csv",
        },
    ]
    # Resolve the folder once so listing and download use the same path.
    output_folder_path = get_output_folder_path(branch)
    output_filenames = client.get_all_filenames_in_folder(
        folder_path=output_folder_path
    )

    for changes_files_group in valid_changes_files_group:
        zip_filename = changes_files_group["zip_filename"]
        if zip_filename not in output_filenames:
            continue

        pluto_changes_zip = client.zip_from_DO(
            zip_filename=f"{output_folder_path}/{zip_filename}",
        )
        return {
            "pluto_corrections_applied": client.unzip_csv(
                csv_filename=changes_files_group["applied_filename"],
                zipfile=pluto_changes_zip,
            ),
            # Read the "not applied" CSV here; loading the applied file twice
            # would return the same table under both keys.
            "pluto_corrections_not_applied": client.unzip_csv(
                csv_filename=changes_files_group["not_applied_filename"],
                zipfile=pluto_changes_zip,
            ),
        }

    raise FileNotFoundError(
        f"""
        No valid pluto changes zip file found!
        Files in branch folder "{branch}"
        {output_filenames}
        """
    )


def get_version_text(source_data_versions):
Expand Down