diff --git a/pbp/meta_gen/gen_iclisten.py b/pbp/meta_gen/gen_iclisten.py index 2c5b135..dcb8c7b 100644 --- a/pbp/meta_gen/gen_iclisten.py +++ b/pbp/meta_gen/gen_iclisten.py @@ -13,7 +13,12 @@ import pandas as pd from pathlib import Path from progressbar import progressbar -from pbp.meta_gen.utils import InstrumentType, parse_s3_or_gcp_url, get_datetime, plot_daily_coverage +from pbp.meta_gen.utils import ( + InstrumentType, + parse_s3_or_gcp_url, + get_datetime, + plot_daily_coverage, +) from pbp.meta_gen.json_generator import JsonGenerator from pbp.meta_gen.meta_reader import GenericWavFile from pbp.meta_gen.gen_abstract import MetadataGeneratorAbstract @@ -94,7 +99,7 @@ def run(self): wav_files.append(GenericWavFile(self.log, filename, wav_dt)) if scheme == "s3": - client = boto3.client('s3', config=Config(signature_version=UNSIGNED)) + client = boto3.client("s3", config=Config(signature_version=UNSIGNED)) for day_hour in pd.date_range(start=start_dt, end=end_dt, freq="h"): bucket = f"{bucket_name}-{day_hour.year:04d}" @@ -176,9 +181,12 @@ def run(self): except Exception as ex: self.log.exception(str(ex)) - plot_file = plot_daily_coverage(InstrumentType.ICLISTEN, self.df, self.json_base_dir, self.start, self.end) + plot_file = plot_daily_coverage( + InstrumentType.ICLISTEN, self.df, self.json_base_dir, self.start, self.end + ) self.log.info(f"Plot file: {plot_file}") + if __name__ == "__main__": from pbp.logging_helper import create_logger diff --git a/pbp/meta_gen/gen_nrs.py b/pbp/meta_gen/gen_nrs.py index d9732cf..923ece8 100644 --- a/pbp/meta_gen/gen_nrs.py +++ b/pbp/meta_gen/gen_nrs.py @@ -107,7 +107,9 @@ def run(self): sound_files.append(WavFile(self.log, f_path, f_dt)) # delay to avoid 400 error if i % 100 == 0: - self.log.info(f"{i} files searched...found {len(sound_files)} files that match the search pattern") + self.log.info( + f"{i} files searched...found {len(sound_files)} files that match the search pattern" + ) time.sleep(1) if f_dt > end_dt: break @@ -152,9 +154,12 @@ def run(self): self.log.exception(str(ex)) # plot the daily coverage - plot_file = plot_daily_coverage(InstrumentType.NRS, self.df, self.json_base_dir, self.start, self.end) + plot_file = plot_daily_coverage( + InstrumentType.NRS, self.df, self.json_base_dir, self.start, self.end + ) self.log.info(f"Coverage plot saved to {plot_file}") + if __name__ == "__main__": from pbp.logging_helper import create_logger diff --git a/pbp/meta_gen/gen_soundtrap.py b/pbp/meta_gen/gen_soundtrap.py index 2b0a456..caa29e6 100644 --- a/pbp/meta_gen/gen_soundtrap.py +++ b/pbp/meta_gen/gen_soundtrap.py @@ -18,7 +18,12 @@ from pbp.meta_gen.gen_abstract import MetadataGeneratorAbstract from pbp.meta_gen.meta_reader import SoundTrapWavFile from pbp.meta_gen.json_generator import JsonGenerator -from pbp.meta_gen.utils import parse_s3_or_gcp_url, InstrumentType, get_datetime, plot_daily_coverage +from pbp.meta_gen.utils import ( + parse_s3_or_gcp_url, + InstrumentType, + get_datetime, + plot_daily_coverage, +) class SoundTrapMetadataGenerator(MetadataGeneratorAbstract): @@ -92,7 +97,7 @@ def run(self): # dates self.log.info(f"Searching between {start_dt} and {end_dt}") - client = boto3.client('s3', config=Config(signature_version=UNSIGNED)) + client = boto3.client("s3", config=Config(signature_version=UNSIGNED)) paginator = client.get_paginator("list_objects") operation_parameters = {"Bucket": bucket} @@ -121,7 +126,9 @@ def run(self): client.download_file(bucket, key_xml, xml_path) wav_files.append(SoundTrapWavFile(uri, xml_path, key_dt)) except Exception as ex: - self.log.error(f"Could not download {key_xml} - {str(ex)}") + self.log.error( + f"Could not download {key_xml} - {str(ex)}" + ) continue self.log.info( @@ -171,7 +178,13 @@ def run(self): json_gen.run() # plot the daily coverage - plot_file = plot_daily_coverage(InstrumentType.SOUNDTRAP, self.df, self.json_base_dir, self.start, self.end) + plot_file = plot_daily_coverage( + InstrumentType.SOUNDTRAP, + self.df, + self.json_base_dir, + self.start, + self.end, + ) self.log.info(f"Coverage plot saved to {plot_file}") diff --git a/pbp/meta_gen/json_generator.py b/pbp/meta_gen/json_generator.py index 70b0e42..7d93e44 100644 --- a/pbp/meta_gen/json_generator.py +++ b/pbp/meta_gen/json_generator.py @@ -172,7 +172,9 @@ def no_jitter(self, day_df: pd.DataFrame) -> pd.DataFrame: :return: The corrected dataframe """ - self.log.info(f"Using file start times as is, setting jitter to 0 and calculating end times.") + self.log.info( + f"Using file start times as is, setting jitter to 0 and calculating end times." + ) # calculate the difference between each row start time and save as diff in a copy of the dataframe day_df = day_df.copy() day_df["diff"] = day_df["start"].diff() diff --git a/pbp/meta_gen/utils.py b/pbp/meta_gen/utils.py index 325c4ae..7dad864 100644 --- a/pbp/meta_gen/utils.py +++ b/pbp/meta_gen/utils.py @@ -92,7 +92,13 @@ def get_datetime(time_str: str, prefixes: List[str]): return None -def plot_daily_coverage(instrument_type: InstrumentType, df: pd.DataFrame, base_dir: str, start: datetime, end: datetime) -> str: +def plot_daily_coverage( + instrument_type: InstrumentType, + df: pd.DataFrame, + base_dir: str, + start: datetime, + end: datetime, +) -> str: """ Plot the daily coverage of the recordings :param instrument_type: The type of instrument, e.g. NRS, ICLISTEN, SOUNDTRAP @@ -104,20 +110,22 @@ def plot_daily_coverage(instrument_type: InstrumentType, df: pd.DataFrame, base_ """ # Create a plot of the dataframe with the x-axis as the month, and the y-axis as the daily recording coverage, # which is percent of the day covered by recordings - plt.rcParams['text.usetex'] = False + plt.rcParams["text.usetex"] = False df["duration"] = (df["end"] - df["start"]).dt.total_seconds() ts_df = df[["start", "duration"]].copy() - ts_df.set_index('start', inplace=True) - daily_sum_df = ts_df.resample('D').sum() + ts_df.set_index("start", inplace=True) + daily_sum_df = ts_df.resample("D").sum() daily_sum_df["coverage"] = 100 * daily_sum_df["duration"] / 86400 - daily_sum_df["coverage"] = daily_sum_df["coverage"].round() # round to nearest integer + daily_sum_df["coverage"] = daily_sum_df[ + "coverage" + ].round() # round to nearest integer plot = daily_sum_df["coverage"].plot() plot.set_ylabel("Daily Coverage (%)") plot.set_xlabel("Date") plot.set_xticks(daily_sum_df.index.values) # Angle the x-axis labels for better readability and force them to be in the format YYYY-MM-DD - plot.set_xticklabels([x.strftime('%Y-%m-%d') for x in daily_sum_df.index]) - plot.set_xticklabels(plot.get_xticklabels(), rotation=45, horizontalalignment='right') + plot.set_xticklabels([x.strftime("%Y-%m-%d") for x in daily_sum_df.index]) + plot.set_xticklabels(plot.get_xticklabels(), rotation=45, horizontalalignment="right") # Adjust the title based on the instrument type if instrument_type == InstrumentType.NRS: plot.set_title("Daily Coverage of NRS Recordings") @@ -131,4 +139,4 @@ def plot_daily_coverage(instrument_type: InstrumentType, df: pd.DataFrame, base_ fig.set_size_inches(10, 5) fig.set_dpi(dpi) fig.savefig(plot_file.as_posix(), bbox_inches="tight") - return plot_file.as_posix() \ No newline at end of file + return plot_file.as_posix() diff --git a/tests/test_meta_generator.py b/tests/test_meta_generator.py index f68fe69..a90e22a 100644 --- a/tests/test_meta_generator.py +++ b/tests/test_meta_generator.py @@ -83,6 +83,7 @@ def test_soundtrap_generator(): coverage_plot = json_dir / "soundtrap_coverage_20230715_20230716.jpg" assert coverage_plot.exists() + def test_iclisten_generator(): """ Test fixture for IcListenMetadataGenerator.