Use pyhydrophone for extracting Soundtrap sensitivity and gain #43
@cparcerisas I added this enhancement during the workshop. Do you have an example of a hydrophone deployment that we could use to implement this? Any details or pseudocode on how to use pyhydrophone would also be helpful.
Hello! Yes, sorry, I said I would give an example and it got lost somewhere in my to-do list.

```python
# Import package modules
import xarray as xr
import dask
import pandas as pd
import time
import json
import pathlib
import yaml
from urllib.parse import urlparse
import os
import pyhydrophone as pyhy
from pbp.meta_gen.gen_soundtrap import SoundTrapMetadataGenerator
from pbp.logging_helper import create_logger_info, create_logger
from pbp.process_helper import ProcessHelper
from pbp.file_helper import FileHelper
# Load the deployment configuration
config_file = pathlib.Path(input('Where is the json file of the deployment to process?'))
with open(config_file, 'r') as f:
    deployment_config = json.load(f)

# The receiver config file name matches the RECORDER_ID field of the deployment config
instrument_file = config_file.parent.parent.joinpath('receivers', deployment_config['RECORDER_ID'] + '.json')
with open(instrument_file, 'r') as f_i:
    instrument_config = json.load(f_i)
# Audio data input specifications
wav_uri = deployment_config['FOLDER_PATH']
# file storage location for the input audio data
wav_path = pathlib.Path(urlparse(wav_uri).path)
json_base_dir = wav_path.parent.joinpath('metadata', 'json') # location to store generated data in JSON format
xml_dir = wav_path  # location of the SoundTrap XML log files (alongside the audio)
serial_number = instrument_config['recorder']['serial_number']
wav_prefix = f'{serial_number}.' # prefix for the audio files
start_date = pd.to_datetime(deployment_config['AUDIO_START_DEPLOYMENT_DATETIME']).to_pydatetime()  # deployment start date
end_date = pd.to_datetime(deployment_config['AUDIO_END_DEPLOYMENT_DATETIME']).to_pydatetime()  # deployment end date
# A prefix for the names of generated files:
deployment_name = deployment_config['DEPLOYMENT_NAME']
# Location for generated files:
output_dir = config_file.parent.parent.parent.joinpath('HMD_pbp', deployment_name)
global_attrs_uri = "../data/pbp_yaml/globalAttributes.yaml"
variable_attrs_uri = "../data/pbp_yaml/variableAttributes.yaml"
# Populate deployment-specific yaml attributes
deployment_attrs_uri = output_dir.joinpath(f'globalAttributes_{deployment_name}.yml')
with open(global_attrs_uri) as f_g:
    yaml_config = yaml.safe_load(f_g)
yaml_config['creator_name'] = deployment_config['AUTHOR_NAME']
yaml_config['creator_email'] = deployment_config['AUTHOR_EMAIL']
lon = deployment_config['location']['DEP_LON_DEG']
lat = deployment_config['location']['DEP_LAT_DEG']
yaml_config['geospatial_bounds'] = f'POINT ({lon} {lat})'
yaml_config['comment'] = deployment_config['DEPLOYMENT_COMMENTS']
yaml_config['platform'] = deployment_config['location']['MOORING_TYPE']
yaml_config['instrument'] = deployment_config['RECORDER_ID']
if not output_dir.exists():
    print('Creating a new directory...', output_dir)
    os.makedirs(output_dir)  # makedirs: parent folders may not exist yet
with open(deployment_attrs_uri, 'w') as file:
    yaml.dump(yaml_config, file)
voltage_multiplier = 1.0
subset_to = (10, 24000)  # frequency band (Hz) for the HMB product
# Get the SoundTrap calibration (sensitivity, Vpp, preamp gain) via pyhydrophone
st = pyhy.SoundTrap(model=instrument_config['recorder']['model'],
                    serial_number=int(instrument_config['recorder']['serial_number']),
                    name=deployment_config['RECORDER_ID'],
                    gain_type='High')
print('SoundTrap settings:')
print('sensitivity: ', st.sensitivity)
print('Vpp: ', st.Vpp)
print('preamp_gain: ', st.preamp_gain)
print('gain_type: ', 'High')
meta_already_generated = input('Is the metadata already generated? yes/no ') == 'yes'
# A logger that only logs info-level messages to the console
# (use create_logger for more verbose, file-based logging)
log = create_logger_info(deployment_config['DEPLOYMENT_NAME'])
if not meta_already_generated:
    # Create the metadata generator
    meta_gen = SoundTrapMetadataGenerator(
        log=log,
        uri=wav_uri,
        json_base_dir=str(json_base_dir),
        xml_dir=str(xml_dir),
        start=start_date,
        end=end_date,
        prefixes=[wav_prefix],
        seconds_per_file=20)
    # Generate the metadata - this will generate JSON files in the json_base_dir
    meta_gen.run()
def process_date(date: str, gen_netcdf: bool = True):
    """
    Main function to generate the HMB product for a given day.
    It makes use of supporting elements in PBP in terms of logging,
    file handling, and PyPAM-based HMB generation.

    :param date: Date to process, in YYYYMMDD format.
    :param gen_netcdf: Allows the caller to skip the `.nc` creation here
        and instead save the datasets after all days have been generated
        (see parallel execution below).
    :return: the generated xarray dataset.
    """
    log_filename = f"{output_dir}/{deployment_name}{date}.log"
    log = create_logger(
        log_filename_and_level=(log_filename, "INFO"),
        console_level=None,
    )
    file_helper = FileHelper(
        log=log,
        json_base_dir=str(json_base_dir),
        gs_client=None,
        download_dir=None,
    )
    process_helper = ProcessHelper(
        log=log,
        file_helper=file_helper,
        output_dir=str(output_dir),
        output_prefix=deployment_name,
        global_attrs_uri=str(deployment_attrs_uri),
        variable_attrs_uri=variable_attrs_uri,
        voltage_multiplier=voltage_multiplier,
        sensitivity_uri=None,
        sensitivity_flat_value=-st.sensitivity,
        subset_to=subset_to,
    )

    # Now, get the HMB result:
    print(f"::: Started processing {date=}")
    result = process_helper.process_day(date)

    if gen_netcdf:
        nc_filename = f"{output_dir}/{deployment_name}{date}.nc"
        print(f"::: Ended processing {date=} => {nc_filename=}")
    else:
        print(f"::: Ended processing {date=} => (dataset generated in memory)")

    if result is not None:
        return result.dataset
    else:
        print(f"::: UNEXPECTED: no segments were processed for {date=}")
def process_multiple_dates(
        dates: list[str], gen_netcdf: bool = False
) -> list[xr.Dataset]:
    """
    Generates HMB for multiple days in parallel using Dask.
    Returns the resulting HMB datasets.

    :param dates: The dates to process, each in YYYYMMDD format.
    :param gen_netcdf: Allows the caller to skip the `.nc` creation here
        and instead save the datasets after all days have been generated.
    :return: the list of generated datasets.
    """

    @dask.delayed
    def delayed_process_date(date: str):
        return process_date(date, gen_netcdf=gen_netcdf)

    # To display total elapsed time at the end of the processing:
    start_time = time.time()

    # This will be called by Dask when all dates have completed processing:
    def aggregate(*datasets):  # -> list[xr.Dataset]
        elapsed_time = time.time() - start_time
        print(
            f"===> All {len(datasets)} dates completed. "
            f"Elapsed time: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} mins)"
        )
        return datasets

    # Prepare the processes:
    delayed_processes = [delayed_process_date(date) for date in dates]
    aggregation = dask.delayed(aggregate)(*delayed_processes)

    # And launch them:
    return aggregation.compute()
# We can use pandas to help us generate the list of dates:
date_range = pd.date_range(start=start_date, end=end_date, freq='1D')
dates = date_range.strftime("%Y%m%d").tolist()
# Now, launch the generation:
print(f"Launching HMB generation for {len(dates)} {dates=}")
# Get all HMB datasets:
generated_datasets = process_multiple_dates(dates, gen_netcdf=True)
print(f"Generated datasets: {len(generated_datasets)}\n") The json files which we use look like the attached ones. As you can see from the code, we have a directory where we store the metadata with this structure: metadata And in the script we pass the deployment config and afterwards look for the receiver config by having the receiver config file name matching the RECORDER_ID field form the deployment config (this is to avoid duplication of sensors metadata info) I hope this helps! Let me know if I can give more information or something is unclear :) |
Using the pyhydrophone package is the preferred way to pull the sensitivity from the SoundTrap serial number and model type.
Propose adding

INSTRUMENT_NAME=AUTOGENERATE
INSTRUMENT_SN=AUTOGENERATE

to globalAttributes.yml, then using pyhydrophone to populate the sensitivity and gain, along the lines of the sketch below.
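A minimal sketch of that population step, assuming the model and serial number have already been resolved from the deployment metadata (the values below are hypothetical; the `pyhy.SoundTrap` call and the `sensitivity`/`Vpp`/`preamp_gain` attributes are the same ones used in the script above):

```python
import pyhydrophone as pyhy

# Hypothetical values resolved from the AUTOGENERATE placeholders
model = 'ST300HF'
serial_number = 1234567

st = pyhy.SoundTrap(name=f'{model}_{serial_number}', model=model,
                    serial_number=serial_number, gain_type='High')

# Calibration values exposed by pyhydrophone, which could then be written
# into the global attributes and passed on to ProcessHelper
print(st.sensitivity, st.Vpp, st.preamp_gain)
sensitivity_flat_value = -st.sensitivity  # as done in the script above
```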
Hydrophone serial numbers are not necessarily unique to a single instrument. The workaround: when there is an error along the lines of "There are multiple instruments", pyhydrophone returns the dict keys of the possible instruments, which can be parsed.
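A hedged sketch of that workaround; the exact exception type and message format are assumptions, and the helper name is hypothetical. The idea is only to surface the candidate instruments when the serial number is ambiguous:

```python
import pyhydrophone as pyhy

def soundtrap_from_sn(model: str, serial_number: int, gain_type: str = 'High'):
    """Resolve a SoundTrap by serial number, reporting candidates on ambiguity."""
    try:
        return pyhy.SoundTrap(name=f'{model}_{serial_number}', model=model,
                              serial_number=serial_number, gain_type=gain_type)
    except Exception as e:  # assumption: the ambiguity surfaces as an exception
        if 'multiple instruments' in str(e).lower():
            # assumption: the message carries the dict keys of the possible
            # instruments, which the caller can parse or show to the user
            raise RuntimeError(f'Ambiguous serial number {serial_number}; '
                               f'candidates reported by pyhydrophone: {e}') from e
        raise
```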
@cparcerisas can provide more details and examples