From d67c848d82ce522a8d7927428a5939f24daefe00 Mon Sep 17 00:00:00 2001 From: aerodynamic-sauce-pan Date: Tue, 14 May 2024 11:47:07 +0200 Subject: [PATCH] docstrings --- scripts/crop_rasters.py | 42 ++++++++++++++-------- scripts/split_val_per_species_frequency.py | 17 ++++++--- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/scripts/crop_rasters.py b/scripts/crop_rasters.py index 3a3d134b..6349dcc9 100644 --- a/scripts/crop_rasters.py +++ b/scripts/crop_rasters.py @@ -1,19 +1,31 @@ -import os +"""This script crops a window from raster files based on coordinates and +outputs it as a new file. +""" -import numpy as np import pyproj import rasterio -from pyproj import CRS, Transformer +from pyproj import Transformer from rasterio.mask import mask from shapely.geometry import box from tqdm import tqdm -def main(fps, data_crs, coords_crs): +def main(fps: dict, + data_crs: str, + coords_crs: str): """Clip and export a window from raster files. - + Also possible to do via command line: `rio input_raster output_raster --bounds "xmin, ymin xmax ymax"` + + Parameters + ---------- + fps : dict + file paths to the rasters + data_crs : str + data CRS (destination crs) + coords_crs : str + coordinates CRS (source crs) """ # Define the coordinates of the area you want to crop minx, miny = 499980.0, 4790220.0 # EPSG 32631 @@ -21,7 +33,7 @@ def main(fps, data_crs, coords_crs): if data_crs != coords_crs: transformer = Transformer.from_crs(pyproj.CRS.from_epsg(coords_crs), pyproj.CRS.from_epsg(data_crs), always_xy=True) minx, miny, maxx, maxy = transformer.transform_bounds(minx, miny, maxx, maxy) - bbox = box(minx-0.8, miny-0.8, maxx+0.8, maxy+0.8) + bbox = box(minx - 0.8, miny - 0.8, maxx + 0.8, maxy + 0.8) for k, v in tqdm(fps.items()): # Open the raster file @@ -41,14 +53,14 @@ def main(fps, data_crs, coords_crs): # Write the cropped raster to a new file with rasterio.open(f'{k}_crop_sample.tif', "w", **out_meta) as dest: dest.write(out_image) - + if __name__ == '__main__': - root = './' - fps = {'bio_1': root + 'bio_1/bio_1_FR.tif', - 'bio_5': root + 'bio_5/bio_5_FR.tif', - 'bio_6': root + 'bio_6/bio_6_FR.tif', - 'bio_12': root + 'bio_12/bio_12_FR.tif', + ROOT = './' + FPS = {'bio_1': ROOT + 'bio_1/bio_1_FR.tif', + 'bio_5': ROOT + 'bio_5/bio_5_FR.tif', + 'bio_6': ROOT + 'bio_6/bio_6_FR.tif', + 'bio_12': ROOT + 'bio_12/bio_12_FR.tif', } - data_crs = '4326' - coords_crs = '32631' - main(fps, data_crs, coords_crs) \ No newline at end of file + DATA_CRS = '4326' + COORDS_CRS = '32631' + main(FPS, DATA_CRS, COORDS_CRS) diff --git a/scripts/split_val_per_species_frequency.py b/scripts/split_val_per_species_frequency.py index e0f3e074..437aade0 100644 --- a/scripts/split_val_per_species_frequency.py +++ b/scripts/split_val_per_species_frequency.py @@ -1,3 +1,8 @@ +"""This script splits an obs csv in val/train based on the frequency +of occurrences in the whole dataset. +It does NOT perform a spatial split. +""" + from copy import deepcopy import numpy as np @@ -5,7 +10,9 @@ from tqdm import tqdm -def main(input_name: str, output_name:str, val_ratio: float = 0.05): +def main(input_name: str, + output_name: str, + val_ratio: float = 0.05): """Split an obs csv in val/train. Performs a split with equal proportions of classes @@ -14,7 +21,7 @@ def main(input_name: str, output_name:str, val_ratio: float = 0.05): the obs file, they are not included in the val split. The val proportion is defined by the val_ratio argument. - + Input csv is expected to have at least the following columns: ['speciesId'] """ @@ -44,6 +51,6 @@ def main(input_name: str, output_name:str, val_ratio: float = 0.05): print(f'{indivisible_sid_n_rows} rows were not included in val due to indivisibility by {val_ratio} (too few observations to split in at least 1 obs train / 1 obs val).') if __name__ == '__main__': - input_name = 'sample_obs' - output_name = 'sample_obs' - main(input_name, output_name) + INPUT_NAME = 'sample_obs' + OUTPUT_NAME = 'sample_obs' + main(INPUT_NAME, OUTPUT_NAME)