Skip to content

Commit

Permalink
docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
tlarcher committed May 14, 2024
1 parent d08e9c6 commit d67c848
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 20 deletions.
42 changes: 27 additions & 15 deletions scripts/crop_rasters.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,39 @@
import os
"""This script crops a window from raster files based on coordinates and
outputs it as a new file.
"""

import numpy as np
import pyproj
import rasterio
from pyproj import CRS, Transformer
from pyproj import Transformer
from rasterio.mask import mask
from shapely.geometry import box
from tqdm import tqdm


def main(fps, data_crs, coords_crs):
def main(fps: dict,
data_crs: str,
coords_crs: str):
"""Clip and export a window from raster files.
Also possible to do via command line:
`rio clip input_raster output_raster --bounds "xmin ymin xmax ymax"`
Parameters
----------
fps : dict
file paths to the rasters
data_crs : str
data CRS (destination crs)
coords_crs : str
coordinates CRS (source crs)
"""
# Define the coordinates of the area you want to crop
minx, miny = 499980.0, 4790220.0 # EPSG 32631
maxx, maxy = 609780.0, 4900020.0 # EPSG 32631
if data_crs != coords_crs:
transformer = Transformer.from_crs(pyproj.CRS.from_epsg(coords_crs), pyproj.CRS.from_epsg(data_crs), always_xy=True)
minx, miny, maxx, maxy = transformer.transform_bounds(minx, miny, maxx, maxy)
bbox = box(minx-0.8, miny-0.8, maxx+0.8, maxy+0.8)
bbox = box(minx - 0.8, miny - 0.8, maxx + 0.8, maxy + 0.8)

for k, v in tqdm(fps.items()):
# Open the raster file
Expand All @@ -41,14 +53,14 @@ def main(fps, data_crs, coords_crs):
# Write the cropped raster to a new file
with rasterio.open(f'{k}_crop_sample.tif', "w", **out_meta) as dest:
dest.write(out_image)

if __name__ == '__main__':
root = './'
fps = {'bio_1': root + 'bio_1/bio_1_FR.tif',
'bio_5': root + 'bio_5/bio_5_FR.tif',
'bio_6': root + 'bio_6/bio_6_FR.tif',
'bio_12': root + 'bio_12/bio_12_FR.tif',
ROOT = './'
FPS = {'bio_1': ROOT + 'bio_1/bio_1_FR.tif',
'bio_5': ROOT + 'bio_5/bio_5_FR.tif',
'bio_6': ROOT + 'bio_6/bio_6_FR.tif',
'bio_12': ROOT + 'bio_12/bio_12_FR.tif',
}
data_crs = '4326'
coords_crs = '32631'
main(fps, data_crs, coords_crs)
DATA_CRS = '4326'
COORDS_CRS = '32631'
main(FPS, DATA_CRS, COORDS_CRS)
17 changes: 12 additions & 5 deletions scripts/split_val_per_species_frequency.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
"""This script splits an obs csv into val/train based on the frequency
of occurrences in the whole dataset.
It does NOT perform a spatial split.
"""

from copy import deepcopy

import numpy as np
import pandas as pd
from tqdm import tqdm


def main(input_name: str, output_name:str, val_ratio: float = 0.05):
def main(input_name: str,
output_name: str,
val_ratio: float = 0.05):
"""Split an obs csv into val/train.
Performs a split with equal proportions of classes
Expand All @@ -14,7 +21,7 @@ def main(input_name: str, output_name:str, val_ratio: float = 0.05):
the obs file, they are not included in the val split.
The val proportion is defined by the val_ratio argument.
Input csv is expected to have at least the following columns:
['speciesId']
"""
Expand Down Expand Up @@ -44,6 +51,6 @@ def main(input_name: str, output_name:str, val_ratio: float = 0.05):
print(f'{indivisible_sid_n_rows} rows were not included in val due to indivisibility by {val_ratio} (too few observations to split in at least 1 obs train / 1 obs val).')

if __name__ == '__main__':
input_name = 'sample_obs'
output_name = 'sample_obs'
main(input_name, output_name)
INPUT_NAME = 'sample_obs'
OUTPUT_NAME = 'sample_obs'
main(INPUT_NAME, OUTPUT_NAME)

0 comments on commit d67c848

Please sign in to comment.