Commit
Update jobscript with findings from #9
Showing 1 changed file with 28 additions and 23 deletions.
@@ -1,37 +1,42 @@
#!/bin/bash
#SBATCH --job-name=wrf_experiment # Job name
#SBATCH --partition=rome # Request thin partition. Up to one hour goes to fast queue
#SBATCH --time=00:05:00 # Maximum runtime (D-HH:MM:SS)
#SBATCH --nodes=1 # Number of nodes (one thin node has 128 cpu cores)
#SBATCH --ntasks=32 # Number of tasks per node / number of patches in the domain - parallelized with MPI / DMPAR / multiprocessing
#SBATCH --cpus-per-task=4 # Number of CPU cores per task / number of tiles within each patch - parallelized with OpenMP / SMPAR / multithreading

# Note: number cpus-per-task * ntasks should not exceed the total available cores on requested nodes
# 8*16 = 128 exactly fits on one thin node.

# Each process can do multithreading but limited to the number of cpu cores allocated to each process
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}
# export OMP_PLACES=cores
# export OMP_PROC_BIND=close

# https://journal.fluidnumerics.com/wrf-v4-on-google-cloud#h.vin1ct6ww426
export OMP_PLACES=threads
export OMP_PROC_BIND=true

#
# From WRF/run, submit as
# sbatch wrf.job
#
# or, from any other directory, submit as
# sbatch wrf.job /path/to/wrf.exe
wrf_executable="${1:-wrf.exe}"
#
#
# Number cpus-per-task * ntasks should not exceed the total available cores on requested nodes
# 8*16 = 128 exactly fits on one rome node.
# 8*24 = 192 exactly fits on one genoa node.
#
# For reference, see e.g.
#
# SURF docs: https://servicedesk.surf.nl/wiki/display/WIKI/Methods+of+parallelization
# Nice guide on Hybrid MPI/OpenMP: https://nrel.github.io/HPC/blog/2021-06-18-srun/#6-hybrid-mpiopenmpi
# Rome WRF benchmark: https://www.dell.com/support/kbdoc/en-us/000152654/wrf-performance-on-amd-rome-platform-multi-node-study
# Genoa WRF benchmark: https://infohub.delltechnologies.com/en-us/p/hpc-application-performance-on-dell-poweredge-r6625-with-amd-epyc-genoa/
#
#
#SBATCH --job-name=wrf_experiment # Job name
#SBATCH --partition=genoa # Request genoa partition. Up to one hour goes to fast queue
#SBATCH --time=5-00:00:00 # Maximum runtime (D-HH:MM:SS)
#SBATCH --nodes=1 # Number of nodes (one genoa node has 192 cpu cores)
#SBATCH --ntasks=24 # Number of tasks per node / number of patches in the domain - parallelized with MPI / DMPAR / multiprocessing
#SBATCH --cpus-per-task=8 # Number of CPU cores per task / number of tiles within each patch - parallelized with OpenMP / SMPAR / multithreading
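# (Editor's sketch, not part of the original commit: the decomposition above can be
#  sanity-checked at runtime with Slurm's standard environment variables, e.g.
#    echo "Using $(( SLURM_NTASKS * SLURM_CPUS_PER_TASK )) of ${SLURM_CPUS_ON_NODE} cores on this node"
#  which should report 192 of 192 for the genoa settings requested above.)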

# Load dependencies
module load 2023
module load netCDF-Fortran/4.6.1-gompi-2023a # also loads gcc and gompi
export NETCDF=$(nf-config --prefix)
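# nf-config --prefix prints the installation prefix of the netCDF-Fortran module loaded above;
# WRF conventionally locates its netCDF installation through the NETCDF environment variable.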

# mpiexec -np ${SLURM_NTASKS} --map-by node:PE=$OMP_NUM_THREADS --rank-by core $wrf_executable
# Configure OpenMP threads & core affinity
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}
export OMP_PLACES=cores
export OMP_PROC_BIND=close
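# OMP_PLACES=cores makes each OpenMP place a physical core; OMP_PROC_BIND=close binds
# each task's threads to places close to the parent thread, i.e. to adjacent cores.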

# https://journal.fluidnumerics.com/wrf-v4-on-google-cloud#h.vin1ct6ww426
mpiexec -np ${SLURM_NTASKS} --map-by core --bind-to core $wrf_executable
# If wrf executable not passed explicitly to script, then default to wrf.exe in working dir
wrf_executable="${1:-wrf.exe}"

srun $wrf_executable
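
As the header comments in the script indicate, the job can then be submitted from the WRF run directory with the default executable, or from any directory by passing the executable path explicitly (the path below is the script's own placeholder):

    sbatch wrf.job
    sbatch wrf.job /path/to/wrf.exe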