Update jobscript with findings from #9
Peter9192 committed Aug 15, 2024
1 parent 6c90d2a commit ca32205
Showing 1 changed file with 28 additions and 23 deletions.
workflows/snakemake/wrf.job (28 additions, 23 deletions)
@@ -1,37 +1,42 @@
#!/bin/bash
-#SBATCH --job-name=wrf_experiment # Job name
-#SBATCH --partition=rome # Request thin partition. Up to one hour goes to fast queue
-#SBATCH --time=00:05:00 # Maximum runtime (D-HH:MM:SS)
-#SBATCH --nodes=1 # Number of nodes (one thin node has 128 cpu cores)
-#SBATCH --ntasks=32 # Number of tasks per node / number of patches in the domain - parallelized with MPI / DMPAR / multiprocessing
-#SBATCH --cpus-per-task=4 # Number of CPU cores per task / number of tiles within each patch - parallelized with OpenMP / SMPAR / multithreading
-
-# Note: number cpus-per-task * ntasks should not exceed the total available cores on requested nodes
-# 8*16 = 128 exactly fits on one thin node.
-
-# Each process can do multithreading but limited to the number of cpu cores allocated to each process
-export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}
-# export OMP_PLACES=cores
-# export OMP_PROC_BIND=close
-
-# https://journal.fluidnumerics.com/wrf-v4-on-google-cloud#h.vin1ct6ww426
-export OMP_PLACES=threads
-export OMP_PROC_BIND=true
-
#
# From WRF/run, submit as
# sbatch wrf.job
#
# or, from any other directory, submit as
# sbatch wrf.job /path/to/wrf.exe
-wrf_executable="${1:-wrf.exe}"
+#
+#
+# Number cpus-per-task * ntasks should not exceed the total available cores on requested nodes
+# 8*16 = 128 exactly fits on one rome node.
+# 8*24 = 192 exactly fits on one genoa node.
+#
+# For reference, see e.g.
+#
+# SURF docs: https://servicedesk.surf.nl/wiki/display/WIKI/Methods+of+parallelization
+# Nice guide on Hybrid MPI/OpenMP: https://nrel.github.io/HPC/blog/2021-06-18-srun/#6-hybrid-mpiopenmpi
+# Rome WRF benchmark: https://www.dell.com/support/kbdoc/en-us/000152654/wrf-performance-on-amd-rome-platform-multi-node-study
+# Genoa WRF benchmark: https://infohub.delltechnologies.com/en-us/p/hpc-application-performance-on-dell-poweredge-r6625-with-amd-epyc-genoa/
+#
+#
+#SBATCH --job-name=wrf_experiment # Job name
+#SBATCH --partition=genoa # Request genoa partition. Up to one hour goes to fast queue
+#SBATCH --time=5-00:00:00 # Maximum runtime (D-HH:MM:SS)
+#SBATCH --nodes=1 # Number of nodes (one genoa node has 192 cpu cores)
+#SBATCH --ntasks=24 # Number of tasks per node / number of patches in the domain - parallelized with MPI / DMPAR / multiprocessing
+#SBATCH --cpus-per-task=8 # Number of CPU cores per task / number of tiles within each patch - parallelized with OpenMP / SMPAR / multithreading

# Load dependencies
module load 2023
module load netCDF-Fortran/4.6.1-gompi-2023a # also loads gcc and gompi
export NETCDF=$(nf-config --prefix)

-# mpiexec -np ${SLURM_NTASKS} --map-by node:PE=$OMP_NUM_THREADS --rank-by core $wrf_executable
+# Configure OpenMP threads & core affinity
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}
+export OMP_PLACES=cores
+export OMP_PROC_BIND=close
+
-# https://journal.fluidnumerics.com/wrf-v4-on-google-cloud#h.vin1ct6ww426
-mpiexec -np ${SLURM_NTASKS} --map-by core --bind-to core $wrf_executable
+# If wrf executable not passed explicitly to script, then default to wrf.exe in working dir
+wrf_executable="${1:-wrf.exe}"

+srun $wrf_executable

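The new settings rely on the hybrid MPI/OpenMP layout described in the header comments: ntasks * cpus-per-task must fit on the requested node (24 * 8 = 192 cores on genoa), with srun launching the MPI ranks and the OMP_* variables pinning the threads inside each rank. Before spending a five-day allocation on WRF, a short throwaway job can confirm that the ranks actually land on distinct blocks of cores. The sketch below is not part of the commit; it reuses the same SBATCH request, uses a hypothetical job name layout_check, and assumes taskset (util-linux) is available on the compute nodes.

#!/bin/bash
#SBATCH --job-name=layout_check # Hypothetical check job, same resources as wrf.job
#SBATCH --partition=genoa
#SBATCH --time=00:05:00
#SBATCH --nodes=1
#SBATCH --ntasks=24
#SBATCH --cpus-per-task=8

# Each task prints its rank, the node it runs on, and the CPUs it may use;
# taskset (util-linux) reports the affinity list of the task's shell.
srun bash -c 'echo "rank ${SLURM_PROCID} on $(hostname): $(taskset -cp $$)"'

If the binding behaves as intended, each of the 24 ranks reports its own set of 8 cores in the slurm-<jobid>.out file; overlapping core lists would point at a mismatch between the SBATCH request and what srun passes on to the tasks.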