-
Notifications
You must be signed in to change notification settings - Fork 90
White
Building instructions on White
White is a POWER8 cluster at Sandia NL
Nodes on white.sandia.gov consist of two POWER8 sockets. Each socket is sub-divided into two parts, or "NUMA regions", each of which has its own memory controller. This means that cores local to a NUMA domain access their local memory faster than memory in the other region of the same socket or on the other socket. It is important that your thread affinity and MPI process placement reflect this configuration to achieve the highest performance.
-
To build Trilinos on White, use the following configure script as a reference:
#!/bin/bash
# Preamble of the Trilinos configure script.
# The script is meant to be sourced rather than executed (see the warning it
# prints), so that the `module` commands it runs affect the calling shell.

# Remove stale CMake state from the current build directory.
rm -f CMakeCache.txt
rm -rf CMakeFiles

echo ""
echo "WARNING: In order to get modules correctly loaded source this files, don't just execute it."
echo ""

# Keep any arguments given to the script as one space-joined string;
# it is appended to the cmake command line further down.
EXTRA_ARGS="$*"
# Trilinos Path: location of the Trilinos source tree handed to cmake.
TRILINOS_PATH=${HOME}/TrilinosDir/Trilinos

# Top Level Configuration Options
TESTS=ON
EXAMPLES=ON

# Execution-space / backend switches.
CUDA=OFF
OPENMP=ON
PTHREAD=OFF
SERIAL=ON

# Complex scalar support.
COMPLEX=ON
#----------------------------------------------------
#TPL Paths-------------------------------------------
#----------------------------------------------------

#Generic parts of the paths, change this only to switch for example
#compiler while using same version numbers for tpl libraries
#This is meant for Sandia SEMS module system and similar setups.
TPL_PATH=/home/projects/power8

#Options: gcc/4.7.2 gcc/4.8.4 gcc/4.9.2 gcc/5.1.0 intel/14.0.4 intel/15.0.5 clang/3.5.2 clang/3.6.1
COMPILER_SUFFIX=gnu/4.9.2
#Options: cuda/6.5.14 cuda/7.0.28
CUDA_SUFFIX=cuda/7.5.7
#Options: openmpi/1.6.5 openmpi/1.8.7
MPI_SUFFIX=openmpi/1.10.0

#Using BLAS from modules
BLAS_SUFFIX=blas/3.5.0
BLAS_PATH=${TPL_PATH}/${BLAS_SUFFIX}/${COMPILER_SUFFIX}

#Using LAPACK from modules
LAPACK_SUFFIX=lapack/3.5.0
LAPACK_PATH=${TPL_PATH}/${LAPACK_SUFFIX}/${COMPILER_SUFFIX}

#Using Boost from sems
BOOST_SUFFIX=boost/1.55.0
BOOST_PATH=${TPL_PATH}/${BOOST_SUFFIX}/${MPI_SUFFIX}/${COMPILER_SUFFIX}/${CUDA_SUFFIX}

#SuperLU is not correctly installed in the sems modules so use your own
SUPERLU_SUFFIX=superlu/4.3
SUPERLU_PATH=${HOME}/libs/${SUPERLU_SUFFIX}
#SUPERLU_PATH=${TPL_PATH}/${SUPERLU_SUFFIX}/${COMPILER_SUFFIX}/base

#Using HDF5 from sems
HDF5_SUFFIX=hdf5/1.8.15
HDF5_PATH=${TPL_PATH}/${HDF5_SUFFIX}/${MPI_SUFFIX}/${COMPILER_SUFFIX}/${CUDA_SUFFIX}

#Using NetCDF from sems
NETCDF_SUFFIX=netcdf-4.3.3.1
NETCDF_PATH=/home/ipdemes/install/netcdf-4.3.3.1

#Zlib doesn't exist in the sems modules yet
ZLIB_SUFFIX=zlib/1.2.8
ZLIB_PATH=${TPL_PATH}/${ZLIB_SUFFIX}

# hwloc installation prefix (fixed path, not derived from the suffixes above).
HWLOC_PATH=/home/projects/power8/hwloc/1.10.1/gnu/4.8.2/
#--------------------------------------------------
#--------------------------------------------------

# Reset the module environment, then load the build tools and TPL modules.
module purge
module load cmake/3.0.2

if [[ "${CUDA}" == 'ON' ]]; then
  module load "${CUDA_SUFFIX}"
  module load "${MPI_SUFFIX}/${COMPILER_SUFFIX}/${CUDA_SUFFIX}"
  # For CUDA builds the MPI C++ wrapper is pointed at Kokkos' nvcc_wrapper.
  export OMPI_CXX=${TRILINOS_PATH}/packages/kokkos/config/nvcc_wrapper
else
  module load "${MPI_SUFFIX}/${COMPILER_SUFFIX}/${CUDA_SUFFIX}"
fi

module load "${BOOST_SUFFIX}/${MPI_SUFFIX}/${COMPILER_SUFFIX}/${CUDA_SUFFIX}"
module load "${HDF5_SUFFIX}/${MPI_SUFFIX}/${COMPILER_SUFFIX}/${CUDA_SUFFIX}"
# NOTE(review): NETCDF_SUFFIX is set to "netcdf-4.3.3.1" (dash, not slash) in
# this script; confirm that a module with that name actually exists.
module load "${NETCDF_SUFFIX}/${MPI_SUFFIX}/${COMPILER_SUFFIX}/${CUDA_SUFFIX}"
module load "${ZLIB_SUFFIX}"

# CMAKE Command structured
# The cmake command below is organized in this order:
#   Basic Options for backends (Enable OpenMP etc.)
#   Build control (flags, debug, ETI)
#   Generic CMake options
#   TPL Setup
#   Packages (on/off, tests, examples)
#
# Every option line must end in a backslash so the whole invocation is a
# single command; the last two arguments are the user-supplied extra
# arguments and the Trilinos source directory.
#
# NOTE(review): several options are passed more than once with different
# values (e.g. Teuchos_ENABLE_COMPLEX=${COMPLEX} and later :BOOL=OFF,
# Trilinos_ENABLE_OpenMP=${OPENMP} and later :BOOL=ON). The later occurrence
# is expected to take precedence in the CMake cache - confirm this is intended.
#
# EXTRA_ARGS is left unquoted on purpose so that it word-splits back into
# separate cmake arguments.
# shellcheck disable=SC2086
cmake \
  -D Trilinos_ENABLE_OpenMP=${OPENMP} \
  -D Kokkos_ENABLE_Pthread=${PTHREAD} \
  -D TPL_ENABLE_CUDA=${CUDA} \
  -D Kokkos_ENABLE_Cuda_UVM:BOOL=ON \
  -D Teuchos_ENABLE_COMPLEX=${COMPLEX} \
  -D CMAKE_CXX_FLAGS="-mcpu=power8 -mpowerpc -DNOSSE -ldl -Wall -ansi -pedantic -Wno-unknown-pragmas -Wno-narrowing -Wno-pragmas -Wno-unused-but-set-variable -Wno-delete-non-virtual-dtor -Wno-inline -Wshadow -L/${ZLIB_PATH}/lib -L/${HDF5_PATH}/lib -L/${NETCDF_PATH}/lib -L/${BOOST_PATH}/lib -L/${LAPACK_PATH}/lib -L/${BLAS_PATH}/lib -L/${SUPERLU_PATH}/lib" \
  -D CMAKE_C_FLAGS="-mcpu=power8 -mpowerpc -DNOSSE -ldl -Wall -ansi -pedantic -Wno-unknown-pragmas -Wno-narrowing -Wno-pragmas -Wno-unused-but-set-variable -Wno-inline -Wshadow -L/${ZLIB_PATH}/lib -L/${HDF5_PATH}/lib -L/${NETCDF_PATH}/lib -L/${BOOST_PATH}/lib -L/${LAPACK_PATH}/lib -L/${BLAS_PATH}/lib -L/${SUPERLU_PATH}/lib" \
  -D CMAKE_EXE_LINKER_FLAGS="-mcpu=power8 -mpowerpc -lgfortran -ldl -L/${ZLIB_PATH}/lib -L/${HDF5_PATH}/lib -L/${NETCDF_PATH}/lib -L/${BOOST_PATH}/lib -L/${LAPACK_PATH}/lib -L/${BLAS_PATH}/lib -L/${SUPERLU_PATH}/lib" \
  -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON \
  -D Trilinos_ENABLE_DEBUG:BOOL=OFF \
  -D CMAKE_INSTALL_PREFIX:PATH="$PWD/install_OpenMP/" \
  -D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=ON \
  -D CMAKE_BUILD_TYPE:STRING=RELEASE \
  -D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
  -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \
  -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \
  -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \
  -D BUILD_SHARED_LIBS:BOOL=OFF \
  -D DART_TESTING_TIMEOUT:STRING=600 \
  -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \
  -D Trilinos_ENABLE_CXX11=ON \
  -D Trilinos_CXX11_FLAGS:STRING="--std=c++11" \
  -D TPL_ENABLE_MPI=ON \
  -D MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;numa;-map-by;numa;" \
  -D TPL_ENABLE_BinUtils=OFF \
  -D TPL_ENABLE_SuperLU=OFF \
  -D TPL_SuperLU_LIBRARIES:STRING="${SUPERLU_PATH}/lib/libsuperlu.a" \
  -D TPL_SuperLU_INCLUDE_DIRS:STRING="${SUPERLU_PATH}/include" \
  -D TPL_ENABLE_BLAS=ON \
  -D BLAS_INCLUDE_DIRS:PATH="${BLAS_PATH}/include" \
  -D BLAS_LIBRARY_DIRS:PATH="${BLAS_PATH}/lib" \
  -D TPL_ENABLE_LAPACK=ON \
  -D LAPACK_INCLUDE_DIRS:PATH="${LAPACK_PATH}/include" \
  -D LAPACK_LIBRARY_DIRS:PATH="${LAPACK_PATH}/lib" \
  -D TPL_ENABLE_Boost=ON \
  -D Boost_INCLUDE_DIRS:PATH="${BOOST_PATH}/include" \
  -D Boost_LIBRARY_DIRS:PATH="${BOOST_PATH}/lib" \
  -D TPL_ENABLE_BoostLib=ON \
  -D BoostLib_INCLUDE_DIRS:PATH="${BOOST_PATH}/include" \
  -D BoostLib_LIBRARY_DIRS:PATH="${BOOST_PATH}/lib" \
  -D TPL_ENABLE_Netcdf=ON \
  -D Netcdf_INCLUDE_DIRS:PATH="${NETCDF_PATH}/include" \
  -D Netcdf_LIBRARY_DIRS:PATH="${NETCDF_PATH}/lib" \
  -D TPL_Netcdf_LIBRARIES:PATH="${NETCDF_PATH}/lib/libnetcdf.a;${HDF5_PATH}/lib/libhdf5_hl.a;${HDF5_PATH}/lib/libhdf5.a;${ZLIB_PATH}/lib/libz.a" \
  -D TPL_ENABLE_HDF5=ON \
  -D HDF5_INCLUDE_DIRS:PATH="${HDF5_PATH}/include" \
  -D TPL_HDF5_LIBRARIES:PATH="${HDF5_PATH}/lib/libhdf5_hl.a;${HDF5_PATH}/lib/libhdf5.a;${ZLIB_PATH}/lib/libz.a" \
  -D TPL_ENABLE_Zlib=ON \
  -D Zlib_INCLUDE_DIRS:PATH="${ZLIB_PATH}/include" \
  -D TPL_Zlib_LIBRARIES:PATH="${ZLIB_PATH}/lib/libz.a" \
  -D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
  -D Trilinos_VERBOSE_CONFIGURE:BOOL=OFF \
  -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \
  -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \
  -D Trilinos_ENABLE_SECONDARY_TESTED_CODE:BOOL=ON \
  -D Trilinos_ENABLE_EXPORT_MAKEFILES:BOOL=OFF \
  -D Trilinos_ASSERT_MISSING_PACKAGES:BOOL=OFF \
  -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \
  -D Teuchos_ENABLE_LONG_LONG_INT:BOOL=ON \
  -D Teuchos_ENABLE_COMPLEX:BOOL=OFF \
  -D TPL_ENABLE_Matio=OFF \
  -D Trilinos_ENABLE_TESTS:BOOL=OFF \
  -D Trilinos_ENABLE_TriKota:BOOL=OFF \
  -D Trilinos_ENABLE_Teuchos:BOOL=ON \
  -D Trilinos_ENABLE_Shards:BOOL=ON \
  -D Trilinos_ENABLE_Sacado:BOOL=ON \
  -D Trilinos_ENABLE_Epetra:BOOL=ON \
  -D Trilinos_ENABLE_EpetraExt:BOOL=ON \
  -D Trilinos_ENABLE_Ifpack:BOOL=ON \
  -D Trilinos_ENABLE_AztecOO:BOOL=ON \
  -D Trilinos_ENABLE_Amesos:BOOL=ON \
  -D Trilinos_ENABLE_Anasazi:BOOL=ON \
  -D Trilinos_ENABLE_Belos:BOOL=ON \
  -D Trilinos_ENABLE_ML:BOOL=ON \
  -D Trilinos_ENABLE_Phalanx:BOOL=ON \
  -D Phalanx_ENABLE_TESTS:BOOL=ON \
  -D Phalanx_ENABLE_EXAMPLES:BOOL=ON \
  -D Trilinos_ENABLE_Intrepid:BOOL=ON \
  -D Trilinos_ENABLE_Intrepid2:BOOL=ON \
  -D Intrepid2_ENABLE_TESTS:BOOL=ON \
  -D Intrepid2_ENABLE_EXAMPLES:BOOL=ON \
  -D Trilinos_ENABLE_NOX:BOOL=ON \
  -D Trilinos_ENABLE_Stratimikos:BOOL=ON \
  -D Trilinos_ENABLE_Thyra:BOOL=ON \
  -D Trilinos_ENABLE_Rythmos:BOOL=ON \
  -D Trilinos_ENABLE_OptiPack:BOOL=ON \
  -D Trilinos_ENABLE_GlobiPack:BOOL=ON \
  -D Trilinos_ENABLE_MOOCHO:BOOL=ON \
  -D Trilinos_ENABLE_Stokhos:BOOL=ON \
  -D Trilinos_ENABLE_Piro:BOOL=ON \
  -D Trilinos_ENABLE_Pamgen:BOOL=ON \
  -D Trilinos_ENABLE_Isorropia:BOOL=ON \
  -D Trilinos_ENABLE_Teko:BOOL=ON \
  -D Trilinos_ENABLE_PyTrilinos:BOOL=OFF \
  -D Trilinos_ENABLE_STK:BOOL=ON \
  -D Trilinos_ENABLE_STKExp:BOOL=OFF \
  -D Trilinos_ENABLE_STKClassic:BOOL=OFF \
  -D Trilinos_ENABLE_STKDoc_tests:BOOL=OFF \
  -D Trilinos_ENABLE_STKIO:BOOL=ON \
  -D Trilinos_ENABLE_STKMesh:BOOL=ON \
  -D Trilinos_ENABLE_STKSearch:BOOL=OFF \
  -D Trilinos_ENABLE_STKSearchUtil:BOOL=OFF \
  -D Trilinos_ENABLE_STKTopology:BOOL=ON \
  -D Trilinos_ENABLE_STKTransfer:BOOL=ON \
  -D Trilinos_ENABLE_STKUnit_tests:BOOL=OFF \
  -D Trilinos_ENABLE_STKUtil:BOOL=ON \
  -D Trilinos_ENABLE_SEACAS:BOOL=ON \
  -D Trilinos_ENABLE_SEACASIoss:BOOL=ON \
  -D Trilinos_ENABLE_SEACASExodus:BOOL=ON \
  -D SEACAS_ENABLE_SEACASSVDI:BOOL=OFF \
  -D Trilinos_ENABLE_SEACASFastq:BOOL=OFF \
  -D Trilinos_ENABLE_SEACASBlot:BOOL=OFF \
  -D Trilinos_ENABLE_SEACASPLT:BOOL=OFF \
  -D TPL_ENABLE_X11:BOOL=OFF \
  -D Trilinos_ENABLE_Tpetra:BOOL=ON \
  -D Trilinos_ENABLE_Kokkos:BOOL=ON \
  -D Trilinos_ENABLE_Ifpack2:BOOL=ON \
  -D Trilinos_ENABLE_Amesos2:BOOL=ON \
  -D Trilinos_ENABLE_Zoltan2:BOOL=ON \
  -D Trilinos_ENABLE_Zoltan:BOOL=ON \
  -D Zoltan_ENABLE_ULONG_IDS:BOOL=ON \
  -D ZOLTAN_BUILD_ZFDRIVE:BOOL=OFF \
  -D Trilinos_ENABLE_FEI:BOOL=OFF \
  -D Phalanx_ENABLE_TEUCHOS_TIME_MONITOR:BOOL=ON \
  -D Stokhos_ENABLE_TEUCHOS_TIME_MONITOR:BOOL=ON \
  -D Stratimikos_ENABLE_TEUCHOS_TIME_MONITOR:BOOL=ON \
  -D Trilinos_ENABLE_MueLu:BOOL=ON \
  -D Amesos2_ENABLE_KLU2:BOOL=ON \
  -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON \
  -D Tpetra_INST_INT_LONG_LONG:BOOL=ON \
  -D Tpetra_INST_INT_INT:BOOL=ON \
  -D Tpetra_INST_DOUBLE:BOOL=ON \
  -D Tpetra_INST_FLOAT:BOOL=OFF \
  -D Tpetra_INST_COMPLEX_FLOAT:BOOL=OFF \
  -D Tpetra_INST_COMPLEX_DOUBLE:BOOL=OFF \
  -D Tpetra_INST_INT_LONG:BOOL=OFF \
  -D Tpetra_INST_INT_UNSIGNED:BOOL=OFF \
  -D Trilinos_ENABLE_Kokkos:BOOL=ON \
  -D Trilinos_ENABLE_KokkosCore:BOOL=ON \
  -D Phalanx_KOKKOS_DEVICE_TYPE:STRING="OPENMP" \
  -D Phalanx_INDEX_SIZE_TYPE:STRING="INT" \
  -D Phalanx_SHOW_DEPRECATED_WARNINGS:BOOL=OFF \
  -D Trilinos_ENABLE_OpenMP:BOOL=ON \
  -D HAVE_INTREPID_KOKKOSCORE:BOOL=ON \
  -D TPL_ENABLE_HWLOC:STRING=OFF \
  -D TPL_HWLOC_LIBRARIES:PATHNAME="${HWLOC_PATH}/lib/libhwloc.so" \
  -D TPL_HWLOC_INCLUDE_DIRS:PATHNAME="${HWLOC_PATH}/include" \
  -D Trilinos_ENABLE_ThreadPool:BOOL=ON \
  -D Trilinos_ENABLE_Panzer:BOOL=OFF \
  -D Panzer_ENABLE_TESTS:BOOL=ON \
  -D Panzer_ENABLE_EXAMPLES:BOOL=ON \
  -D Panzer_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON \
  -D Panzer_ENABLE_FADTYPE:STRING="Sacado::Fad::DFad" \
  -D MPI_EXEC=mpirun \
  -D MPI_EXEC_MAX_NUMPROCS:STRING="4" \
  -D MPI_EXEC_NUMPROCS_FLAG:STRING="-np" \
  $EXTRA_ARGS \
  "${TRILINOS_PATH}"
-
Load the same modules as for Trilinos:
# Load the Open MPI toolchain together with the SuperLU_DIST, zlib, NetCDF
# and Boost modules in a single `module load` call, one module per line.
module load \
  openmpi/1.10.0/gnu/4.9.2/cuda/7.5.7 \
  superlu-dist/4.3.0/openmpi/1.10.0/gnu/4.9.2/cuda/7.5.7 \
  zlib \
  netcdf/4.3.3.1/openmpi/1.10.0/gnu/4.9.2/cuda/7.5.7 \
  boost/1.55.0/openmpi/1.10.0/gnu/4.9.2/cuda/7.5.7
-
Configure Albany as usual:
# Configure Albany from a build directory located directly inside the Albany
# source tree: the final `..` argument is the source directory given to cmake.

# Drop any stale CMake cache from a previous configure.
rm -f CMakeCache.txt

# Remember the build directory (not referenced by the cmake call below).
BUILD_DIR=$(pwd)

# ALBANY_TRILINOS_DIR points at a Trilinos installation directory named
# install_OpenMP; adjust the path to your own install location.
cmake \
  -D ALBANY_TRILINOS_DIR:PATH=/home/ipdemes/TrilinosDir/BuildTrilinos_Albany/install_OpenMP \
  -D ENABLE_LCM:BOOL=ON \
  -D ENABLE_MOR:BOOL=ON \
  -D ENABLE_GOAL:BOOL=OFF \
  -D ENABLE_FELIX:BOOL=ON \
  -D ENABLE_HYDRIDE:BOOL=ON \
  -D ENABLE_AMP:BOOL=OFF \
  -D ENABLE_ATO:BOOL=ON \
  -D ENABLE_SCOREC:BOOL=OFF \
  -D ENABLE_QCAD:BOOL=ON \
  -D ENABLE_SG:BOOL=OFF \
  -D ENABLE_ENSEMBLE:BOOL=OFF \
  -D ENABLE_ASCR:BOOL=OFF \
  -D ENABLE_AERAS:BOOL=ON \
  -D ENABLE_64BIT_INT:BOOL=OFF \
  -D ENABLE_LAME:BOOL=OFF \
  -D ENABLE_DEMO_PDES:BOOL=ON \
  -D ENABLE_KOKKOS_UNDER_DEVELOPMENT:BOOL=ON \
  -D ALBANY_CTEST_TIMEOUT=400 \
  -D ENABLE_CHECK_FPE:BOOL=OFF \
  ..
-
# Build with up to 16 parallel make jobs.
make -j 16
-
Log in to a compute node: salloc -N 1
-
run with srun/mpirun
You can try running your executable with 4 MPI ranks and 40 OpenMP threads per rank for better performance on POWER8:
# Launch Albany through mpirun on input.xml:
#   -x OMP_NUM_THREADS=40            export the OpenMP thread count to the launched process
#   --bind-to numa --map-by numa     place and bind processes per NUMA domain
#   -n 1                             number of MPI processes to start
#   --kokkos-threads=40              option passed through to the Albany executable
# NOTE(review): the accompanying text mentions 4 MPI ranks while this command
# starts 1 (-n 1) - confirm which is intended.
mpirun -x OMP_NUM_THREADS=40 --bind-to numa --map-by numa -n 1 Albany --kokkos-threads=40 input.xml