#!/bin/bash
#SBATCH -J ifcMpi_tst
#SBATCH -p sched_mit_hill
#SBATCH -t 10:00:00
#SBATCH --mem-per-cpu 4000
#SBATCH -N 2
#SBATCH --tasks-per-node 4
# #SBATCH -x node122
# #SBATCH -x node[051,052,065,066]
#SBATCH -e /home/jm_c/test_engaging/output/ifcMpi_tst.stderr
#SBATCH -o /home/jm_c/test_engaging/output/ifcMpi_tst.stdout
#SBATCH --no-requeue

# $Header: /home/ubuntu/mnt/e9_copy/MITgcm_contrib/test_scripts/engaging/test_engag_ifc_mpi,v 1.25 2023/12/30 16:55:12 jmc Exp $
# $Name: $

#-----------------------------------------------------------------------
# SLURM batch job: update (or clone) the MITgcm source tree below
# $TST_DIR, then run the verification suite ("testreport" followed by
# the restart test "do_tst_2+2") with the ifort + Intel-MPI optfile.
#
# NOTE: every "#SBATCH" directive must start its own line and come
# before the first executable statement, and every shell statement
# below must be on its own line; otherwise the leading "#!" turns the
# whole header into a single comment.
#-----------------------------------------------------------------------

#- make the "module" command available in this (non-login) shell
if test -f /etc/profile.d/modules.sh ; then
  . /etc/profile.d/modules.sh
fi
# Note: added "ulimit -s unlimited" in file "~/.bashrc"
# to pass big test (the 2 fizhi-cs-* test & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: $(hostname)"

#- host used (through ssh) to send the results, and local directory layout
headNode='eofe8'
dNam='engaging'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
SEND="ssh $headNode $SavD/mpack"
TST_DIR="/pool001/jm_c/test_$dNam"

#- scratch file that receives the stderr of "git clone"
tmpFil="/tmp/$(basename "$0").$$"
#- legacy CVS access command (not referenced in the visible part of this script)
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- defaults, then the settings specific to this test:
#  sfx     : suffix used for the clone dir, the output dir and the machine file
#  dblTr   : 1 => run the two preliminary testreport passes before the real one
#  addExp  : additional experiments linked in from the $git_other repository
#  skipExp : experiments passed to testreport option "-skd"
dblTr=0 ; typ='' ; addExp='' ; skipExp=''
sfx='ifcMpi'; dblTr=1
addExp='global_oce_cs32 global_oce_llc90'

module add slurm
module add gcc
module add engaging/intel/2013.1.046
#export MPI_INC_DIR="$MPI_HOME/include"
OPTFILE="../tools/build_options/linux_amd64_ifort+impi"
mpiCMD="mpirun -env I_MPI_DEBUG 2 -n TR_NPROC ./mitgcmuv" #- currently not used
#- "$typ" is deliberately expanded here even when empty (leaves a leading blank)
options="$typ -MPI 8"
#options="-j 4 $options"
#export OMP_NUM_THREADS=2
#export KMP_SLAVE_STACK_SIZE=400m
#options="$options -gsl"
#export GSL_IEEE_MODE=double-precision,mask-underflow,mask-denormalized
ulimit -s unlimited
#- need this to get "staf" & "do_make_syntax.sh":
export PATH="$PATH:$HOME/bin"

#- name of the local clone (relative to $TST_DIR)
gcmDIR="MITgcm_$sfx"
#- when (day of the month) % 3 == 1, add "-ur4 -match 5" to the testreport
#  options. "expr" is kept on purpose: it reads "08" and "09" as decimal
#  numbers, whereas $(( )) would reject them as invalid octal.
dAlt=$(date +%d) ; dAlt=$(expr "$dAlt" % 3)
if [ "$dAlt" -eq 1 ] ; then options="$options -ur4 -match 5" ; fi
options="$options -devel"
#- strip leading blanks ($skipExp is left unquoted so that echo re-joins the words)
if test "x$skipExp" != x ; then skipExp=$(echo $skipExp | sed 's/^ *//') ; fi

#- checkOut: 1 => update the existing clone ("git pull");
#            2 => make a fresh clone (set below when a ".git/config" is missing);
#            any other value => leave the source tree untouched.
#  dblTr   : 1 => also run the 2 preliminary testreport passes;
#           <0 => skip testreport completely.
checkOut=1 ; #options="$options -do"
#options="$options -nc" ; checkOut=1 ; dblTr=0
#options="$options -q" ; checkOut=0 ; dblTr=0
# dblTr=-1 #- skip testreport completely (only run "do_tst_2+2")

if test -d "$TST_DIR" ; then
  echo "start from TST_DIR='$TST_DIR' at: "$(date)
else
 #if test ! -d $TST_DIR ; then mkdir $TST_DIR ; fi
 #if test ! -d $TST_DIR ; then
 #  echo "ERROR: Can't create directory \"$TST_DIR\""
 #  exit 1
 #fi
 #echo "start in new dir TST_DIR='$TST_DIR' at: "`date`
  echo "ERROR: missing directory \"$TST_DIR\""
  exit 1
fi
#- everything below (including several "rm") uses paths relative to TST_DIR,
#  so never continue from the wrong directory
cd "$TST_DIR" || exit 1
pwd
df .

NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo 'Start job '$THEDATE
echo 'NSLOTS = '$NSLOTS
echo '======= NODELIST ==============================================================='
#- quoted: the node list may contain "[...]" which must not be globbed
echo "$SLURM_NODELIST"
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
module list 2>&1
echo '================================================================================'

#-
MPI_MFile="${OUTP}/mf_${sfx}" #- currently not used
#mpiCMD="mpirun -hostfile TR_MFILE -n TR_NPROC ./mitgcmuv"
#- make the testreport MPI_MFILE:
#  NOTE(review): each sed expression replaces only the first match, so this
#  only handles a 2-node list such as "node[051-052]" or "node[051,052]".
listNODES=$(echo "$SLURM_NODELIST" | sed -e 's/\[/ /' -e 's/\]//' -e 's/,/ /' -e 's/-/ /')
#- write the listed nodes 4 times over (the first word is the common prefix):
/bin/rm -f "$MPI_MFile" ; touch "$MPI_MFile"
for nc in 1 2 3 4 ; do
  pfx=''
  for nd in $listNODES ; do
    if test "x$pfx" = x ; then
      pfx=$nd
    else
      echo "${pfx}${nd}" >> "$MPI_MFile"
    fi
  done
done

#- check for disk space: relative space (99%) or absolute (10.G):
dsp=$(df -P . | tail -1 | awk '{print $5}' | sed 's/%$//')
if [ "$dsp" -gt 99 ] ; then
#dsp=`df -P . | tail -1 | awk '{print $4}'`
#if [ $dsp -le 100000000 ] ; then
  echo 'Not enough space on this disk => do not run testreport.'
  df .
  exit 1
fi

#- fall back to a fresh clone if the code (or the additional experiments
#  repository, when needed) is not an existing git working copy
if [ "$checkOut" -eq 1 ] ; then
  if test ! -e "$gcmDIR/.git/config" ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if test "x$addExp" != x ; then
    if test ! -e "$gcmDIR/$git_other/.git/config" ; then
      echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
      checkOut=2
    fi
  fi
fi

if [ "$checkOut" -eq 1 ] ; then
  echo "cleaning output from $gcmDIR/verification @ $(date +"%H:%M:%S") :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f "$gcmDIR"/verification/??_"${dNam}-${sfx}"_????????_?.tar.gz
  ( cd "$gcmDIR/verification" || exit 1
    #- an unmatched pattern stays literal and is rejected by "test -d"
    for dd in ??_"${dNam}-${sfx}"_????????_? ; do
      if test -d "$dd" ; then
        tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 \
          && gzip "${dd}.tar" && /bin/rm -rf "$dd"
        retVal=$?
        if test "x$retVal" != x0 ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '$(hostname)' (return val=$retVal) but continue"
        fi
      fi
    done
  )
  echo "clean tst_2+2 + testreport output (+ Makefile_syntax files)"
  #- "&&": never run a clean-up command if the "cd" did not succeed
  ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
  ( cd "$gcmDIR/verification" && ./testreport $typ -clean )
  ( cd "$gcmDIR/verification" && rm -f */build/Makefile_syntax )
  ( cd "$gcmDIR/verification" \
      && rm -f */build/port_rand.i */build/ptracers_set_iolabel.i )
  if test "x$addExp" != x ; then
    ( cd "$gcmDIR/verification" || exit 1
      #- symbolic links are the entries whose "ls -o" line starts with "l";
      #  the name is taken from the 8th field
      listD=$(ls -o | grep '^l' | awk '{print $8}' 2> /dev/null)
      echo " + remove local links: $listD"
      /bin/rm -f $listD
    )
  fi

  echo "Update $git_code code in dir $gcmDIR @ $(date +"%H:%M:%S") :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
    exit 1
  fi
  echo " and checkout master @ $(date +"%H:%M:%S") :"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if test "x$addExp" != x ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other @ $(date +"%H:%M:%S") :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master @ $(date +"%H:%M:%S") :"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi

if [ "$checkOut" -eq 2 ] ; then
  if test -e "$gcmDIR" ; then
    echo -n "Removing working copy: $gcmDIR ..."
    rm -rf -- "$gcmDIR"
    echo " done"
  fi
  echo "Make a clone of $git_code from repo: $git_repo ..."
  git clone "$git_repo/${git_code}.git" "$gcmDIR" 2> "$tmpFil"
  retVal=$?
  if test $retVal = 0 ; then
    echo ' done' ; rm -f "$tmpFil"
  else
    echo " Error: 'git clone' returned: $retVal"
    cat "$tmpFil" ; rm -f "$tmpFil"
    exit 1
  fi
  if test "x$addExp" != x ; then
    echo "Make a clone of $git_other from repo: $git_repo ..."
    ( cd "$gcmDIR" && git clone "$git_repo/${git_other}.git" 2> "$tmpFil" )
    retVal=$?
    if test $retVal = 0 ; then
      echo ' done' ; rm -f "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil" ; rm -f "$tmpFil"
      exit 1
    fi
  fi
  #- give the group read access (plus search + set-group-ID on directories)
  if test -d "$gcmDIR/verification" ; then
    /usr/bin/find "$gcmDIR" -type d -exec chmod g+rxs {} +
    /usr/bin/find "$gcmDIR" -type f -exec chmod g+r {} +
  fi
fi

#- change dir to $gcmDIR/verification + add link for additional experiments:
if test -e "$gcmDIR/verification" ; then
  if [ "$checkOut" -lt 2 ] ; then echo " dir $gcmDIR/verification exist" ; fi
  cd "$gcmDIR/verification" || exit 1
  for exp2add in $addExp ; do
    test -r "$exp2add" && /bin/rm -rf "$exp2add"
    if test -d "../$git_other/$exp2add" ; then
      echo " add $exp2add link from $git_other"
      ln -s "../$git_other/$exp2add" .
      if test "$exp2add" = 'global_oce_cs32' ; then
        echo " link dir 'other_input/core2_cnyf' in here"
        ( cd "../${git_other}/${exp2add}" || exit 1
          test -L core2_cnyf && /bin/rm -f core2_cnyf
          ln -s ../../../other_input/core2_cnyf .
        )
      fi
      if test "$exp2add" = 'global_oce_llc90' ; then
        echo " link dir 'other_input/gael_oce_llc90_input' to 'input_fields'"
        ( cd "../${git_other}/${exp2add}" || exit 1
          test -L input_fields && /bin/rm -f input_fields
          ln -s ../../../other_input/gael_oce_llc90_input input_fields
          echo " link dirs: 'core2_cnyf' & 'global_oce_input_fields/*' in input_verifs"
          test ! -e input_verifs && mkdir input_verifs
          #- "rm -f *" must only ever run inside input_verifs
          ( cd input_verifs || exit 1
            /bin/rm -f *
            ln -s ../../../../other_input/core2_cnyf .
            ln -s ../../../../other_input/global_oce_input_fields/* .
          )
        )
      fi
    else
      echo " missing dir: $git_other/$exp2add"
      continue
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  exit 1
fi

#- NOTE: $options is left unquoted on purpose in the calls below: it holds
#  several blank-separated testreport options.
if [ "$dblTr" -eq 1 ] ; then
  echo ''
  #- 0) just make all module header ( *__genmod.mod files) using modified Makefile
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 4 -nc -repl_mk do_make_syntax.sh -obj -dd
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -j 4 -nc -repl_mk do_make_syntax.sh -obj -dd
  options="$options -q"

  echo ''
  #- 1) just compile ("-nr"), using "-j 4" to speed up
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 4 -nr -odir ${dNam}-$sfx
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -j 4 -nr -odir "${dNam}-$sfx"
  nFc=$(grep -c '^Y . N N ' tr_out.txt)
  echo " <= fail to compile $nFc experiments"
fi

if [ "$dblTr" -ge 0 ] ; then
  echo ''
  #- 2) run and report results ; also finish to compile those who failed with "-j"
  #  (disabled alternative: add  -command "$mpiCMD" -mf $MPI_MFile  after $OPTFILE)
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -odir ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -odir "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
  retVal=$?
else
  retVal=0
fi

if test "x$retVal" != x0 ; then
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
else
  echo ''
  #- 3) test restart and report results
  #  (disabled alternative: add  -exe "$mpiCMD" -mf $MPI_MFile  after -mpi)
  echo ../tools/do_tst_2+2 -mpi \
    -o ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ../tools/do_tst_2+2 -mpi \
    -o "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
fi