#!/bin/bash
#SBATCH -J ifcMpi_tst
#SBATCH -p edr
#SBATCH -t 23:30:00
#SBATCH --mem-per-cpu 4000
#SBATCH -N 1
#SBATCH --tasks-per-node 6
# NOTE(review): the node-exclusion directive below is taken to be disabled
#               ("# #SBATCH"); confirm against the repository copy.
# #SBATCH -x curly,larry,moe,shemp
#SBATCH -e /home/jm_c/test_svante/output/ifcMpi_tst.stderr
#SBATCH -o /home/jm_c/test_svante/output/ifcMpi_tst.stdout
#SBATCH --no-requeue

# $Header: /home/ubuntu/mnt/e9_copy/MITgcm_contrib/test_scripts/svante/test_svante_ifc_mpi,v 1.22 2023/11/06 13:35:01 jmc Exp $
# $Name: $

#-----------------------------------------------------------------------
# SLURM batch job: build and run the MITgcm verification experiments
# with the Intel compiler + OpenMPI ("-MPI 6") through ./testreport,
# then run the restart test ../tools/do_tst_2+2.
#
# Outline:
#  - load the intel/openmpi modules and build the testreport options
#    ("-fast" when day-of-month % 3 == 1, "-devel" otherwise);
#  - cd to $TST_DISK/$TST_DIR (the sub-dir is created if missing);
#  - refresh the working tree $gcmDIR:
#      checkOut=1 : clean previous output and "git pull" in place,
#      checkOut=2 : remove it, wait until $srcDIR/updated_code holds
#                   today's date, then copy $srcDIR/$srcCode;
#  - run ./testreport in up to 3 passes (module headers, compile only,
#    run + report), back up the output with backup_outp, and run
#    do_tst_2+2 only if the last testreport returned 0.
#
# Every fatal set-up error exits with status 1 so that the job is not
# recorded as successful.
#-----------------------------------------------------------------------

if test -f /etc/profile.d/modules.sh    ; then . /etc/profile.d/modules.sh    ; fi
if test -f /etc/profile.d/zz_modules.sh ; then . /etc/profile.d/zz_modules.sh ; fi
# Note: added "ulimit -s unlimited" in file "~/.bashrc"
# to pass big test (the 2 fizhi-cs-* test & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: "$(hostname)

headNode='svante-login'
dNam='svante'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
SEND="ssh $headNode $SavD/mpack"
#TST_DISK="/net/fs09/d1/jm_c"
TST_DISK="/scratch/jm_c"
#TST_DIR="$TST_DISK/test_${dNam}"
TST_DIR="test_${dNam}"

#- where local copy of code is (need to be consistent with "test_submit_svante"):
#srcDIR='.'
srcDIR=$HERE
srcCode="MITgcm_today"

#- following lines are not used here:
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm';     git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

dblTr=0 ; typ='' ; addExp='' ; skipExp=''

sfx='ifcMpi'; dblTr=1
module add intel/2021.4.0
module add openmpi
#module add netcdf
OPTFILE="../tools/build_options/linux_amd64_ifort"
export GENERIC='on' # <-- to prevent the use of "-xHost" option
#- needed for DIVA with MPI:
#export MPI_INC_DIR=$INC_MPI
options="$typ -MPI 6"
#- need this to get "staf" & "do_make_syntax.sh":
export PATH="$PATH:$HOME/bin"

gcmDIR="MITgcm_$sfx"

#- alternate between "-fast" and "-devel" according to the day of the month.
#  "expr" is kept on purpose: it reads "08"/"09" as decimal, whereas $(( ))
#  would reject them as invalid octal numbers.
dAlt=$(date +%d) ; dAlt=$(expr $dAlt % 3)
if [ "$dAlt" -eq 1 ] ; then options="$options -fast"
else options="$options -devel" ; fi

if test "x$skipExp" != x ; then skipExp=$(echo $skipExp | sed 's/^ *//') ; fi

checkOut=2 ; #options="$options -do"
#options="$options -nc" ; checkOut=1 ; dblTr=0
#options="$options -q" ; checkOut=0 ; dblTr=0

#- go to the test disk; abort if this fails since all that follows
#  (including "rm -rf $gcmDIR") uses paths relative to this location.
echo "cd $TST_DISK ; pwd (x1)"
cd "$TST_DISK" || { echo "Error: unable to cd to: $TST_DISK --> Exit" ; exit 1 ; }
pwd ; ls -l
if test ! -d "$TST_DIR" ; then sleep 5 ; pwd ; ls -l ; fi
if test ! -d "$TST_DIR" ; then
  echo -n "Creating a working dir: $TST_DIR ..."
  mkdir "$TST_DIR"
  retVal=$?
  if test "x$retVal" != x0 ; then
    if test ! -d "$TST_DIR" ; then
      echo " FAIL"
      echo "Error: unable to make dir: $TST_DIR (err=$retVal ) from $TST_DISK --> Exit"
      exit 1
    else
      echo " FAIL but dir now exists ! -> continue"
    fi
  else
    echo " done"
  fi
fi
echo "start from DIR='$TST_DISK/$TST_DIR' at: "$(date)
cd "$TST_DIR" || { echo "Error: unable to cd to: $TST_DISK/$TST_DIR --> Exit" ; exit 1 ; }
pwd

NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo 'Start job '$THEDATE
echo 'NSLOTS = '$NSLOTS
echo '======= NODELIST ==============================================================='
echo $SLURM_NODELIST
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
module list 2>&1
echo '================================================================================'

#- check for disk space: relative space (99%) or absolute (10.G):
dsp=$(df -P . | tail -1 | awk '{print $5}' | sed 's/%$//')
if [ "$dsp" -gt 99 ] ; then
#dsp=`df -P . | tail -1 | awk '{print $4}'`
#if [ $dsp -le 100000000 ] ; then
  echo 'Not enough space on this disk => do not run testreport.'
  df .
  exit 1
fi

#- checkOut=1 needs an existing clone; otherwise fall back to a fresh copy
if [ $checkOut -eq 1 ] ; then
  if test ! -e "$gcmDIR/.git/config" ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if test "x$addExp" != x ; then
    if test ! -e "$gcmDIR/$git_other/.git/config" ; then
      echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
      checkOut=2
    fi
  fi
fi

if [ $checkOut -eq 1 ] ; then
  echo "cleaning output from $gcmDIR/verification :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f $gcmDIR/verification/??_${dNam}-${sfx}_????????_?.tar.gz
  #  (each sub-shell below stops if its "cd" fails, so that nothing is
  #   removed or updated from the wrong directory)
  ( cd "$gcmDIR/verification" || exit 1
    listD=$(ls -1 -d ??_${dNam}-${sfx}_????????_? 2> /dev/null)
    for dd in $listD
    do
      if test -d $dd ; then
        tar -cf ${dd}".tar" $dd > /dev/null 2>&1 && gzip ${dd}".tar" && /bin/rm -rf $dd
        retVal=$?
        if test "x$retVal" != x0 ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '"$(hostname)"' (return val=$retVal) but continue"
        fi
      fi
    done
  )
  echo "clean tst_2+2 + testreport output (+ Makefile_syntax files)"
  ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
  ( cd "$gcmDIR/verification" && ./testreport $typ -clean )
  ( cd "$gcmDIR/verification" && rm -f */build/Makefile_syntax )
  ( cd "$gcmDIR/verification" && rm -f */build/port_rand.i */build/ptracers_set_iolabel.i )
  if test "x$addExp" != x ; then
    ( cd "$gcmDIR/verification" || exit 1
      listD=$(ls -o | grep '^l' | awk '{print $8}' 2> /dev/null)
      echo " + remove local links: $listD"
      /bin/rm -f $listD
    )
  fi
  echo "Update $git_code code in dir $gcmDIR :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "git pull on '"$(hostname)"' fail (return val=$retVal) => exit"
    exit 1
  fi
  echo " and checkout master:"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if test "x$addExp" != x ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "git pull on '"$(hostname)"' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master:"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi

if [ $checkOut -eq 2 ] ; then
  if test -e "$gcmDIR" ; then
    echo -n "Removing working copy: $gcmDIR ..."
    rm -rf -- "${gcmDIR:?}"
    echo " done"
  fi
  # make a local copy (instead of making a new clone):
  #- wait (up to 40 x 60 s) until file $updFile holds a date >= today.
  #  An empty file (e.g. read while it is being re-written) counts as
  #  "not yet updated" instead of breaking the numeric test.
  today=$(date +%Y%m%d)
  nCount=0; updFile="${srcDIR}/updated_code"
  updDate=0 ; test -f "$updFile" && updDate=$(cat "$updFile")
  updDate=${updDate:-0}
  while [ $today -gt $updDate ] ; do
    nCount=$(expr $nCount + 1)
    if [ $nCount -gt 40 ] ; then
      echo " waiting too long (nCount=$nCount) for updated code"
      echo " today=$today , updDate=$updDate "
      ls -l "$updFile"
      exit 1
    fi
    sleep 60
    updDate=0 ; test -f "$updFile" && updDate=$(cat "$updFile")
    updDate=${updDate:-0}
  done
  ls -l "$updFile"
  echo " waited nCount=$nCount for updated code ($updDate) to copy"
  if test -d "$srcDIR/$srcCode" ; then
    echo -n "Make local copy of dir '$srcDIR/$srcCode' to: $gcmDIR ..."
    cp -pra "$srcDIR/$srcCode" "$gcmDIR"
    echo " done"
  else
    echo " dir: $srcDIR/$srcCode missing => exit"
    exit 1
  fi
fi

#- change dir to $gcmDIR/verification + add link for additional experiments:
if test -e "$gcmDIR/verification" ; then
  if [ $checkOut -lt 2 ] ; then echo " dir $gcmDIR/verification exist" ; fi
  cd "$gcmDIR/verification" \
    || { echo "unable to cd to: $gcmDIR/verification => exit" ; exit 1 ; }
  for exp2add in $addExp ; do
    test -r $exp2add && /bin/rm -rf $exp2add
    if test -d ../$git_other/$exp2add ; then
      echo " add $exp2add link from $git_other"
      ln -s ../$git_other/$exp2add .
      if test $exp2add = 'global_oce_cs32' ; then
        echo " link dir 'other_input/core2_cnyf' in here"
        ( cd ../${git_other}/${exp2add} || exit 1
          test -L core2_cnyf && /bin/rm -f core2_cnyf
          ln -s ../../../other_input/core2_cnyf .
        )
      fi
      if test $exp2add = 'global_oce_llc90' ; then
        echo " link dir 'other_input/gael_oce_llc90_input' to 'input_fields'"
        ( cd ../${git_other}/${exp2add} || exit 1
          test -L input_fields && /bin/rm -f input_fields
          ln -s ../../../other_input/gael_oce_llc90_input input_fields
          echo " link dirs: 'core2_cnyf' & 'global_oce_input_fields/*' in input_verifs"
          test ! -e input_verifs && mkdir input_verifs
          #- do not run "rm -f *" unless we really are inside input_verifs
          ( cd input_verifs || exit 1
            /bin/rm -f *
            ln -s ../../../../other_input/core2_cnyf .
            ln -s ../../../../other_input/global_oce_input_fields/* .
          )
        )
      fi
    else
      echo " missing dir: $git_other/$exp2add"
      continue
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  exit 1
fi

#- Note: $options is left unquoted on purpose (it holds several arguments).
if [ $dblTr -eq 1 ] ; then
  if [ "$dAlt" -ne 1 ] ; then
    echo ''
    #- 0) just make all module header ( *__genmod.mod files) using modified Makefile
    echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
      -j 4 -repl_mk do_make_syntax.sh -obj -dd
    ./testreport $options -of $OPTFILE -skd "$skipExp" \
      -j 4 -repl_mk do_make_syntax.sh -obj -dd 2>&1
    options="$options -q"
  fi
  echo ''
  #- 1) just compile ("-nr"), using "-j 4" to speed up
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 4 -nr -odir ${dNam}-$sfx
  ./testreport $options -of $OPTFILE -skd "$skipExp" \
    -j 4 -nr -odir ${dNam}-$sfx
  nFc=$(grep -c '^Y . N N ' tr_out.txt)
  echo " <= fail to compile $nFc experiments"
  if [ "$dAlt" -eq 1 ] ; then options="$options -q" ; fi
fi

echo ''
#- 2) run and report results ; also finish to compile those who failed with "-j"
echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
  -odir ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
./testreport $options -of $OPTFILE -skd "$skipExp" \
  -odir ${dNam}-$sfx -send "$SEND" -sd $SavD -a jm_c@mitgcm.org
#- save testreport status before running anything else
retVal=$?
"$HERE/${dNam}/backup_outp" "tr_${dNam}-${sfx}" "$OUTP/backup"

if test "x$retVal" != x0 ; then
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
else
  echo ''
  #- 3) test restart and report results
  echo ../tools/do_tst_2+2 -mpi \
    -o ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ../tools/do_tst_2+2 -mpi \
    -o ${dNam}-$sfx -send "$SEND" -sd $SavD -a jm_c@mitgcm.org
  "$HERE/${dNam}/backup_outp" "rs_${dNam}-${sfx}" "$OUTP/backup"
fi