#!/bin/bash
#SBATCH -J o64Mpi_tst
#SBATCH -p sched_mit_hill
#SBATCH -t 06:00:00
#SBATCH --mem-per-cpu 4000
#SBATCH -n 6
#SBATCH -N 2
#SBATCH -x node122
# #SBATCH -x node[360,365]
#SBATCH -e /home/jm_c/test_engaging/output/o64Mpi_tst.stderr
#SBATCH -o /home/jm_c/test_engaging/output/o64Mpi_tst.stdout
#SBATCH --no-requeue

# $Header: /home/ubuntu/mnt/e9_copy/MITgcm_contrib/test_scripts/engaging/test_engag_op64_mpi,v 1.6 2023/12/30 16:55:12 jmc Exp $
# $Name: $

#-----------------------------------------------------------------------
# SLURM batch job: MITgcm verification tests built with the "open64" and
# "mvapich2" modules, run with "-MPI 6".
# Steps:
#   1) go to $TST_DIR and check the disk usage;
#   2) update the existing git clone in $gcmDIR, or make a fresh clone when
#      the expected ".git/config" file is missing;
#   3) run "testreport" once to compile only ("-nr", with "-j 2"), then a
#      second time with "-q" added to run and send the results;
#   4) if that second testreport returned 0, run "do_tst_2+2 -mpi".
# Every set-up failure (missing dir, disk full, git error) exits with a
# non-zero status.
#-----------------------------------------------------------------------

if test -f /etc/profile.d/modules.sh ; then
  . /etc/profile.d/modules.sh
fi
# Note: added "ulimit -s unlimited" in file "~/.bashrc"
# to pass big test (the 2 fizhi-cs-* test & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: "$(hostname)

headNode='eofe8'
dNam='engaging'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
SEND="ssh $headNode $SavD/mpack"
TST_DIR="/pool001/jm_c/test_$dNam"

#- scratch file receiving stderr of "git clone": created with mktemp so that
#  the name is not predictable in the shared /tmp (falls back to the former
#  "<script>.<pid>" name if mktemp fails) ; removed on every exit path.
tmpFil=$(mktemp "/tmp/${0##*/}.XXXXXX") || tmpFil="/tmp/${0##*/}.$$"
trap 'rm -f -- "$tmpFil"' EXIT

cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

dblTr=0 ; typ='' ; addExp='' ; skipExp=''
sfx='o64Mpi'; dblTr=1
#- currently, no NetCDF => no pkg/profiles
#skipExp="$skipExp global_oce_biogeo_bling"
module add open64
module add mvapich2/open64/64/2.0b
export MPI_INC_DIR="$MPI_HOME/include"
OPTFILE="../tools/build_options/linux_amd64_open64"
#options="-j 2 -devel -gsl"
options="$typ -MPI 6"
#export OMP_NUM_THREADS=2
#export OMP_SLAVE_STACK_SIZE=400m
#export GSL_IEEE_MODE=double-precision,mask-underflow,mask-denormalized
ulimit -s unlimited
#- need this to get "staf":
#export PATH="$PATH:$HOME/bin"
gcmDIR="MITgcm_$sfx"

#- alternate between "-fast" and "-devel" according to the day of the month.
#  Keep "expr" here: the day comes with a leading zero ("08", "09") which
#  shell arithmetic "$(( ))" would reject as an invalid octal number.
dAlt=$(date +%d) ; dAlt=$(expr "$dAlt" % 3)
if [ "$dAlt" -eq 1 ] ; then
  options="$options -fast"
else
  options="$options -devel"
fi

if test "x$skipExp" != x ; then
  skipExp=$(echo $skipExp | sed 's/^ *//')
fi

checkOut=1 ; #options="$options -do"
#options="$options -nc" ; checkOut=1
#options="$options -q" ; checkOut=0 ; dblTr=0
# dblTr=-1 #- skip testreport completely (only run "do_tst_2+2")

if test -d "$TST_DIR" ; then
  echo "start from TST_DIR='$TST_DIR' at: "$(date)
else
  #if test ! -d $TST_DIR ; then mkdir $TST_DIR ; fi
  #if test ! -d $TST_DIR ; then
  #  echo "ERROR: Can't create directory \"$TST_DIR\""
  #  exit 1
  #fi
  #echo "start in new dir TST_DIR='$TST_DIR' at: "`date`
  echo "ERROR: missing directory \"$TST_DIR\""
  exit 1
fi
#- all paths below ($gcmDIR, ...) are relative to $TST_DIR: stop if cd fails
cd "$TST_DIR" || exit 1
pwd
df .

NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo 'Start job '$THEDATE
echo 'NSLOTS = '$NSLOTS
echo '======= NODELIST ==============================================================='
echo $SLURM_NODELIST
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
module list 2>&1
echo '================================================================================'

#- check for disk space: relative space (99%) or absolute (10.G):
dsp=$(df -P . | tail -1 | awk '{print $5}' | sed 's/%$//')
if [ "$dsp" -gt 99 ] ; then
#dsp=`df -P . | tail -1 | awk '{print $4}'`
#if [ $dsp -le 100000000 ] ; then
  echo 'Not enough space on this disk => do not run testreport.'
  df .
  exit 1
fi

#- an "update" (checkOut=1) needs an existing clone ; if not, switch to
#  "fresh clone" mode (checkOut=2)
if [ "$checkOut" -eq 1 ] ; then
  if test ! -e "$gcmDIR/.git/config" ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if test "x$addExp" != x ; then
    if test ! -e "$gcmDIR/$git_other/.git/config" ; then
      echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
      checkOut=2
    fi
  fi
fi

if [ "$checkOut" -eq 1 ] ; then
  echo "cleaning output from $gcmDIR/verification @ "$(date +"%H:%M:%S")" :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f "$gcmDIR"/verification/??_"${dNam}-${sfx}"_????????_?.tar.gz
  ( cd "$gcmDIR/verification" || exit 1
    #- an unmatched pattern stays literal and is skipped by "test -d"
    for dd in ??_"${dNam}-${sfx}"_????????_? ; do
      if test -d "$dd" ; then
        tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 \
          && gzip "${dd}.tar" && /bin/rm -rf "$dd"
        retVal=$?
        if test "x$retVal" != x0 ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '"$(hostname)"' (return val=$retVal) but continue"
        fi
      fi
    done
  )
  echo "clean tst_2+2 + testreport output"
  #- "&&": never run the relative-path tools from the wrong directory
  ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
  #- $typ intentionally unquoted: empty means "no extra argument"
  ( cd "$gcmDIR/verification" && ./testreport $typ -clean )
  if test "x$addExp" != x ; then
    ( cd "$gcmDIR/verification" || exit 1
      #- collect the symbolic links of this dir (no parsing of "ls" output)
      links=()
      for entry in * ; do
        if test -L "$entry" ; then links+=("$entry") ; fi
      done
      listD=$(printf '%s\n' "${links[@]}")
      echo " + remove local links: $listD"
      if [ "${#links[@]}" -gt 0 ] ; then
        /bin/rm -f -- "${links[@]}"
      fi
    )
  fi
  echo "Update $git_code code in dir $gcmDIR @ "$(date +"%H:%M:%S")" :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "git pull on '"$(hostname)"' fail (return val=$retVal) => exit"
    exit 1
  fi
  echo " and checkout master @ "$(date +"%H:%M:%S")" :"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if test "x$addExp" != x ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other @ "$(date +"%H:%M:%S")" :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "git pull on '"$(hostname)"' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master @ "$(date +"%H:%M:%S")" :"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi

if [ "$checkOut" -eq 2 ] ; then
  if test -e "$gcmDIR" ; then
    echo -n "Removing working copy: $gcmDIR ..."
    #- ":?" aborts instead of expanding to nothing if gcmDIR is ever empty
    rm -rf -- "${gcmDIR:?}"
    echo " done"
  fi
  echo "Make a clone of $git_code from repo: $git_repo ..."
  git clone "$git_repo/${git_code}.git" "$gcmDIR" 2> "$tmpFil"
  retVal=$?
  if test $retVal = 0 ; then
    echo ' done' ; rm -f "$tmpFil"
  else
    echo " Error: 'git clone' returned: $retVal"
    cat "$tmpFil" ; rm -f "$tmpFil"
    exit 1
  fi
  if test "x$addExp" != x ; then
    echo "Make a clone of $git_other from repo: $git_repo ..."
    ( cd "$gcmDIR" && git clone "$git_repo/${git_other}.git" 2> "$tmpFil" )
    retVal=$?
    if test $retVal = 0 ; then
      echo ' done' ; rm -f "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil" ; rm -f "$tmpFil"
      exit 1
    fi
  fi
  if test -d "$gcmDIR/verification" ; then
    #- give group read access (+ search & setgid on directories)
    /usr/bin/find "$gcmDIR" -type d -exec chmod g+rxs {} +
    /usr/bin/find "$gcmDIR" -type f -exec chmod g+r {} +
  fi
fi

#- change dir to $gcmDIR/verification + add link for additional experiments:
if test -e "$gcmDIR/verification" ; then
  if [ "$checkOut" -lt 2 ] ; then echo " dir $gcmDIR/verification exist" ; fi
  #- testreport & do_tst_2+2 are called with relative paths: stop if cd fails
  cd "$gcmDIR/verification" || exit 1
  #- $addExp intentionally unquoted: space-separated list of experiments
  for exp2add in $addExp ; do
    test -r "$exp2add" && /bin/rm -rf "$exp2add"
    if test -d "../$git_other/$exp2add" ; then
      echo " add $exp2add link from $git_other"
      ln -s "../$git_other/$exp2add" .
    else
      echo " missing dir: $git_other/$exp2add"
      continue
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  exit 1
fi

#- note: $options is intentionally left unquoted in the testreport calls
#  below, since it holds several space-separated arguments.
if [ "$dblTr" -eq 1 ] ; then
  echo ''
  #- 1) just compile ("-nr"), using "-j 2" to speed up
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 2 -nr -odir ${dNam}-$sfx
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -j 2 -nr -odir "${dNam}-$sfx"
  options="$options -q"
fi

if [ "$dblTr" -ge 0 ] ; then
  echo ''
  #- 2) run and report results ; also finish to compile those who failed with "-j"
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -odir ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -odir "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
  retVal=$?
else
  retVal=0
fi

if test "x$retVal" != x0 ; then
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
else
  echo ''
  #- 3) test restart and report results
  echo ../tools/do_tst_2+2 -mpi \
    -o ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ../tools/do_tst_2+2 -mpi \
    -o "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
fi

# exit 0
# echo -n "-- SLURM_TASKS_PER_NODE= $SLURM_TASKS_PER_NODE ; " >> ${OUTP}/tracer_adj.log
# date >> ${OUTP}/tracer_adj.log
# grep 'My Processor Name' tutorial_tracer_adjsens/run/STDOUT.000? \
#   | sed 's/tutorial_tracer_adjsens\// /' >> ${OUTP}/tracer_adj.log
# grep 'My Processor Name' tutorial_tracer_adjsens/tr_run.som81/STDOUT.000? \
#   | sed 's/tutorial_tracer_adjsens\// /' >> ${OUTP}/tracer_adj.log