#!/bin/bash
# Script for running the MITgcm testreport on ollie.awi.de
# - compile and run on compute nodes
# - use ssh to call mpack command from the head node ollie0
#
# Outline:
#   1. set up the Cray programming environment (modules, netcdf, MPI, slurm)
#   2. clone or update the MITgcm git repository below TST_DIR
#   3. write a slurm batch script that runs testreport and the restart test
#   4. submit that batch script with sbatch
#
#$Header: /home/ubuntu/mnt/e9_copy/MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray,v 1.11 2020/09/22 15:50:25 mlosch Exp $
#$Name: $

# Abort on the first unhandled error. Set here rather than in the shebang so
# that it also applies when the script is started as "bash <script>".
set -e

# needed for cron-job
#source /usr/Modules/current/init/bash
source /etc/profile.d/cray_pe.sh
# this seems to be enough to make the module cmd work
source /etc/profile.d/modules.sh
#
# NOTE(review): confirm that "module purge" is meant to be active here.
module purge
module load PrgEnv-cray
module load netcdf
# set the netcdf root directory here, because the definitions always
# change with different "default" netcdf modules
export NETCDF_ROOT=$(nc-config --prefix)
# not sure why I have to set these paths here again
export MPI_ROOT=$(dirname "$(dirname "$(which mpicc)")")
# $(dirname `echo $LD_LIBRARY_PATH | awk -F: '{print $1}'`)
export MPI_INC_DIR=${MPI_ROOT}/include
# there is no slurm module anymore and this is the current recommendation to
# have sbatch in your path (rather than running /etc/profile.d./slurm.sh)
export PATH=${PATH}:/global/opt/slurm/default/bin

dNam='ollie'
TST_DIR="/work/ollie/mlosch/test_$dNam"
echo "start from TST_DIR='$TST_DIR' at: $(date)"

umask 0022

sfx='cray'
RUNIT="runit_$sfx"
OPTFILE=../tools/build_options/linux_ia64_${sfx}_ollie
# testreport options; kept as one string because it is pasted verbatim into
# the generated batch script further down
options="-MPI 6"
options="$options -odir ${dNam}-c"
options="$options -j 6"
#options="$options -t global_ocean.cs32x15"

#EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'
#EXE='srun -n TR_NPROC ./mitgcmuv'
EXE='srun -n TR_NPROC --cpu_bind=cores ./mitgcmuv'

if [[ -e "$TST_DIR" ]]; then
  echo "$TST_DIR exists"
else
  mkdir "$TST_DIR"
fi
cd "$TST_DIR"

# directory for the batch script and all log files
HERE=$TST_DIR/output
if [[ -e "$HERE" ]]; then
  echo "$HERE"
else
  mkdir "$HERE"
fi

OUTFILE=$HERE/slurm_${sfx}.out
MYOUTPUT=$HERE/out_$sfx
# start with fresh log files
if [[ -e "$MYOUTPUT" ]]; then
  rm -rf -- "$MYOUTPUT"
fi
if [[ -e "$OUTFILE" ]]; then
  rm -r -- "$OUTFILE"
fi

gcmDIR="MITgcm_${sfx}"
git_repo='MITgcm'
git_code='MITgcm'

# checkOut determines how much checking out is being done
# checkOut = 3: new clone from GitHub
# checkOut = 2: update (git pull) existing repo; switches to 3 if the clone
#               has no .git/config
# checkOut = 1: skip update
# checkOut = 0: use existing test code
# For checkOut <= 1 the existing code is only used if ${gcmDIR}/doc exists,
# otherwise checkOut is switched to 2.
checkOut=2
#tmpFil="/tmp/"`basename $0`".$$"
tmpFil=$TST_DIR/error.out

if (( checkOut <= 1 )); then
  if [[ -e "${TST_DIR}/${gcmDIR}/doc" ]]; then
    echo "${TST_DIR}/${gcmDIR}/doc exist"
  else
    printf '%s' "${TST_DIR}/${gcmDIR} missing ; "
    checkOut=2
    echo "will make a new copy ( checkOut=$checkOut )"
  fi
fi

if (( checkOut >= 2 )); then
  #---- cleaning:
  cd "$TST_DIR"

  #---- Make a new clone or update existing one:
  if [[ -e "${gcmDIR}/.git/config" ]]; then
    echo "${gcmDIR}/.git/config exist"
  else
    printf '%s' "${gcmDIR}/.git/config missing ; "
    checkOut=3
    echo "will get new clone ( checkOut=$checkOut )"
  fi

  if (( checkOut == 3 )); then
    printf '%s' "Removing old clone: ${TST_DIR}/${gcmDIR} ..."
    # ":?" guards against an accidental "rm -rf /" if a variable is empty
    if [[ -e "${TST_DIR}/${gcmDIR}" ]]; then
      rm -rf -- "${TST_DIR:?}/${gcmDIR:?}"
    fi
    echo " done"
    printf '%s' "Make a new clone of $git_code from repo: $git_repo ..."
    # Capture the exit status with "||": under "set -e" a plain
    # "git clone; retVal=$?" would abort the script before the error
    # report below could run.
    retVal=0
    git clone "https://github.com/${git_repo}/${git_code}.git" "${gcmDIR}" \
      2> "$tmpFil" || retVal=$?
    if (( retVal == 0 )); then
      echo ' --> done!'
      rm -f -- "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil"
      rm -f -- "$tmpFil"
      exit 2
    fi
  else
    # echo "clean tst_2+2 + testreport output"
    ( cd "${gcmDIR}/verification" ; ../tools/do_tst_2+2 -clean )
    ( cd "${gcmDIR}/verification" ; ./testreport -clean )
    echo "Updating current clone ( $git_code ) ..."
    # pull master, then restore any tracked files that were deleted locally
    ( cd "${gcmDIR}"; git checkout master ; git pull ; git ls-files -d | xargs git checkout -- )
    echo ' --> done!'
  fi
else
  cd "$TST_DIR"
fi

cd "${TST_DIR}/${gcmDIR}/verification"

# cwd deliberately holds the literal text `pwd` (backticks included): it is
# pasted into the command strings below and from there into the batch script,
# so that pwd is only evaluated when the batch job runs.
cwd='`pwd`'
SENDCMD="ssh ollie0 ${TST_DIR}/${gcmDIR}/tools/mpack-1.6/mpack"

runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\" -send \"${SENDCMD}\" -sd ${cwd}"
emailaddress="-a jm_c@mitgcm.org"
testrestart="../tools/do_tst_2+2 -mpi -exe \"${EXE}\" -o ${dNam}-c -send \"${SENDCMD}\" -sd ${cwd}"

if [[ ! -e "$MYOUTPUT" ]]; then
  touch "$MYOUTPUT"
fi
# echo "running testreport like this:"
# echo ${runtestreport} -norun
# eval "${runtestreport} -norun >> $MYOUTPUT 2>&1"

# create batch script
#
JOBNAME=tst$sfx
echo "creating batch script $HERE/$RUNIT"
# The here-document delimiter is unquoted on purpose: unescaped variables and
# the unescaped `pwd` are expanded now, while creating the file; everything
# escaped with a backslash is left for the batch job to expand at run time.
cat << EOF >| "$HERE/$RUNIT"
#!/bin/bash
#SBATCH --job-name=${JOBNAME}
#SBATCH -o ${OUTFILE}
#SBATCH --time=12:00:00
#SBATCH --ntasks=6
# still need this to be able to run a few experiments:
# cfc_example, fizhi, tutorial_deep_convection
ulimit -s unlimited
# binds OpenMP task to given cores
export OMP_PROC_BIND=TRUE

# for debugging
# export FLEXLM_DIAGNOSTICS=2
# export FNP_IP_ENV=1
# export LM_A_CONN_TIMEOUT=99

cd \${SLURM_SUBMIT_DIR}
cwd=`pwd`
echo "running testreport like this in \${cwd}:"
echo "${runtestreport} -devel -match 10"
${runtestreport} -devel -match 10 ${emailaddress} >> $MYOUTPUT 2>&1
echo "running restart test like this in \${cwd}:"
echo "${testrestart}"
${testrestart} ${emailaddress} >> $MYOUTPUT 2>&1

../tools/do_tst_2+2 -clean
echo "running testreport like this in \${cwd}:"
echo "./testreport -clean"
./testreport -clean

# Hack,hack,hack to avoid running dome:
echo "running testreport like this in \${cwd}:"
echo "${runtestreport} -skipdir dome -fast -match 10"
${runtestreport} -skipdir dome -fast -match 10 ${emailaddress} >> $MYOUTPUT 2>&1
echo "running restart test like this in \${cwd}:"
echo "${testrestart}"
${testrestart} ${emailaddress} >> $MYOUTPUT 2>&1
EOF

chmod a+x "$HERE/$RUNIT"

# keep a copy of the generated job script in the log file
{
  echo " "
  echo "***********************************************************"
  echo "Submitting this job script:"
  echo "***********************************************************"
  cat "$HERE/$RUNIT"
  echo "***********************************************************"
  echo "end of job script"
  echo "***********************************************************"
  echo " "
} >> "$MYOUTPUT"

echo "sbatch $HERE/$RUNIT"
sbatch "$HERE/$RUNIT"

# #
# # keep looking for the job in the job queues and wait until it has disappeared
# jobruns=somedummy
# while [ "${jobruns}"x != x ]
# do
#     sleep 200
#     jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`
#     echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
#     currentexp=`grep Experiment $MYOUTPUT | tail -1`
#     echo "currently running $currentexp"
# done

# # workaround for mailing the stuff
# echo "mail the stuff"
# MPACKCMD=../tools/mpack-1.6/mpack
# fn=`ls -dtr tr_$dNam* | grep -v tar.gz | tail -1`
# echo "fname ${fn}"
# tar cf - $fn | gzip > "${fn}.tar.gz"
# $MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jm_c@mitgcm.org
# sleep 2
# rm -rf "${fn}.tar.gz"

echo "end of mitgcmtestreport"