#!/bin/bash
# new script for running testreport on ollie.awi.de
# - compile and run on compute nodes
# - use ssh to call mpack command from the head node ollie0
#$Header: /u/gcmpack/MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray,v 1.10 2020/06/10 13:58:05 mlosch Exp $
#$Name:  $

# Abort on the first unhandled error.  This is set here instead of on the
# shebang line so that it also applies when the script is started as
# "bash <script>" (options on the shebang line are ignored in that case).
set -e

# needed for cron-job
#source /usr/Modules/current/init/bash
source /etc/profile.d/cray_pe.sh
# this seems to be enough to make the module cmd work
source /etc/profile.d/modules.sh
#
module purge
module load PrgEnv-cray
module load netcdf
# set the netcdf root directory here, because the definitions always
# change with different "default" netcdf modules
export NETCDF_ROOT="$(nc-config --prefix)"
# not sure why I have to set these paths here again
# MPI_ROOT is the directory two levels above the mpicc executable
export MPI_ROOT="$(dirname "$(dirname "$(which mpicc)")")"
# $(dirname `echo $LD_LIBRARY_PATH  | awk -F: '{print $1}'`)
export MPI_INC_DIR="${MPI_ROOT}/include"

# there is no slurm module anymore and this is the current recommendation to
# have sbatch in your path (rather than running /etc/profile.d./slurm.sh)
export PATH="${PATH}:/global/opt/slurm/default/bin"

# ---- configuration of this test setup ------------------------------------
# dNam : short tag of the machine, used in directory and output names
# sfx  : compiler tag, used in file names and to select the build-options file
dNam='ollie'
TST_DIR="/work/ollie/mlosch/test_${dNam}"
echo "start from TST_DIR='$TST_DIR' at: $(date)"

umask 0022

sfx='cray'
RUNIT="runit_${sfx}"

# build-options file, given relative to the verification directory
OPTFILE="../tools/build_options/linux_ia64_${sfx}_ollie"
# command line options handed to testreport
options="-MPI 6"
options="$options -odir ${dNam}-c"
options="$options -j 6"
#options="$options -t global_ocean.cs32x15"

# command used to start the model executable; it is passed to testreport
# via -command, TR_NPROC is left in place as a literal placeholder
#EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'
#EXE='srun -n TR_NPROC ./mitgcmuv'
EXE='srun -n TR_NPROC --cpu_bind=cores ./mitgcmuv'

# ---- prepare the working and output directories --------------------------
# Create the test directory if it is missing and make it the current dir.
if [ -e "$TST_DIR" ]; then
    echo "$TST_DIR exists"
else
    mkdir "$TST_DIR"
fi
cd "$TST_DIR"

# HERE holds the batch script and all output of this run
HERE=$TST_DIR/output
if [ -e "$HERE" ]; then
    echo "$HERE"
else
    mkdir "$HERE"
fi

# OUTFILE : stdout/stderr of the batch job (set with "#SBATCH -o")
# MYOUTPUT: log collecting the job script and the testreport output
OUTFILE=$HERE/slurm_${sfx}.out
MYOUTPUT=$HERE/out_$sfx
# remove leftovers of a previous run
if [ -e "$MYOUTPUT" ]; then
  rm -rf -- "$MYOUTPUT"
fi
if [ -e "$OUTFILE" ]; then
  rm -r -- "$OUTFILE"
fi

# name of the local clone and of the GitHub repository (<git_repo>/<git_code>)
gcmDIR="MITgcm_${sfx}"
git_repo='MITgcm'
git_code='MITgcm'

# checkOut determines how much checking out is being done
# checkOut = 3: new clone from GitHub and make a new copy
# checkOut = 2: update (git pull) existing repo and make a new copy
# checkOut = 1: skip update
# checkOut = 0: use existing test code (if available otherwise switch to 1)

checkOut=2
tdir=${TST_DIR}
today=$(date +%Y%m%d)
TODAY=$(date +%d)
#tmpFil="/tmp/"`basename $0`".$$"
# receives stderr of "git clone" so that it can be printed on failure
tmpFil=$TST_DIR/error.out

# Skipping the update is only possible with an existing source tree;
# otherwise switch to checkOut=2.
if [ "$checkOut" -le 1 ] ; then
  if test -e "$TST_DIR/${gcmDIR}/doc" ; then
    echo "$TST_DIR/${gcmDIR}/doc" 'exist'
  else
    echo -n "$TST_DIR/${gcmDIR} missing ; "
    checkOut=2
    echo "will make a new copy ( checkOut=$checkOut )"
  fi
fi

if [ "$checkOut" -ge 2 ] ; then
  #---- cleaning:
  cd "$TST_DIR"

  #---- Make a new clone or update existing one:
  if test -e "${gcmDIR}/.git/config" ; then
    echo "${gcmDIR}/.git/config exist"
  else
    echo -n "${gcmDIR}/.git/config missing ; "
    checkOut=3
    echo "will get new clone ( checkOut=$checkOut )"
  fi
  if [ "$checkOut" -eq 3 ] ; then
    echo -n "Removing old clone: $TST_DIR/${gcmDIR} ..."
    test -e "$TST_DIR/${gcmDIR}"  &&  rm -rf -- "$TST_DIR/${gcmDIR}"
    echo "  done"
    echo -n "Make a new clone of $git_code from repo: $git_repo ..."
    # Capture the exit status with "||": with errexit active a plain failing
    # "git clone" would terminate the script right here and the error report
    # below would never be printed.
    retVal=0
    git clone "https://github.com/$git_repo/${git_code}.git" "${gcmDIR}" \
        2> "$tmpFil" || retVal=$?
    if test "$retVal" = 0 ; then
       echo ' --> done!'
       rm -f -- "$tmpFil"
    else
       echo " Error: 'git clone' returned: $retVal"
       cat "$tmpFil"
       rm -f -- "$tmpFil"
       exit 2
    fi
  else
#    echo "clean tst_2+2 + testreport output"
    # subshells keep the current directory of the main script unchanged
    ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
    ( cd "$gcmDIR/verification" && ./testreport -clean )
    echo "Updating current clone ( $git_code ) ..."
    # after the pull, check out again every tracked file that "git ls-files -d"
    # reports as deleted from the working tree
    ( cd "${gcmDIR}" && { git checkout master ; git pull ; git ls-files -d | xargs git checkout -- ; } )
    echo ' --> done!'
  fi
else
  cd "$TST_DIR"
fi

# ---- assemble the command lines that the batch job will run --------------
cd "${TST_DIR}/${gcmDIR}/verification"

# The backticks are escaped on purpose: cwd contains the literal text `pwd`.
# It is copied (through runtestreport/testrestart) into the generated batch
# script, so the command substitution is only evaluated when the job runs.
cwd=\`pwd\`
# mail is sent by calling mpack on the head node via ssh
SENDCMD="ssh ollie0 ${TST_DIR}/${gcmDIR}/tools/mpack-1.6/mpack"
runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\" -send \"${SENDCMD}\" -sd ${cwd}"
emailaddress="-a jm_c@mitgcm.org"
testrestart="../tools/do_tst_2+2 -mpi -exe \"${EXE}\" -o ${dNam}-c -send \"${SENDCMD}\" -sd ${cwd}"

# make sure the log file exists before anything is appended to it
if [ ! -e "$MYOUTPUT" ]; then
    touch "$MYOUTPUT"
fi

# echo "running testreport like this:"
# echo ${runtestreport} -norun
# eval "${runtestreport} -norun >> $MYOUTPUT 2>&1"

# create batch script
#
# The here-document delimiter is unquoted, so every unescaped ${...} below is
# replaced by its current value while the batch script is written; everything
# escaped with a backslash (\${...}, \$(...)) is left for the job to evaluate
# at run time.  ">|" overwrites an existing script even if noclobber is set.
JOBNAME=tst$sfx
echo "creating batch script $HERE/$RUNIT"
cat << EOF >| "$HERE/$RUNIT"
#!/bin/bash
#SBATCH --job-name=${JOBNAME}
#SBATCH  -o ${OUTFILE}
#SBATCH --time=12:00:00
#SBATCH --ntasks=6

# still need this to be able to run a few experiments:
# cfc_example, fizhi, tutorial_deep_convection
ulimit -s unlimited

# binds OpenMP task to given cores
export OMP_PROC_BIND=TRUE

# for debugging
# export FLEXLM_DIAGNOSTICS=2
# export FNP_IP_ENV=1
# export LM_A_CONN_TIMEOUT=99

cd \${SLURM_SUBMIT_DIR}

cwd=\$(pwd)
echo "running testreport like this in \${cwd}:"
echo "${runtestreport} -devel -match 10"
${runtestreport} -devel -match 10 ${emailaddress} >> $MYOUTPUT 2>&1

echo "running restart test like this in \${cwd}:"
echo "${testrestart}"
${testrestart} ${emailaddress} >> $MYOUTPUT 2>&1

../tools/do_tst_2+2 -clean

echo "running testreport like this in \${cwd}:"
echo "./testreport -clean"
./testreport -clean

# Hack,hack,hack to avoid running dome:
echo "running testreport like this in \${cwd}:"
echo "${runtestreport} -skipdir dome -fast -match 10"
${runtestreport} -skipdir dome -fast -match 10 ${emailaddress} >> $MYOUTPUT 2>&1

echo "running restart test like this in \${cwd}:"
echo "${testrestart}"
${testrestart} ${emailaddress} >> $MYOUTPUT 2>&1

EOF

chmod a+x "$HERE/$RUNIT"

# ---- record the generated job script in the log, then submit it ----------
{
  echo " "
  echo "***********************************************************"
  echo "Submitting this job script:"
  echo "***********************************************************"
  cat "$HERE/$RUNIT"
  echo "***********************************************************"
  echo "end of job script"
  echo "***********************************************************"
  echo " "
} >> "$MYOUTPUT"

echo "sbatch $HERE/$RUNIT"
sbatch "$HERE/$RUNIT"

# # keep looking for the job in the job queues and wait until it has disappeared
# jobruns=somedummy
# while [ "${jobruns}"x != x ]
# do
#   sleep 200
#   jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`
#   echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
#   currentexp=`grep Experiment $MYOUTPUT | tail -1`
#   echo "currently running $currentexp"
# done

# # workaround for mailing the stuff
# echo "mail the stuff"

# MPACKCMD=../tools/mpack-1.6/mpack
# fn=`ls -dtr tr_$dNam* | grep -v tar.gz | tail -1`
# echo "fname ${fn}"
# tar cf - $fn | gzip > "${fn}.tar.gz"
# $MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jm_c@mitgcm.org
# sleep 2
# rm -rf "${fn}.tar.gz"

echo "end of mitgcmtestreport"