#! /usr/bin/env bash

# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/svante/test_submit_svante,v 1.22 2018/11/07 23:10:03 jmc Exp $
# $Name:  $

#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
# Purpose:
#   1) rotate the "test_submit.MMDD.log" files kept in $OUT_DIR;
#   2) refresh the local git clones ($git_code, $git_other) from GitHub and
#      make a fresh working copy ($srcCode) of the main code with rsync;
#   3) submit one batch script per requested test with $QSUB, unless a job
#      with the same (truncated) name already shows up in the $QLIST output.
#
# Usage:   test_submit_svante [test-name ...]
#   With no argument the default list 'ifcMPI pgiMPI pgiAdm pgiMth' is used.
#
# Exit status:
#   0  normal end
#   1  unable to create or enter a needed directory (or to make a temp file)
#   2  'git clone' failed
#   3  'git pull' failed
#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

today=$(date +%Y%m%d)
dInWeek=$(date +%a)           #- (currently not used in this script)

if [ $# -eq 0 ]; then
  TEST_LIST='ifcMPI pgiMPI pgiAdm pgiMth'
else
  TEST_LIST=$*
fi

headNode=$(hostname -s)       #- (only used by the commented-out dNam setting)
#QSUB="qsub"
#QSTAT="qstat -u $USER"
#dNam=$headNode
QSUB="/usr/bin/sbatch"
QLIST="/usr/bin/squeue -u $USER"
dNam='svante'
HERE="$HOME/test_${dNam}"
TST_DISK="/net/fs09/d1/jm_c"
TST_DIR="$TST_DISK/test_${dNam}"
#- where local copy of code is (need to be consistent with run-job scripts):
#srcDIR=$TST_DIR
srcDIR=$HERE
srcCode="MITgcm_today"

logPfix='test_submit'
SUB_DIR="$HERE/$dNam"
OUT_DIR="$HERE/output"
LOG_FIL="$OUT_DIR/$logPfix.$(date +%m%d).log"
#SUB_DIR="$HERE/temp"

#-------------------------------------------------------------
# helper functions

#- Echo the arguments (plain "echo" semantics, so a leading -n is honoured)
#  to the terminal and append the same text to the log file.
log_msg() {
  echo "$@" | tee -a "$LOG_FIL"
}

#- Remove any existing directory $1 under the current dir and make a new
#  clone of $git_repo/$1.git ; on failure, report git's messages and exit 2.
clone_repo() {
  local name=$1 errFil rc
  if test -e "$name"; then
    log_msg -n " removing dir: $name ..."
    rm -rf -- "${name:?}"
    log_msg "  done"
  fi
  log_msg -n "Make a clone of $name from repo: $git_repo ..."
  #- unpredictable temp-file name, only created when a clone is attempted
  errFil=$(mktemp "/tmp/${0##*/}.XXXXXX")
  rc=$?
  if test "$rc" != 0; then
    log_msg ''
    log_msg " Error: unable to create a temporary file (err=$rc) --> Exit"
    exit 1
  fi
  git clone "$git_repo/${name}.git" 2> "$errFil"
  rc=$?
  if test "$rc" = 0; then
    log_msg ' --> done!'
    rm -f "$errFil"
  else
    log_msg ''
    log_msg " Error: 'git clone' returned: $rc"
    #- keep git's own messages in the log too, not just on the terminal
    tee -a "$LOG_FIL" < "$errFil"
    rm -f "$errFil"
    exit 2
  fi
}

#- Run 'git pull' inside existing clone $1 and then restore all files from
#  branch master ; exit 3 if the pull (or entering the clone) fails.
update_repo() {
  local name=$1 rc
  log_msg -n "Updating current clone ( $name ) ..."
  echo '' >> "$LOG_FIL"
  #- "&&": never run git from the parent dir if the cd fails
  ( cd "$name" && git pull ) >> "$LOG_FIL" 2>&1
  rc=$?
  if test "$rc" != 0; then
    echo ''
    log_msg "'git pull' on '$(hostname)' fail (return val=$rc) => exit"
    #- explicit non-zero status: a bare "exit" would return the status of
    #  the logging pipeline just above (i.e., success)
    exit 3
  fi
  log_msg "  done"
  log_msg "  and checkout master:"
  ( cd "$name" && git checkout master -- . ) | tee -a "$LOG_FIL"
}

#-------------------------------------------------------------
#-- clean up old log files and start a new one:
#   (the "rm" below uses relative names: stop here if the cd fails)
if ! cd "$OUT_DIR"; then
  echo "Error: unable to cd to: $OUT_DIR --> Exit" >&2
  exit 1
fi

rm -f "$logPfix".*.log_bak
if test -f "$LOG_FIL"; then mv -f "$LOG_FIL" "${LOG_FIL}_bak"; fi
log_msg -n '-- Starting: '
date | tee -a "$LOG_FIL"

#- keep only the 10 most recent log files
logFiles=( "$logPfix".*.log )
n=$(( ${#logFiles[@]} - 10 ))
if test "$n" -gt 0; then
  log_msg ' remove old log files:'
  ls -lt "$logPfix".*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t  "$logPfix".*.log | tail -n "$n" | xargs rm -f
fi

#-------------------------------------------------------------
# defaults
#export PATH="$PATH:/usr/local/bin"
if [ -d "$HOME/bin" ]; then export PATH="$PATH:$HOME/bin"; fi
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
#  Turn off stack limit for FIZHI & AD-tests
ulimit -s unlimited

if test -f /etc/profile.d/modules.sh;    then . /etc/profile.d/modules.sh;    fi
if test -f /etc/profile.d/zz_modules.sh; then . /etc/profile.d/zz_modules.sh; fi
#- load standard modules:
#module add fedora slurm maui svante
module add slurm
module list >> "$LOG_FIL" 2>&1

#- method to access CVS (currently not used in this script):
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm';  git_code='MITgcm';  git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- checkOut: 0 = skip the download/update section ;
#            1 = update the existing clones (switch to 2 if one is missing) ;
#            2 = remove the clones and make new ones
checkOut=1
addExp=''                     #- (currently not used in this script)
updFile='updated_code'

#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
if [ "$checkOut" -ge 1 ]; then
  #-- Download/Update reference version of MITgcm code:

  if test "$srcDIR" = "$TST_DIR"; then
    log_msg "cd $TST_DISK ; pwd (x2)"
    #- cd must run in this shell (not as part of a pipeline) to have any
    #  effect on the "pwd" that follows
    if ! cd "$TST_DISK" 2>> "$LOG_FIL"; then
      log_msg "Error: unable to cd to: $TST_DISK --> Exit"
      exit 1
    fi
    pwd | tee -a "$LOG_FIL"
  fi
  if test ! -d "$srcDIR"; then
    log_msg -n "Creating a working dir: $srcDIR ..."
    #/bin/rm -rf $srcDIR
    mkdir "$srcDIR"
    retVal=$?
    if test "$retVal" != 0; then
      log_msg "Error: unable to make dir: $srcDIR (err=$retVal ) --> Exit"
      exit 1
    fi
  fi
  #- all that follows (including "rm -rf") uses names relative to $srcDIR
  if ! cd "$srcDIR"; then
    log_msg "Error: unable to cd to: $srcDIR --> Exit"
    exit 1
  fi
  pwd | tee -a "$LOG_FIL"

  #- remove date/lock-file and old copy:
  if test -f "$updFile"; then rm -f "$updFile"; sleep 2; fi
  if test -e "$srcCode"; then rm -rf -- "${srcCode:?}"; fi

  #- without a usable clone of both repos, fall back to new clones
  if [ "$checkOut" -eq 1 ]; then
    for repo in "$git_code" "$git_other"; do
      if test ! -e "$repo/.git/config"; then
        log_msg "no file: $repo/.git/config => try a new clone"
        checkOut=2
      fi
    done
  fi
  if [ "$checkOut" -eq 2 ]; then
    clone_repo "$git_code"
    #--
    clone_repo "$git_other"
  fi
  #--
  if [ "$checkOut" -eq 1 ]; then
    update_repo "$git_code"
    #---
    update_repo "$git_other"
  fi

  #---- making a new working copy: MITgcm_today
  rsync -a --exclude '.git' "$git_code/" "$srcCode"
  ls -ld "$srcCode" | tee -a "$LOG_FIL"
  #- give group read access (+ search and set-group-ID on directories)
  /usr/bin/find "$srcCode" -type d -exec chmod g+rxs {} +
  /usr/bin/find "$srcCode" -type f -exec chmod g+r {} +
  #- update date/lock-file (only if the new copy looks complete):
  if test -d "$srcCode/verification"; then
    echo "$today" > "$updFile"
    sleep 2
    ls -l "$updFile" | tee -a "$LOG_FIL"
    log_msg ''
  fi

  #-- Done with Download/Update of MITgcm code
fi
#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

#-- leave srcDIR and go back to output dir
#   (old output files are looked for, and moved, with relative names)
if ! cd "$OUT_DIR"; then
  log_msg "Error: unable to cd to: $OUT_DIR --> Exit"
  exit 1
fi
#-- now really do something:

JOB_LIST=$TEST_LIST
NB_SUB_JOBS=0
for i in $JOB_LIST; do
  #- batch-script suffix: 2 special cases, otherwise trailing "MPI" -> "_mpi"
  case $i in
    'pgiAdm') sfx='pgi_adm' ;;
    'pgiMth') sfx='pgi_mth' ;;
    *MPI)     sfx="${i%MPI}_mpi" ;;
    *)        sfx=$i ;;
  esac
  BATCH_SCRIPT="test_${dNam}_$sfx"
  if test -f "$SUB_DIR/$BATCH_SCRIPT"; then
    #- job name ( $JOB ) & output-file name ( $JOB.std??? ) must match
    #  definition within $BATCH_SCRIPT slurm script
    JOB="${i}_tst"
    sJob=${JOB:0:8}   #- squeue truncates the name to its first 8 characters
    #job_exist=`$QSTAT | grep $JOB | wc -l`
    #- $QLIST is left unquoted on purpose: it holds a command + its options
    job_exist=$($QLIST | grep -c -- "$sJob")
    if test "x_$job_exist" = x_0; then
      #-- move previous output file
      outList=()
      for xx in "$JOB".std???; do
        #- an unmatched pattern is left as is by the shell: skip it
        if test -e "$xx"; then outList+=( "$xx" ); fi
      done
      if [ "${#outList[@]}" -gt 0 ]; then
        log_msg -n " moving job $JOB old output files:"
        if test -d "$OUT_DIR/prev"; then
          for xx in "${outList[@]}"; do
            pp=$OUT_DIR/prev/$xx
            log_msg -n " $xx"
            #- keep 2 older generations in ./prev : $xx.sav and $xx.old
            if test -f "$pp.sav"; then mv -f "$pp.sav" "$pp.old"; fi
            if test -f "$pp";     then mv -f "$pp"     "$pp.sav"; fi
            chmod a+r "$xx"
            mv -f "$xx" "$OUT_DIR/prev"
          done
          log_msg " to dir ./prev"
        else
          log_msg " <-- missing dir $OUT_DIR/prev"
        fi
      else
        log_msg " no old output files from job '$JOB'"
      fi
      #-- submit job
      log_msg -n "--> $JOB : "
      $QSUB "$SUB_DIR/$BATCH_SCRIPT" | tee -a "$LOG_FIL"
      #- counts every submission attempt (status of $QSUB is not checked)
      NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
      sleep 1
    else
      log_msg "--> $JOB :"
      #$QSTAT | grep $JOB				| tee -a $LOG_FIL
      $QLIST | grep -- "$sJob" | tee -a "$LOG_FIL"
      log_msg ' job already exist => skip this test'
    fi
  else
    log_msg "no file: $BATCH_SCRIPT to submit"
  fi
done
echo "info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> "$LOG_FIL"
log_msg -n '-- Finished at: '
date | tee -a "$LOG_FIL"

#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
exit 0