#! /usr/bin/env bash

# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/svante/test_submit_svante,v 1.26 2023/02/19 14:56:32 jmc Exp $
# $Name: $

#-- Basic settings: date stamps, list of tests to submit, batch-system
#   commands and the directories/files used by the rest of the script.
#   Arguments: optional list of test names (no argument => default list).
today=$(date +%Y%m%d)     #- written to the date/lock-file after the code update
dInWeek=$(date +%a)       #- abbreviated day name (not referenced further below)

if [[ $# -eq 0 ]]; then
  TEST_LIST='ifcMpi ifcAdm pgiMpi pgiAdm pgiMth'
else
  #- space-separated list, split again by the submission loop
  TEST_LIST="$*"
fi

headNode=$(hostname -s)   #- only used by the commented-out "dNam" setting
#QSUB="qsub"
#QLIST="qstat -u $USER"
#dNam=$headNode
QSUB="/usr/bin/sbatch"
QLIST="/usr/bin/squeue -u $USER"
dNam='svante'
HERE="$HOME/test_${dNam}"
TST_DISK="/net/fs09/d1/jm_c"
TST_DIR="$TST_DISK/test_${dNam}"
#- scratch file (receives the 'git clone' messages): created by mktemp so that
#  the name is not predictable, and removed on every exit path by the trap.
tmpFil=$(mktemp "/tmp/$(basename -- "$0").XXXXXX") || {
  echo "Error: unable to create a temporary file in /tmp --> Exit" >&2
  exit 1
}
trap 'rm -f -- "$tmpFil"' EXIT
#- where local copy of code is (need to be consistent with run-job scripts):
#srcDIR=$TST_DIR
srcDIR=$HERE
srcCode="MITgcm_today"

logPfix='test_submit'
SUB_DIR="$HERE/$dNam"
OUT_DIR="$HERE/output"
#- one log file per calendar day (month+day in the name)
LOG_FIL="$OUT_DIR/$logPfix.$(date +%m%d).log"
#SUB_DIR="$HERE/temp"

#-- clean up old log files and start a new one:
#   every relative path used below (log files, "backup/") is relative to
#   $OUT_DIR, so stop here rather than remove files from another directory.
cd "$OUT_DIR" || { echo "Error: unable to cd to: $OUT_DIR --> Exit" >&2 ; exit 1 ; }

rm -f "$logPfix".*.log_bak
#- a log from an earlier run on the same date is kept once, as "*.log_bak"
if [[ -f "$LOG_FIL" ]]; then mv -f "$LOG_FIL" "${LOG_FIL}_bak" ; fi
echo -n '-- Starting: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#- keep the 10 most recent log files, list and remove the older ones
n=$(( $(ls "$logPfix".*.log | wc -l) - 10 ))
if (( n > 0 )); then
  echo ' remove old log files:' | tee -a "$LOG_FIL"
  ls -lt "$logPfix".*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t "$logPfix".*.log | tail -n "$n" | xargs rm -f
fi

if [[ -d backup ]]; then
  echo ' from "backup/", remove *.tar.gz files & gzip previous *.tar files'
  #- counts are only reported in the log; no match is not an error here
  n1=$(ls backup/*.tar.gz 2> /dev/null | wc -l)
  n2=$(ls backup/*.tar 2> /dev/null | wc -l)
  echo " from 'backup/', remove $n1 *.tar.gz files:" >> "$LOG_FIL" 2>&1
  #- "cd ... &&" : never run rm/gzip outside of backup/ if the cd fails
  ( cd backup && rm -f ??_"${dNam}"*.tar.gz ) >> "$LOG_FIL" 2>&1
  echo " and gzip previous $n2 *.tar files:" >> "$LOG_FIL" 2>&1
  ( cd backup && { gzip -9 ??_"${dNam}"*.tar ; ls -l *.tar.gz ; } ) >> "$LOG_FIL" 2>&1
else
  echo ' create dir "backup/" for backup tar files:' | tee -a "$LOG_FIL"
  mkdir backup
fi
#-------------------------------------------------------------
# defaults
#export PATH="$PATH:/usr/local/bin"
if [[ -d ~/bin ]]; then export PATH=$PATH:~/bin ; fi
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
# Turn off stack limit for FIZHI & AD-tests
ulimit -s unlimited

#- define the "module" command (when the system provides one of these files)
if [[ -f /etc/profile.d/modules.sh ]]; then . /etc/profile.d/modules.sh ; fi
if [[ -f /etc/profile.d/zz_modules.sh ]]; then . /etc/profile.d/zz_modules.sh ; fi
#- load standard modules:
#module add fedora slurm maui svante
if type module > /dev/null 2>&1 ; then
  module add slurm
  module list >> "$LOG_FIL" 2>&1
else
  #- report the problem explicitly instead of a bare "command not found"
  echo "Warning: 'module' command not available => no module loaded" | tee -a "$LOG_FIL"
fi

#- method to access CVS:
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- checkOut: 0 = skip the code update ; 1 = update the existing clones with
#  'git pull' (switches to 2 if a clone is missing) ; 2 = make new clones
checkOut=1
addExp=''
#- name of the date/lock-file (re-)written once the working copy is ready
updFile='updated_code'

#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

#######################################
# Make a new clone of one repository inside the current directory,
# after removing any previous copy.
# Globals:   git_repo, tmpFil, LOG_FIL (read)
# Arguments: $1 - repository name (also the name of the local directory)
# Outputs:   progress messages to stdout and to $LOG_FIL
# Returns:   only on success; exits the script (status 2) if the clone fails
#######################################
clone_repo() {
  local repo=$1 retVal
  if [[ -e "$repo" ]]; then
    echo -n " removing dir: $repo ..." | tee -a "$LOG_FIL"
    rm -rf -- "${repo:?}"
    echo " done" | tee -a "$LOG_FIL"
  fi
  echo -n "Make a clone of $repo from repo: $git_repo ..." | tee -a "$LOG_FIL"
  #- git messages are kept aside and only shown if the clone fails
  git clone "$git_repo/${repo}.git" 2> "$tmpFil"
  retVal=$?
  if [[ $retVal -eq 0 ]]; then
    echo ' --> done!' | tee -a "$LOG_FIL"
    rm -f -- "$tmpFil"
  else
    echo '' | tee -a "$LOG_FIL"
    echo " Error: 'git clone' returned: $retVal" | tee -a "$LOG_FIL"
    cat "$tmpFil" ; rm -f -- "$tmpFil"
    exit 2
  fi
}

#######################################
# Update an existing clone with 'git pull', then restore the files of
# branch "master" in its working tree.
# Globals:   LOG_FIL (read)
# Arguments: $1 - name of the local clone directory
# Outputs:   progress messages to stdout and to $LOG_FIL
# Returns:   only on success; exits the script (status 3) if the pull fails
#######################################
update_clone() {
  local repo=$1 retVal
  echo -n "Updating current clone ( $repo ) ..." | tee -a "$LOG_FIL"
  echo '' >> "$LOG_FIL"
  ( cd "$repo" && git pull ) >> "$LOG_FIL" 2>&1
  retVal=$?
  if [[ $retVal -ne 0 ]]; then
    echo ''
    echo "'git pull' on '$(hostname)' fail (return val=$retVal) => exit" | tee -a "$LOG_FIL"
    #- explicit non-zero status: a bare "exit" would return the status of
    #  the "echo | tee" just above, i.e. success
    exit 3
  fi
  echo " done" | tee -a "$LOG_FIL"
  echo " and checkout master:" | tee -a "$LOG_FIL"
  ( cd "$repo" && git checkout master -- . ) | tee -a "$LOG_FIL"
}

if [[ $checkOut -ge 1 ]]; then
#-- Download/Update reference version of MITgcm code:

  if [[ "$srcDIR" == "$TST_DIR" ]]; then
    echo "cd $TST_DISK ; pwd (x2)" | tee -a "$LOG_FIL"
    #- "cd" has to run in the current shell: as part of a pipeline it would
    #  only change the directory of a sub-shell
    if ! cd "$TST_DISK" ; then
      echo "Warning: unable to cd to: $TST_DISK" | tee -a "$LOG_FIL"
    fi
    pwd | tee -a "$LOG_FIL"
  fi
  if [[ ! -d "$srcDIR" ]]; then
    echo -n "Creating a working dir: $srcDIR ..." | tee -a "$LOG_FIL"
    #/bin/rm -rf $srcDIR
    mkdir "$srcDIR"
    retVal=$?
    if [[ $retVal -ne 0 ]]; then
      echo "Error: unable to make dir: $srcDIR (err=$retVal ) --> Exit" | tee -a "$LOG_FIL"
      exit 1
    fi
  fi
  #- all paths below are relative to $srcDIR: do not remove anything if
  #  we could not get there
  cd "$srcDIR" || {
    echo "Error: unable to cd to: $srcDIR --> Exit" | tee -a "$LOG_FIL"
    exit 1
  }
  pwd | tee -a "$LOG_FIL"

  #- remove date/lock-file and old copy:
  if [[ -f "$updFile" ]]; then rm -f "$updFile" ; sleep 2 ; fi
  if [[ -e "$srcCode" ]]; then rm -rf -- "${srcCode:?}" ; fi

  #- an update needs both clones; otherwise start again from new clones
  if [[ $checkOut -eq 1 ]]; then
    for repo in "$git_code" "$git_other" ; do
      if [[ ! -e "$repo/.git/config" ]]; then
        echo "no file: $repo/.git/config => try a new clone" | tee -a "$LOG_FIL"
        checkOut=2
      fi
    done
  fi
  if [[ $checkOut -eq 2 ]]; then
    clone_repo "$git_code"
    #--
    clone_repo "$git_other"
  fi
  #--
  if [[ $checkOut -eq 1 ]]; then
    update_clone "$git_code"
    #---
    update_clone "$git_other"
  fi
  #---- making a new working copy: MITgcm_today
  rsync -a "$git_code/" "$srcCode" --exclude '.git'
  ls -ld "$srcCode" | tee -a "$LOG_FIL"
  #- "-exec ... +" instead of "| xargs": safe with blanks in file names
  /usr/bin/find "$srcCode" -type d -exec chmod g+rxs {} +
  /usr/bin/find "$srcCode" -type f -exec chmod g+r {} +
  #- update date/lock-file:
  if [[ -d "$srcCode/verification" ]]; then
    echo "$today" > "$updFile" ; sleep 2
    ls -l "$updFile" | tee -a "$LOG_FIL"
    echo '' | tee -a "$LOG_FIL"
  fi

#-- Done with Download/Update of MITgcm code
fi
#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

#-- leave srcDIR and go back to output dir
#   (old job output files are looked for, and moved, relative to this dir)
cd "$OUT_DIR" || {
  echo "Error: unable to cd to: $OUT_DIR --> Exit" | tee -a "$LOG_FIL"
  exit 1
}
#-- now really do something:

JOB_LIST=$TEST_LIST
NB_SUB_JOBS=0
#- $JOB_LIST is a space-separated list: word splitting is intended here
for i in $JOB_LIST
do
  #- e.g.: "ifcMpi" -> "ifc_mpi" ("_" after the 3 first characters, lower case)
  sfx=$(printf '%s\n' "$i" | sed 's/^.../&_/' | tr '[:upper:]' '[:lower:]')
  BATCH_SCRIPT="test_${dNam}_$sfx"
  if [[ -f "$SUB_DIR/$BATCH_SCRIPT" ]]; then
    #- job name ( $JOB ) & output-file name ( $JOB.std??? ) must match
    #  definition within $BATCH_SCRIPT slurm script
    JOB="${i}_tst"
    #- squeue truncate name to only 1rst 8c ; "%.8s" truncates without
    #  padding, so that the pattern can safely be quoted
    sJob=$(printf '%.8s' "$JOB")
    #- $QLIST holds a command with its options: left unquoted on purpose
    job_exist=$($QLIST | grep -c -F -- "$sJob")
    if [[ "$job_exist" == 0 ]]; then
      #-- move previous output file
      outList=()
      for xx in "$JOB".std??? ; do
        #- an unmatched pattern is left as is by the shell: skip it
        if [[ -e "$xx" ]]; then outList+=("$xx") ; fi
      done
      if [[ ${#outList[@]} -gt 0 ]]; then
        echo -n " moving job $JOB old output files:" | tee -a "$LOG_FIL"
        if [[ -d "$OUT_DIR/prev" ]]; then
          for xx in "${outList[@]}" ; do
            pp="$OUT_DIR/prev/$xx" ; echo -n " $xx" | tee -a "$LOG_FIL"
            #- keep 2 older generations in prev/ : $xx.sav and $xx.old
            if [[ -f "$pp.sav" ]]; then mv -f "$pp.sav" "$pp.old" ; fi
            if [[ -f "$pp" ]]; then mv -f "$pp" "$pp.sav" ; fi
            chmod a+r "$xx" ; mv -f "$xx" "$OUT_DIR/prev"
          done
          echo " to dir ./prev" | tee -a "$LOG_FIL"
        else
          echo " <-- missing dir $OUT_DIR/prev" | tee -a "$LOG_FIL"
        fi
      else
        echo " no old output files from job '$JOB'" | tee -a "$LOG_FIL"
      fi
      #-- submit job
      echo -n "--> $JOB : $BATCH_SCRIPT , " | tee -a "$LOG_FIL"
      $QSUB "$SUB_DIR/$BATCH_SCRIPT" | tee -a "$LOG_FIL"
      subStat=${PIPESTATUS[0]}
      #- only count the job if the submission command itself succeeded
      if [[ $subStat -eq 0 ]]; then
        NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
      else
        echo '' | tee -a "$LOG_FIL"
        echo " Error: '$QSUB' returned: $subStat => job '$JOB' not submitted" | tee -a "$LOG_FIL"
      fi
      sleep 1
    else
      echo "--> $JOB :" | tee -a "$LOG_FIL"
      $QLIST | grep -F -- "$sJob" | tee -a "$LOG_FIL"
      echo ' job already exist => skip this test' | tee -a "$LOG_FIL"
    fi
  else
    echo "no file: $BATCH_SCRIPT to submit" | tee -a "$LOG_FIL"
  fi
done
#echo "info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
echo "Submitted $NB_SUB_JOBS jobs from dir: $SUB_DIR" | tee -a "$LOG_FIL"
echo -n '-- Finished at: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
exit 0