#! /usr/bin/env bash

# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/svante/test_submit_svante,v 1.22 2018/11/07 23:10:03 jmc Exp $
# $Name: $

#- Refresh a local copy of the MITgcm code and submit the selected test
#  jobs to the batch scheduler.
#  Usage: pass the names of the tests to submit as arguments; with no
#  argument the default list below is used.

#- date stamps taken once, at start-up
today=$(date +%Y%m%d)
dInWeek=$(date +%a)

if test $# = 0
then
  TEST_LIST='ifcMPI pgiMPI pgiAdm pgiMth'
else
  #- all arguments joined into one blank-separated list
  TEST_LIST=$*
fi

#- Batch-system commands and directory layout used by the rest of the script.
#  (former qsub/qstat settings are kept below, commented out)
headNode=$(hostname -s)
#QSUB="qsub"
#QSTAT="qstat -u $USER"
#dNam=$headNode
QSUB="/usr/bin/sbatch"
QLIST="/usr/bin/squeue -u $USER"
dNam='svante'
HERE="$HOME/test_${dNam}"
TST_DISK="/net/fs09/d1/jm_c"
TST_DIR="$TST_DISK/test_${dNam}"
#- scratch file that receives the stderr of 'git clone'
#  NOTE(review): predictable name under /tmp; consider switching to mktemp
tmpFil="/tmp/$(basename "$0").$$"
#- where local copy of code is (need to be consistent with run-job scripts):
#srcDIR=$TST_DIR
srcDIR=$HERE
srcCode="MITgcm_today"

logPfix='test_submit'
SUB_DIR="$HERE/$dNam"
OUT_DIR="$HERE/output"
#- one log file per calendar day (month+day in the name)
LOG_FIL="$OUT_DIR/$logPfix.$(date +%m%d).log"
#SUB_DIR="$HERE/temp"

#-- clean up old log files and start a new one:
#- every command below uses relative paths or writes the log in $OUT_DIR,
#  so stop here rather than delete/rotate files in some other directory.
if ! cd "$OUT_DIR" ; then
  echo "Error: unable to cd to output dir: $OUT_DIR --> Exit" >&2
  exit 1
fi

rm -f "$logPfix".*.log_bak
#- a log from an earlier run on the same date is kept as a single backup
if test -f "$LOG_FIL" ; then mv -f "$LOG_FIL" "${LOG_FIL}_bak" ; fi
echo -n '-- Starting: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#- keep at most 10 log files: n = number of files in excess
n=$(( $(ls "$logPfix".*.log | wc -l) - 10 ))
if test "$n" -gt 0 ; then
  echo ' remove old log files:' | tee -a "$LOG_FIL"
  #- "ls -t" lists newest first, so the last $n entries are the oldest ones
  ls -lt "$logPfix".*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t "$logPfix".*.log | tail -n "$n" | xargs rm -f
fi

#-------------------------------------------------------------
# defaults
#export PATH="$PATH:/usr/local/bin"
if [ -d ~/bin ]; then export PATH=$PATH:~/bin ; fi
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
# Turn off stack limit for FIZHI & AD-tests
ulimit -s unlimited

#- source the environment-modules set-up files when present
#  (presumably what provides the "module" command used below)
if test -f /etc/profile.d/modules.sh ; then . /etc/profile.d/modules.sh ; fi
if test -f /etc/profile.d/zz_modules.sh ; then . /etc/profile.d/zz_modules.sh ; fi
#- load standard modules:
#module add fedora slurm maui svante
module add slurm
#- record the loaded modules in the log file
module list >> "$LOG_FIL" 2>&1

#- method to access CVS:
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
#- turn the account name into the full URL prefix (alternatives commented out)
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- checkOut: a value < 1 skips the download/update section entirely ;
#            1 = update the existing clones with "git pull" (switches to 2
#                when a clone is missing) ; 2 = remove and make new clones
checkOut=1
addExp=''
#- name of the date/lock-file written once the new working copy is in place
updFile='updated_code'

#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
#- Helper: remove any existing directory "$1" and make a new clone of
#  $git_repo/$1.git in the current directory. Progress goes to stdout and
#  to $LOG_FIL ; on failure the git error text is reported (also in the
#  log file) and the script exits with status 2.
clone_fresh() {
  local repoName=$1
  if test -e "$repoName" ; then
    echo -n " removing dir: $repoName ..." | tee -a "$LOG_FIL"
    rm -rf "$repoName"
    echo " done" | tee -a "$LOG_FIL"
  fi
  echo -n "Make a clone of $repoName from repo: $git_repo ..." | tee -a "$LOG_FIL"
  git clone "$git_repo/${repoName}.git" 2> "$tmpFil"
  retVal=$?
  if test $retVal = 0 ; then
    echo ' --> done!' | tee -a "$LOG_FIL"
    rm -f "$tmpFil"
  else
    echo '' | tee -a "$LOG_FIL"
    echo " Error: 'git clone' returned: $retVal" | tee -a "$LOG_FIL"
    #- keep the reason of the failure in the log file as well
    tee -a "$LOG_FIL" < "$tmpFil" ; rm -f "$tmpFil"
    exit 2
  fi
}

#- Helper: run "git pull" inside the existing clone "$1", then check out
#  master over the working tree. When the pull fails, the script exits with
#  status 3 (a bare "exit" would return the status of the preceding
#  "echo | tee", i.e. 0, and hide the failure from the caller).
pull_master() {
  local repoName=$1
  echo -n "Updating current clone ( $repoName ) ..." | tee -a "$LOG_FIL"
  echo '' >> "$LOG_FIL"
  ( cd "$repoName" && git pull ) >> "$LOG_FIL" 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo ''
    echo "'git pull' on '$(hostname)' fail (return val=$retVal) => exit" | tee -a "$LOG_FIL"
    exit 3
  else
    echo " done" | tee -a "$LOG_FIL"
  fi
  echo " and checkout master:" | tee -a "$LOG_FIL"
  ( cd "$repoName" && git checkout master -- . ) | tee -a "$LOG_FIL"
}

if [ "$checkOut" -ge 1 ] ; then
#-- Download/Update reference version of MITgcm code:

  if test "$srcDIR" = "$TST_DIR" ; then
    echo "cd $TST_DISK ; pwd (x2)" | tee -a "$LOG_FIL"
    #- "cd" has to run in the current shell: as a member of a pipeline it
    #  only changes the directory of a sub-shell and has no lasting effect.
    cd "$TST_DISK" 2>> "$LOG_FIL"
    pwd | tee -a "$LOG_FIL"
  fi
  if test ! -d "$srcDIR" ; then
    echo -n "Creating a working dir: $srcDIR ..." | tee -a "$LOG_FIL"
    #/bin/rm -rf $srcDIR
    mkdir "$srcDIR"
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "Error: unable to make dir: $srcDIR (err=$retVal ) --> Exit" | tee -a "$LOG_FIL"
      exit 1
    fi
  fi
  #- what follows removes and clones directories given by relative paths:
  #  do not continue from an unexpected directory.
  if ! cd "$srcDIR" ; then
    echo "Error: unable to cd to dir: $srcDIR --> Exit" | tee -a "$LOG_FIL"
    exit 1
  fi
  pwd | tee -a "$LOG_FIL"

  #- remove date/lock-file and old copy:
  if test -f "$updFile" ; then rm -f "$updFile" ; sleep 2 ; fi
  if test -e "$srcCode" ; then rm -rf "$srcCode" ; fi

  if [ "$checkOut" -eq 1 ] ; then
    #- an update needs both clones to be there, otherwise start from scratch
    for repoName in "$git_code" "$git_other" ; do
      if test ! -e "$repoName/.git/config" ; then
        echo "no file: $repoName/.git/config => try a new clone" | tee -a "$LOG_FIL"
        checkOut=2
      fi
    done
  fi
  if [ "$checkOut" -eq 2 ] ; then
    clone_fresh "$git_code"
    #--
    clone_fresh "$git_other"
  fi
  #--
  if [ "$checkOut" -eq 1 ] ; then
    pull_master "$git_code"
    #---
    pull_master "$git_other"
  fi
  #---- making a new working copy: MITgcm_today
  rsync -a "$git_code/" "$srcCode" --exclude '.git'
  ls -ld "$srcCode" | tee -a "$LOG_FIL"
  #- give the group read access (+ search & set-group-ID on directories)
  /usr/bin/find "$srcCode" -type d -exec chmod g+rxs {} +
  /usr/bin/find "$srcCode" -type f -exec chmod g+r {} +
  #- update date/lock-file:
  if test -d "$srcCode/verification" ; then
    echo "$today" > "$updFile" ; sleep 2
    ls -l "$updFile" | tee -a "$LOG_FIL"
    echo '' | tee -a "$LOG_FIL"
  fi

#-- Done with Download/Update of MITgcm code
fi
#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

#-- leave srcDIR and go back to output dir
#- old job output files are looked up, moved and chmod-ed with relative
#  paths from here on: do not continue from any other directory.
if ! cd "$OUT_DIR" ; then
  echo "Error: unable to cd to output dir: $OUT_DIR --> Exit" | tee -a "$LOG_FIL"
  exit 1
fi
#-- now really do something:

JOB_LIST=$TEST_LIST
#- counter of the jobs handed to the batch system
NB_SUB_JOBS=0
#- Helper: print the batch-script suffix that goes with test name "$1":
#  two special cases, otherwise the name with a trailing "MPI" turned
#  into "_mpi" (names without that ending are printed unchanged).
job_suffix() {
  case "$1" in
    'pgiAdm') printf '%s\n' 'pgi_adm' ;;
    'pgiMth') printf '%s\n' 'pgi_mth' ;;
    *)        printf '%s\n' "${1/%MPI/_mpi}" ;;
  esac
}

#- Loop over the requested tests: submit each one that has a batch script
#  in $SUB_DIR and is not already listed by $QLIST.
for i in $JOB_LIST
do
  sfx=$(job_suffix "$i")
  BATCH_SCRIPT="test_${dNam}_$sfx"
  if test -f "$SUB_DIR/$BATCH_SCRIPT" ; then
    #- job name ( $JOB ) & output-file name ( $JOB.std??? ) must match
    #  definition within $BATCH_SCRIPT slurm script
    JOB="${i}_tst"
    sJob=${JOB:0:8}   #- squeue truncate name to only 1rst 8c
    #job_exist=`$QSTAT | grep $JOB | wc -l`
    #- $QLIST holds a command plus its options: left unquoted on purpose
    job_exist=$($QLIST | grep -c -- "$sJob")
    if test "x_$job_exist" = x_0 ; then
      #-- move previous output file
      outList=$(ls "$JOB".std??? 2> /dev/null)
      if test "x$outList" != x ; then
        echo -n " moving job $JOB old output files:" | tee -a "$LOG_FIL"
        if test -d "$OUT_DIR/prev" ; then
          for xx in $outList ; do
            pp=$OUT_DIR/prev/$xx ; echo -n " $xx" | tee -a "$LOG_FIL"
            #- keep 2 older generations in ./prev : *.sav -> *.old ; * -> *.sav
            test -f "$pp.sav" && mv -f "$pp.sav" "$pp.old"
            test -f "$pp" && mv -f "$pp" "$pp.sav"
            chmod a+r "$xx" ; mv -f "$xx" "$OUT_DIR/prev"
          done
          echo " to dir ./prev" | tee -a "$LOG_FIL"
        else
          echo " <-- missing dir $OUT_DIR/prev" | tee -a "$LOG_FIL"
        fi
      else echo " no old output files from job '$JOB'" | tee -a "$LOG_FIL"
      fi
      #-- submit job
      echo -n "--> $JOB : " | tee -a "$LOG_FIL"
      $QSUB "$SUB_DIR/$BATCH_SCRIPT" | tee -a "$LOG_FIL"
      NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
      sleep 1
    else
      echo "--> $JOB :" | tee -a "$LOG_FIL"
      #$QSTAT | grep $JOB | tee -a $LOG_FIL
      $QLIST | grep -- "$sJob" | tee -a "$LOG_FIL"
      echo ' job already exist => skip this test' | tee -a "$LOG_FIL"
    fi
  else
    echo 'no file:' "$BATCH_SCRIPT" 'to submit' | tee -a "$LOG_FIL"
  fi
done
#- number of submitted jobs: written to the log file only (not to stdout)
echo "info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> "$LOG_FIL"
echo -n '-- Finished at: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
exit 0