/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/aces_test_all
ViewVC logotype

Contents of /MITgcm/tools/example_scripts/ACESgrid/aces_test_all

Parent Directory | Revision Log | View Revision Graph


Revision 1.34 - (show annotations) (download)
Mon Sep 8 13:48:27 2014 UTC (9 years, 8 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint65j, checkpoint65k, checkpoint65h, checkpoint65i, checkpoint65f, checkpoint65g, checkpoint65d, checkpoint65e
Changes since 1.33: +2 -1 lines
save previous job *.stdout file before submitting new jobs

1 #! /usr/bin/env bash
2
3 # $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/aces_test_all,v 1.33 2014/05/07 16:22:24 jmc Exp $
4 # $Name: $
5
6 # submit list of jobs or get list of submitted jobs
7 sub_list_jobs()
8 {
9 # sub_list_jobs
10 # input : JOB_LIST
11 # output: NB_SUB_JOBS (+ status of jobs: M_{sfx}=submitted or skipped )
12
13 NB_SUB_JOBS=0
14 for i in $JOB_LIST
15 do
16 ( cd output ; test -e tst_${i}.stdout && mv -f tst_${i}.stdout tst_${i}.stdout.old )
17 case $i in
18 'mth') sfx='ifc_'${i} ;;
19 'mp2') sfx='ifc_'${i} ;;
20 'tuv') sfx='op64_'${i} ;;
21 'tlm') sfx='op64_'${i} ;;
22 'opa') sfx='op64_adm' ;;
23 'g77') sfx=${i}'_adm' ;;
24 *) sfx=${i}'_mpi' ;;
25 esac
26 if test -f $SUB_DIR/aces_test_$sfx ; then
27 JOB="tst_"$i
28 job_exist=`$QSTAT -a | grep $USER | grep $JOB | wc -l`
29 if [ $action -eq 2 ] ; then
30 #-- to get outp back:
31 if test "x_$job_exist" = x_0 ; then
32 echo "did not find any job: $JOB" | tee -a $LOG_FIL
33 eval M_$i='skipped'
34 else
35 echo -n "found a job: $JOB" | tee -a $LOG_FIL
36 $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL
37 eval M_$i='submitted'
38 NB_SUB_JOBS=`expr $NB_SUB_JOBS + 1`
39 fi
40 else
41 #-- to submit job
42 if test "x_$job_exist" = x_0 ; then
43 #-- update code if not done within submitted script
44 doUp=`grep -c '^ *checkOut=0' $SUB_DIR/aces_test_$sfx`
45 if test ! -e $TST_DIR/MITgcm_$i ; then doUp=0; fi
46 if [ $doUp -ge 1 ] ; then
47 echo "" >> $LOG_FIL
48 echo " update $TST_DIR/MITgcm_$i :" | tee -a $LOG_FIL
49 ( cd $TST_DIR/MITgcm_$i
50 cvs -q -d :pserver:cvsanon@mitgcm.org:/u/gcmpack update -P -d
51 ) >> $LOG_FIL 2>&1
52 fi
53 echo -n " $JOB : " | tee -a $LOG_FIL
54 $QSUB $SUB_DIR/aces_test_$sfx | tee -a $LOG_FIL
55 eval M_$i='submitted'
56 NB_SUB_JOBS=`expr $NB_SUB_JOBS + 1`
57 else
58 echo $JOB | tee -a $LOG_FIL
59 $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL
60 echo 'job already exist => skip this test' | tee -a $LOG_FIL
61 eval M_$i='skipped'
62 fi
63 fi
64 else
65 echo 'no file:' aces_test_$sfx 'to submit' | tee -a $LOG_FIL
66 eval M_$i='skipped'
67 fi
68 done
69 echo " info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
70 }
71
72 # retrieve output when job is finished
73 get_outp_back()
74 {
75 # get_outp_back number_of_jobs
76 # input : JOB_LIST (+ status of jobs: M_{sfx}=submitted )
77 # output: REJECTED (= list of fast-to-fail jobs)
78 # (+ change status of jobs to: M_{sfx}=done )
79
80 nbJobs=$1
81 REJECTED=
82 minutes=0 ; freq=10
83 fsec=`expr $freq \* 60`
84 echo "Check every $freq mn for $nbJobs test(s) to finish" | tee -a $LOG_FIL
85 echo "- start at :" `date` | tee -a $LOG_FIL
86 while test $nbJobs != 0 ; do
87
88 sleep $fsec
89 minutes=$(( $minutes + $freq ))
90
91 for i in $JOB_LIST ; do
92
93 eval comm=\$M_$i
94 if test $comm = 'submitted' ; then
95 JOB="tst_"$i
96 $QSTAT -a > $TMP_FIL
97 RETVAL=$?
98 ready_to_send=`grep $USER $TMP_FIL | grep $JOB | wc -l`
99 rm -f $TMP_FIL
100 if test "x$RETVAL" != x0 ; then
101 echo " $QSTAT returned with error code: $RETVAL" | tee -a $LOG_FIL
102 continue
103 fi
104
105 if test "x_$ready_to_send" = x_0 ; then
106 run_dir=${TST_DIR}"/MITgcm_"$i"/verification"
107 #- results output:
108 tdir=`ls -1 -t $run_dir | grep -v tr_out | grep "^tr_$outPfix" | head -1`
109 if test "x$tdir" != x ; then
110 #- check this is the right output
111 chk=`echo $tdir | grep -c $today`
112 if test $chk = '0' ; then
113 curday=`date +%Y%m%d`
114 chk=`echo $tdir | grep -c $curday`
115 fi
116 if test $chk = '0' ; then
117 echo "tdir='$tdir'" | tee -a $LOG_FIL
118 echo "Output do not match, no email sent for $i" | tee -a $LOG_FIL
119 if [ $minutes -eq $freq ] ; then
120 #- add to rejected list if it fails in less than "freq" minutes
121 REJECTED="$REJECTED $i"
122 fi
123 elif [ $action -le 4 ] ; then
124 rm -f "/tmp/tr_$outPfix-"$i".tar.gz"
125 ( cd $run_dir ; tar -czf "/tmp/tr_$outPfix-"$i".tar.gz" ./$tdir )
126 if test "x$HAVE_MPACK" = xt ; then
127 $MPACK -s MITgcm-test -m 3555000 "/tmp/tr_$outPfix-"$i".tar.gz" jmc@mitgcm.org
128 echo "Email sent for $i at: " `date` | tee -a $LOG_FIL
129 else
130 echo " no email sent for $i (no mpack)" | tee -a $LOG_FIL
131 fi
132 fi
133 else
134 echo " no output found for $i" | tee -a $LOG_FIL
135 if [ $minutes -eq $freq ] ; then
136 #- add to rejected list if it fails in less than "freq" minutes
137 REJECTED="$REJECTED $i"
138 fi
139 fi
140 #- restart output:
141 tdir=`ls -1 -t $run_dir | grep -v tr_out | grep "^rs_$outPfix" | head -1`
142 if test "x$tdir" != x ; then
143 #- check this is the right output
144 chk=`echo $tdir | grep -c $today`
145 if test $chk = '0' ; then
146 curday=`date +%Y%m%d`
147 chk=`echo $tdir | grep -c $curday`
148 fi
149 if test $chk = '0' ; then
150 echo "tdir='$tdir'" | tee -a $LOG_FIL
151 echo "Restart output do not match, no email sent for $i" | tee -a $LOG_FIL
152 elif [ $action -le 4 ] ; then
153 rm -f "/tmp/rs_$outPfix-"$i".tar.gz"
154 ( cd $run_dir ; tar -czf "/tmp/rs_$outPfix-"$i".tar.gz" ./$tdir )
155 if test "x$HAVE_MPACK" = xt ; then
156 $MPACK -s MITgcm-test -m 3555000 "/tmp/rs_$outPfix-"$i".tar.gz" jmc@mitgcm.org
157 echo "Email sent for $i restart:" `date` | tee -a $LOG_FIL
158 else
159 echo " no email sent for $i restart (no mpack)" | tee -a $LOG_FIL
160 fi
161 fi
162 else
163 echo " no restart output for $i" | tee -a $LOG_FIL
164 fi
165 #- record successful sending
166 eval M_$i=done
167 nbJobs=`expr $nbJobs - 1`
168 chmod 644 output/tst_$i.std*
169 fi
170 fi
171 done
172
173 # "long" queue is 24hrs = 24*60min = 1440min
174 if test $minutes -gt 2160 ; then
175 hrs=$(( $minutes / 60 ));
176 echo "Time expired after $minutes minutes ($hrs hours)" | tee -a $LOG_FIL
177 echo ' ' $nbJobs '/' $NB_SUB_JOBS 'tests not yet finished' | tee -a $LOG_FIL
178 exit 1
179 fi
180 if [ $action -eq 5 ] ; then nbJobs=0 ; fi
181
182 done
183
184 echo "Retrieving $NB_SUB_JOBS tests finish :" `date` | tee -a $LOG_FIL
185 echo " info-get-outp: REJECTED='$REJECTED'" >> $LOG_FIL
186
187 }
188
#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
#-- Sequential part of the script starts here:
#---------------------------------------------

# usage: aces_test_all [-subOnly|-getOnly|-sub+get|-double] [test ...]
# action =1 : submit test jobs ; =2 : get jobs output ; =3 : do both
# action =4 or 5 : same as, respectively, 3 or 1 but re-submit fast failed jobs
# (without any of the options above: action=5)
case $1 in
  '-subOnly') action=1 ; shift ;;
  '-getOnly') action=2 ; shift ;;
  '-sub+get') action=3 ; shift ;;
  '-double' ) action=4 ; shift ;;
  *)          action=5 ;;
esac
#echo "action= $action , Arg= $# "

#- date used later to recognize the output directories of this round
today=$(date +%Y%m%d)

#- remaining arguments = list of tests ; default: all of them
if test $# = 0
then
  TEST_LIST='opa g77 adm tlm ifc mp2 mth pgi tuv gnu'
else
  TEST_LIST=$*
fi

# QSUB="/usr/bin/qsub"
# QSTAT="/usr/bin/qstat"
# logPfix='tst_submit'
# outPfix='acesgrid'
# HERE="$HOME/test_$outPfix"
# #TST_DIR="/data/jm_c/test_$outPfix"
# TST_DIR="/scratch/jm_c/test_$outPfix"
QSUB=qsub
QSTAT=qstat
logPfix='tst_all'
outPfix='aces'
HERE="$HOME/test_$outPfix"
TST_DIR=$HERE

#- MPACK and SUB_DIR are relative to $HERE
MPACK="MITgcm_tools/mpack-1.6/mpack"
SUB_DIR="MITgcm_tools/example_scripts/ACESgrid"
TMP_FIL="$HERE/output/TTT.$$"
LOG_FIL="$HERE/output/$logPfix.$(date +%m%d).log"

#-- clean up old log files and start a new one:
#   (stop here if cd fails: the "rm" below use relative file names)
cd "$HERE/output" || { echo "cannot cd to $HERE/output" >&2 ; exit 1 ; }

rm -f "$logPfix".*.log_bak
if test -f "$LOG_FIL" ; then mv -f "$LOG_FIL" "${LOG_FIL}_bak" ; fi
echo -n '-- Starting: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#- keep only the 10 most recent log files
n=$(( $(ls "$logPfix".*.log | wc -l) - 10 ))
if test "$n" -gt 0 ; then
  echo ' remove old log files:' | tee -a "$LOG_FIL"
  ls -lt "$logPfix".*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t "$logPfix".*.log | tail -n "$n" | xargs rm -f
fi

. /etc/profile.d/modules.sh
module list >> "$LOG_FIL" 2>&1

#-- now really do something:
cd "$HERE" || { echo "cannot cd to $HERE" | tee -a "$LOG_FIL" ; exit 1 ; }

JOB_LIST=$TEST_LIST
sub_list_jobs
#echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL

if test "$action" = 1 ; then
  NB_JOBS2GET=0
elif test "$action" = 5 ; then
  NB_JOBS2GET=$NB_SUB_JOBS
else
  #date_str=`date +%Y%m%d`"_0"

  echo "" | tee -a "$LOG_FIL"
  #- build the mpack utility (from build_mpack in testreport):
  MPACKDIR=$(dirname "$MPACK")
  ( cd "$MPACKDIR" && ./configure && make ) > "TTT.build_mpack.$$" 2>&1
  RETVAL=$?
  if test "x$RETVAL" != x0 ; then
    #- build log file TTT.build_mpack.$$ is kept in this case
    echo "Error building the mpack tools at: $MPACKDIR" | tee -a "$LOG_FIL"
    if test -x "$MPACK" ; then
      HAVE_MPACK=t
      echo " use (old ?) executable:" | tee -a "$LOG_FIL"
      ls -l "$MPACK" | tee -a "$LOG_FIL"
    else
      HAVE_MPACK=f
    fi
  else
    if test -x "$MPACK" ; then
      rm -f "TTT.build_mpack.$$"
      HAVE_MPACK=t
      echo "Building mpack: OK" | tee -a "$LOG_FIL"
    else
      echo " $MPACK not executable" | tee -a "$LOG_FIL"
      HAVE_MPACK=f
    fi
  fi
  echo "" >> "$LOG_FIL"

  NB_JOBS2GET=$NB_SUB_JOBS
fi

#- when it's done, retrieve output and send e-mail
sleep 60
get_outp_back "$NB_JOBS2GET"
#echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL

if test "x$REJECTED" != x ; then

  if [ "$action" -ge 4 ] ; then
    echo "" >> "$LOG_FIL"
    echo "Try 2nd round for fast-failed jobs: '$REJECTED'" | tee -a "$LOG_FIL"
    JOB_LIST=$REJECTED
    sub_list_jobs
    #echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
  fi

  if [ "$action" -eq 4 ] ; then
    echo "" >> "$LOG_FIL"
    get_outp_back "$NB_SUB_JOBS"
    #echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL
  fi

fi

#------------------------
exit 0

  ViewVC Help
Powered by ViewVC 1.1.22