/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/aces_test_all
ViewVC logotype

Contents of /MITgcm/tools/example_scripts/ACESgrid/aces_test_all

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.22 - (show annotations) (download)
Sun May 8 11:27:20 2011 UTC (12 years, 10 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint63a, checkpoint63, checkpoint62z, checkpoint62y, checkpoint62x
Changes since 1.21: +200 -145 lines
a hack to re-submit tests which fail too rapidly

1 #! /usr/bin/env bash
2
3 # $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/aces_test_all,v 1.21 2010/08/03 04:02:13 jmc Exp $
4 # $Name: $
5
# submit list of jobs or get list of submitted jobs
#
# sub_list_jobs
#   input : JOB_LIST (space-separated list of test names)
#           action   (=2 : only look for jobs already in the queue ;
#                     any other value : submit the jobs)
#           SUB_DIR, QSUB, QSTAT, USER, LOG_FIL
#   output: NB_SUB_JOBS (+ status of jobs: M_{name}=submitted or skipped,
#                          where {name} is the entry of JOB_LIST)
sub_list_jobs()
{
  local i sfx job queued

  NB_SUB_JOBS=0
  for i in $JOB_LIST ; do
    #- name of the submission script is "aces_test_" + sfx
    case $i in
      'mth') sfx="ifc_${i}"  ;;
      'tuv') sfx="op64_${i}" ;;
      'mp2') sfx="${i}_mth"  ;;
      'g77') sfx="${i}_adm"  ;;
      *)     sfx="${i}_mpi"  ;;
    esac

    if [[ ! -f "$SUB_DIR/aces_test_$sfx" ]] ; then
      echo 'no file:' "aces_test_$sfx" 'to submit' | tee -a "$LOG_FIL"
      printf -v "M_$i" '%s' 'skipped'
      continue
    fi

    job="tst_$i"
    #- take a single snapshot of the queue so that the decision and the
    #  lines which get logged always come from the same qstat output
    # shellcheck disable=SC2086 # QSTAT may hold a command plus options
    queued=$($QSTAT -a | grep -- "$USER" | grep -- "$job")

    if [[ "$action" -eq 2 ]] ; then
      #-- to get outp back:
      if [[ -z "$queued" ]] ; then
        echo "did not find any job: $job" | tee -a "$LOG_FIL"
        printf -v "M_$i" '%s' 'skipped'
      else
        echo -n "found a job: $job" | tee -a "$LOG_FIL"
        printf '%s\n' "$queued" | tee -a "$LOG_FIL"
        printf -v "M_$i" '%s' 'submitted'
        NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
      fi
    else
      #-- to submit job
      if [[ -z "$queued" ]] ; then
        echo -n " $job : " | tee -a "$LOG_FIL"
        # shellcheck disable=SC2086 # QSUB may hold a command plus options
        $QSUB "$SUB_DIR/aces_test_$sfx" | tee -a "$LOG_FIL"
        printf -v "M_$i" '%s' 'submitted'
        NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
      else
        echo "$job" | tee -a "$LOG_FIL"
        printf '%s\n' "$queued" | tee -a "$LOG_FIL"
        echo 'job already exist => skip this test' | tee -a "$LOG_FIL"
        printf -v "M_$i" '%s' 'skipped'
      fi
    fi
  done
  echo " info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> "$LOG_FIL"
}
58
# print the name of the most recent entry of directory $1 which starts
# with prefix $2 (entries containing "tr_out" are ignored)
_aces_latest_outp()
{
  ls -1 -t "$1" | grep -v tr_out | grep -- "^$2" | head -1
}

# return 0 if output-dir name $1 contains either the date when this script
# was started ($today) or the current date
_aces_outp_is_current()
{
  case $1 in
    *"$today"*) return 0 ;;
  esac
  case $1 in
    *"$(date +%Y%m%d)"*) return 0 ;;
  esac
  return 1
}

# pack sub-directory $2 of $1 into tar-file $3 and send it with mpack ;
# return 0 if mpack has been called, 1 if there is no mpack (HAVE_MPACK != t)
_aces_pack_and_mail()
{
  rm -f "$3"
  #- "&&": do not tar from the wrong directory if cd fails
  ( cd "$1" && tar -czf "$3" "./$2" )
  if test "x$HAVE_MPACK" = xt ; then
    "$MPACK" -s MITgcm-test -m 3555000 "$3" jmc@mitgcm.org
    return 0
  fi
  return 1
}

# retrieve output when job is finished
#
# get_outp_back number_of_jobs
#   input : JOB_LIST (+ status of jobs: M_{name}=submitted )
#           today, TST_DIR, TMP_FIL, LOG_FIL, QSTAT, USER,
#           HAVE_MPACK, MPACK, NB_SUB_JOBS
#   output: REJECTED (= list of fast-to-fail jobs)
#           (+ change status of jobs to: M_{name}=done )
#   note  : uses the relative path "output/" (chmod), so it has to be
#           called from the directory which holds it ; calls "exit 1"
#           (ends the whole script) when the time limit is exceeded.
get_outp_back()
{
  local nbJobs=${1:-0}
  local minutes=0 freq=10
  local fsec i status_var job retval still_queued run_dir tdir no_result hrs

  REJECTED=
  fsec=$(( freq * 60 ))
  echo "Check every $freq mn for $nbJobs test(s) to finish" | tee -a "$LOG_FIL"
  echo "- start at :" "$(date)" | tee -a "$LOG_FIL"
  while [[ "$nbJobs" != 0 ]] ; do

    sleep "$fsec"
    minutes=$(( minutes + freq ))

    for i in $JOB_LIST ; do

      status_var="M_$i"
      [[ "${!status_var}" == 'submitted' ]] || continue

      job="tst_$i"
      # shellcheck disable=SC2086 # QSTAT may hold a command plus options
      $QSTAT -a > "$TMP_FIL"
      retval=$?
      still_queued=$(grep -- "$USER" "$TMP_FIL" | grep -c -- "$job")
      rm -f "$TMP_FIL"
      if [[ "$retval" != 0 ]] ; then
        #- cannot tell whether the job is finished: check again next time
        echo " $QSTAT returned with error code: $retval" | tee -a "$LOG_FIL"
        continue
      fi
      #- job still in the queue: nothing to retrieve yet
      [[ "$still_queued" == 0 ]] || continue

      run_dir="${TST_DIR}/MITgcm_${i}/verification"

      #- results output:
      no_result=f
      tdir=$(_aces_latest_outp "$run_dir" 'tr_aces')
      if [[ -z "$tdir" ]] ; then
        echo " no output found for $i" | tee -a "$LOG_FIL"
        no_result=t
      elif ! _aces_outp_is_current "$tdir" ; then
        #- most recent output dir is from a previous run
        echo "tdir='$tdir'" | tee -a "$LOG_FIL"
        echo "Output do not match, no email sent for $i" | tee -a "$LOG_FIL"
        no_result=t
      elif _aces_pack_and_mail "$run_dir" "$tdir" "/tmp/tr_aces-$i.tar.gz" ; then
        echo "Email sent for $i at: " "$(date)" | tee -a "$LOG_FIL"
      else
        echo " no email sent for $i (no mpack)" | tee -a "$LOG_FIL"
      fi
      #- add to rejected list if it fails in less than "freq" minutes
      #  (i.e., job already gone, without results, at the very first check)
      if [[ "$no_result" == t ]] && (( minutes == freq )) ; then
        REJECTED="$REJECTED $i"
      fi

      #- restart output:
      tdir=$(_aces_latest_outp "$run_dir" 'rs_aces')
      if [[ -z "$tdir" ]] ; then
        echo " no restart output for $i" | tee -a "$LOG_FIL"
      elif ! _aces_outp_is_current "$tdir" ; then
        echo "tdir='$tdir'" | tee -a "$LOG_FIL"
        echo "Restart output do not match, no email sent for $i" | tee -a "$LOG_FIL"
      elif _aces_pack_and_mail "$run_dir" "$tdir" "/tmp/rs_aces-$i.tar.gz" ; then
        echo "Email sent for $i restart:" "$(date)" | tee -a "$LOG_FIL"
      else
        echo " no email sent for $i restart (no mpack)" | tee -a "$LOG_FIL"
      fi

      #- record that this job has been processed
      printf -v "M_$i" '%s' 'done'
      nbJobs=$(( nbJobs - 1 ))
      chmod 644 output/tst_"$i".std*
    done

    # "long" queue is 24hrs = 24*60min = 1440min ; the limit used here is
    # 2160min = 36hrs (NOTE: presumably the extra 12hrs allow for the time
    # spent waiting in the queue -- to be confirmed)
    if (( minutes > 2160 )) ; then
      hrs=$(( minutes / 60 ))
      echo "Time expired after $minutes minutes ($hrs hours)" | tee -a "$LOG_FIL"
      echo ' ' "$nbJobs" '/' "$NB_SUB_JOBS" 'tests not yet finished' | tee -a "$LOG_FIL"
      exit 1
    fi

  done

  echo "Retrieving $NB_SUB_JOBS tests finish :" "$(date)" | tee -a "$LOG_FIL"
  echo " info-get-outp: REJECTED='$REJECTED'" >> "$LOG_FIL"

}
174
#---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
#-- Sequential part of the script starts here:
#---------------------------------------------

# action =1 : submit test jobs ; =2 : get jobs output ; =3 : do both
#        =4 : do both, then re-submit (once) the jobs which failed too
#             rapidly and get their output (this is the default)
case $1 in
  '-subOnly') action=1 ; shift ;;
  '-getOnly') action=2 ; shift ;;
  '-sub+get') action=3 ; shift ;;
  '-double' ) action=4 ; shift ;;
  *)          action=4 ;;
esac
#echo "action= $action , Arg= $# "

#- date when the script starts ; used to recognise the output of this run
today=$(date +%Y%m%d)

#- remaining arguments = list of tests to process (default: all of them)
if [[ $# -eq 0 ]] ; then
  TEST_LIST='g77 gnu ifc pgi adm mp2 mth tuv'
else
  TEST_LIST="$*"
fi

# QSUB="/usr/local/bin/qsub"
# QSTAT="/usr/local/bin/qstat"
QSUB=qsub
QSTAT=qstat
TST_DIR="/home/jmc/test_ACES"
#- MPACK and SUB_DIR are relative to TST_DIR
MPACK="MITgcm_tools/mpack-1.6/mpack"
SUB_DIR="MITgcm_tools/example_scripts/ACESgrid"

TMP_FIL="$TST_DIR/output/TTT.$$"
LOG_FIL="$TST_DIR/output/tst_all.$(date +%m%d).log"

#-- clean up old log files and start a new one:
#- stop here if cd fails: the "rm" below must not run in another directory
cd "$TST_DIR/output" || { echo "cannot cd to $TST_DIR/output" >&2 ; exit 1 ; }

rm -f tst_all.*.log_bak
if test -f "$LOG_FIL" ; then mv -f "$LOG_FIL" "${LOG_FIL}_bak" ; fi
echo -n '-- Starting: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#- keep only the 10 most recent log files
n=$(( $(ls tst_all.*.log | wc -l) - 10 ))
if [[ "$n" -gt 0 ]] ; then
  echo ' remove old log files:' | tee -a "$LOG_FIL"
  ls -lt tst_all.*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t tst_all.*.log | tail -n "$n" | xargs rm -f
fi

. /etc/profile.d/modules.sh
module list >> "$LOG_FIL" 2>&1

#-- now really do something:
cd "$TST_DIR" || { echo "cannot cd to $TST_DIR" | tee -a "$LOG_FIL" ; exit 1 ; }

JOB_LIST=$TEST_LIST
sub_list_jobs
#echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL

if [[ "$action" == 1 ]] ; then
  #- submit only: no output to wait for
  NB_JOBS2GET=0
else
  #date_str=`date +%Y%m%d`"_0"

  #- build the mpack utility (from build_mpack in testreport):
  MPACKDIR=$(dirname "$MPACK")
  ( cd "$MPACKDIR" && ./configure && make ) > "TTT.build_mpack.$$" 2>&1
  RETVAL=$?
  if [[ "$RETVAL" != 0 ]] ; then
    #- build log "TTT.build_mpack.$$" is kept in this case
    echo
    echo "Error building the mpack tools at: $MPACKDIR" | tee -a "$LOG_FIL"
    if test -x "$MPACK" ; then
      HAVE_MPACK=t
      echo " use (old ?) executable:" | tee -a "$LOG_FIL"
      ls -l "$MPACK" | tee -a "$LOG_FIL"
    else
      HAVE_MPACK=f
    fi
  else
    if test -x "$MPACK" ; then
      rm -f "TTT.build_mpack.$$"
      HAVE_MPACK=t
      echo " building mpack: OK" | tee -a "$LOG_FIL"
    else
      echo " $MPACK not executable" | tee -a "$LOG_FIL"
      HAVE_MPACK=f
    fi
  fi
  echo

  NB_JOBS2GET=$NB_SUB_JOBS
fi

#- when it's done, retrieve output and send e-mail
get_outp_back "$NB_JOBS2GET"
#echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL

if [[ "$action" == 4 && -n "$REJECTED" ]] ; then

  echo "Try 2nd round for fast-failed jobs: '$REJECTED'" | tee -a "$LOG_FIL"
  JOB_LIST=$REJECTED
  sub_list_jobs
  #echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL

  get_outp_back "$NB_SUB_JOBS"
  #echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL

fi

#------------------------
exit 0

  ViewVC Help
Powered by ViewVC 1.1.22