/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/aces_test_all
ViewVC logotype

Contents of /MITgcm/tools/example_scripts/ACESgrid/aces_test_all

Parent Directory | Revision Log | View Revision Graph


Revision 1.28 - (show annotations) (download)
Sat Mar 3 15:38:39 2012 UTC (12 years, 2 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint63p, checkpoint63q, checkpoint63r, checkpoint63l, checkpoint63m, checkpoint63n, checkpoint63o, checkpoint63k
Changes since 1.27: +3 -2 lines
fix previous modif

1 #! /usr/bin/env bash
2
3 # $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/aces_test_all,v 1.27 2012/03/02 19:00:22 jmc Exp $
4 # $Name: $
5
6 # submit list of jobs or get list of submitted jobs
7 sub_list_jobs()
8 {
9 # sub_list_jobs
10 # input : JOB_LIST
11 # output: NB_SUB_JOBS (+ status of jobs: M_{sfx}=submitted or skipped )
12
13 NB_SUB_JOBS=0
14 for i in $JOB_LIST
15 do
16 case $i in
17 'mth') sfx='ifc_'${i} ;;
18 'tuv') sfx='op64_'${i} ;;
19 'opa') sfx='op64_adm' ;;
20 'g77') sfx=${i}'_adm' ;;
21 'mp2') sfx=${i}'_mth' ;;
22 *) sfx=${i}'_mpi' ;;
23 esac
24 if test -f $SUB_DIR/aces_test_$sfx ; then
25 JOB="tst_"$i
26 job_exist=`$QSTAT -a | grep $USER | grep $JOB | wc -l`
27 if [ $action -eq 2 ] ; then
28 #-- to get outp back:
29 if test "x_$job_exist" = x_0 ; then
30 echo "did not find any job: $JOB" | tee -a $LOG_FIL
31 eval M_$i='skipped'
32 else
33 echo -n "found a job: $JOB" | tee -a $LOG_FIL
34 $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL
35 eval M_$i='submitted'
36 NB_SUB_JOBS=`expr $NB_SUB_JOBS + 1`
37 fi
38 else
39 #-- to submit job
40 if test "x_$job_exist" = x_0 ; then
41 #-- update code if not done within submitted script
42 doUp=`grep -c '^ *checkOut=0' $SUB_DIR/aces_test_$sfx`
43 if test ! -e $TST_DIR/MITgcm_$i ; then doUp=0; fi
44 if [ $doUp -ge 1 ] ; then
45 echo "" >> $LOG_FIL
46 echo " update $TST_DIR/MITgcm_$i :" | tee -a $LOG_FIL
47 ( cd $TST_DIR/MITgcm_$i
48 cvs -q -d :pserver:cvsanon@mitgcm.org:/u/gcmpack update -P -d
49 ) >> $LOG_FIL 2>&1
50 fi
51 echo -n " $JOB : " | tee -a $LOG_FIL
52 $QSUB $SUB_DIR/aces_test_$sfx | tee -a $LOG_FIL
53 eval M_$i='submitted'
54 NB_SUB_JOBS=`expr $NB_SUB_JOBS + 1`
55 else
56 echo $JOB | tee -a $LOG_FIL
57 $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL
58 echo 'job already exist => skip this test' | tee -a $LOG_FIL
59 eval M_$i='skipped'
60 fi
61 fi
62 else
63 echo 'no file:' aces_test_$sfx 'to submit' | tee -a $LOG_FIL
64 eval M_$i='skipped'
65 fi
66 done
67 echo " info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
68 }
69
70 # retrieve output when job is finished
71 get_outp_back()
72 {
73 # get_outp_back number_of_jobs
74 # input : JOB_LIST (+ status of jobs: M_{sfx}=submitted )
75 # output: REJECTED (= list of fast-to-fail jobs)
76 # (+ change status of jobs to: M_{sfx}=done )
77
78 nbJobs=$1
79 REJECTED=
80 minutes=0 ; freq=10
81 fsec=`expr $freq \* 60`
82 echo "Check every $freq mn for $nbJobs test(s) to finish" | tee -a $LOG_FIL
83 echo "- start at :" `date` | tee -a $LOG_FIL
84 while test $nbJobs != 0 ; do
85
86 sleep $fsec
87 minutes=$(( $minutes + $freq ))
88
89 for i in $JOB_LIST ; do
90
91 eval comm=\$M_$i
92 if test $comm = 'submitted' ; then
93 JOB="tst_"$i
94 $QSTAT -a > $TMP_FIL
95 RETVAL=$?
96 ready_to_send=`grep $USER $TMP_FIL | grep $JOB | wc -l`
97 rm -f $TMP_FIL
98 if test "x$RETVAL" != x0 ; then
99 echo " $QSTAT returned with error code: $RETVAL" | tee -a $LOG_FIL
100 continue
101 fi
102
103 if test "x_$ready_to_send" = x_0 ; then
104 run_dir=${TST_DIR}"/MITgcm_"$i"/verification"
105 #- results output:
106 tdir=`ls -1 -t $run_dir | grep -v tr_out | grep "^tr_$outPfix" | head -1`
107 if test "x$tdir" != x ; then
108 #- check this is the right output
109 chk=`echo $tdir | grep -c $today`
110 if test $chk = '0' ; then
111 curday=`date +%Y%m%d`
112 chk=`echo $tdir | grep -c $curday`
113 fi
114 if test $chk = '0' ; then
115 echo "tdir='$tdir'" | tee -a $LOG_FIL
116 echo "Output do not match, no email sent for $i" | tee -a $LOG_FIL
117 if [ $minutes -eq $freq ] ; then
118 #- add to rejected list if it fails in less than "freq" minutes
119 REJECTED="$REJECTED $i"
120 fi
121 else
122 rm -f "/tmp/tr_$outPfix-"$i".tar.gz"
123 ( cd $run_dir ; tar -czf "/tmp/tr_$outPfix-"$i".tar.gz" ./$tdir )
124 if test "x$HAVE_MPACK" = xt ; then
125 $MPACK -s MITgcm-test -m 3555000 "/tmp/tr_$outPfix-"$i".tar.gz" jmc@mitgcm.org
126 echo "Email sent for $i at: " `date` | tee -a $LOG_FIL
127 else
128 echo " no email sent for $i (no mpack)" | tee -a $LOG_FIL
129 fi
130 fi
131 else
132 echo " no output found for $i" | tee -a $LOG_FIL
133 if [ $minutes -eq $freq ] ; then
134 #- add to rejected list if it fails in less than "freq" minutes
135 REJECTED="$REJECTED $i"
136 fi
137 fi
138 #- restart output:
139 tdir=`ls -1 -t $run_dir | grep -v tr_out | grep "^rs_$outPfix" | head -1`
140 if test "x$tdir" != x ; then
141 #- check this is the right output
142 chk=`echo $tdir | grep -c $today`
143 if test $chk = '0' ; then
144 curday=`date +%Y%m%d`
145 chk=`echo $tdir | grep -c $curday`
146 fi
147 if test $chk = '0' ; then
148 echo "tdir='$tdir'" | tee -a $LOG_FIL
149 echo "Restart output do not match, no email sent for $i" | tee -a $LOG_FIL
150 else
151 rm -f "/tmp/rs_$outPfix-"$i".tar.gz"
152 ( cd $run_dir ; tar -czf "/tmp/rs_$outPfix-"$i".tar.gz" ./$tdir )
153 if test "x$HAVE_MPACK" = xt ; then
154 $MPACK -s MITgcm-test -m 3555000 "/tmp/rs_$outPfix-"$i".tar.gz" jmc@mitgcm.org
155 echo "Email sent for $i restart:" `date` | tee -a $LOG_FIL
156 else
157 echo " no email sent for $i restart (no mpack)" | tee -a $LOG_FIL
158 fi
159 fi
160 else
161 echo " no restart output for $i" | tee -a $LOG_FIL
162 fi
163 #- record successful sending
164 eval M_$i=done
165 nbJobs=`expr $nbJobs - 1`
166 chmod 644 output/tst_$i.std*
167 fi
168 fi
169 done
170
171 # "long" queue is 24hrs = 24*60min = 1440min
172 if test $minutes -gt 2160 ; then
173 hrs=$(( $minutes / 60 ));
174 echo "Time expired after $minutes minutes ($hrs hours)" | tee -a $LOG_FIL
175 echo ' ' $nbJobs '/' $NB_SUB_JOBS 'tests not yet finished' | tee -a $LOG_FIL
176 exit 1
177 fi
178
179 done
180
181 echo "Retrieving $NB_SUB_JOBS tests finish :" `date` | tee -a $LOG_FIL
182 echo " info-get-outp: REJECTED='$REJECTED'" >> $LOG_FIL
183
184 }
185
186 #---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
187 #-- Sequential part of the script starts here:
188 #---------------------------------------------
189
190 # action =1 : submit test jobs ; =2 : get jobs output ; =3 : do both
191 case $1 in
192 '-subOnly') action=1 ; shift ;;
193 '-getOnly') action=2 ; shift ;;
194 '-sub+get') action=3 ; shift ;;
195 '-double' ) action=4 ; shift ;;
196 *) action=4 ;;
197 esac
198 #echo "action= $action , Arg= $# "
199
200 today=`date +%Y%m%d`
201
202 if test $# = 0
203 then
204 TEST_LIST='g77 gnu ifc pgi adm mp2 mth tuv opa'
205 else
206 TEST_LIST=$*
207 fi
208
209 # QSUB="/usr/local/bin/qsub"
210 # QSTAT="/usr/local/bin/qstat"
211 # TST_DIR="/mit/jm_c/test_acesgrid"
212 # logPfix='tst_submit'
213 # outPfix='acesgrid'
214 QSUB=qsub
215 QSTAT=qstat
216 TST_DIR="/home/jmc/test_aces"
217 logPfix='tst_all'
218 outPfix='aces'
219
220 MPACK="MITgcm_tools/mpack-1.6/mpack"
221 SUB_DIR="MITgcm_tools/example_scripts/ACESgrid"
222 TMP_FIL="$TST_DIR/output/TTT.$$"
223 LOG_FIL="$TST_DIR/output/$logPfix."`date +%m%d`".log"
224
225 #-- clean up old log files and start a new one:
226 cd $TST_DIR/output
227
228 rm -f $logPfix.*.log_bak
229 if test -f $LOG_FIL ; then mv -f $LOG_FIL ${LOG_FIL}_bak ; fi
230 echo -n '-- Starting: ' | tee -a $LOG_FIL
231 date | tee -a $LOG_FIL
232
233 n=$(( `ls $logPfix.*.log | wc -l` - 10 ))
234 if test $n -gt 0 ; then
235 echo ' remove old log files:' | tee -a $LOG_FIL
236 ls -lt $logPfix.*.log | tail -"$n" | tee -a $LOG_FIL
237 ls -t $logPfix.*.log | tail -"$n" | xargs rm -f
238 fi
239
240 . /etc/profile.d/modules.sh
241 module list >> $LOG_FIL 2>&1
242
243 #-- now really do something:
244 cd $TST_DIR
245
246 JOB_LIST=$TEST_LIST
247 sub_list_jobs
248 #echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
249
250 if test $action = 1 ; then
251 NB_JOBS2GET=0
252 else
253 #date_str=`date +%Y%m%d`"_0"
254
255 echo "" | tee -a $LOG_FIL
256 #- build the mpack utility (from build_mpack in testreport):
257 MPACKDIR=`dirname $MPACK`
258 ( cd $MPACKDIR && ./configure && make ) > TTT.build_mpack.$$ 2>&1
259 RETVAL=$?
260 if test "x$RETVAL" != x0 ; then
261 echo "Error building the mpack tools at: $MPACK_DIR" | tee -a $LOG_FIL
262 if test -x $MPACK ; then
263 HAVE_MPACK=t
264 echo " use (old ?) executable:" | tee -a $LOG_FIL
265 ls -l $MPACK | tee -a $LOG_FIL
266 else
267 HAVE_MPACK=f
268 fi
269 else
270 if test -x $MPACK ; then
271 rm -f TTT.build_mpack.$$
272 HAVE_MPACK=t
273 echo "Building mpack: OK" | tee -a $LOG_FIL
274 else
275 echo " $MPACK not executable" | tee -a $LOG_FIL
276 HAVE_MPACK=f
277 fi
278 fi
279 echo "" >> $LOG_FIL
280
281 NB_JOBS2GET=$NB_SUB_JOBS
282 fi
283
284 #- when it's done, retrieve output and send e-mail
285 get_outp_back $NB_JOBS2GET
286 #echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL
287
288 if test $action = 4 -a "x$REJECTED" != x ; then
289
290 echo "" >> $LOG_FIL
291 echo "Try 2nd round for fast-failed jobs: '$REJECTED'" | tee -a $LOG_FIL
292 JOB_LIST=$REJECTED
293 sub_list_jobs
294 #echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
295
296 echo "" >> $LOG_FIL
297 get_outp_back $NB_SUB_JOBS
298 #echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL
299
300 fi
301
302 #------------------------
303 exit 0

  ViewVC Help
Powered by ViewVC 1.1.22