/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/aces_test_all
ViewVC logotype

Contents of /MITgcm/tools/example_scripts/ACESgrid/aces_test_all

Parent Directory | Revision Log | View Revision Graph


Revision 1.25 - (show annotations) (download)
Thu Nov 3 17:36:33 2011 UTC (12 years, 6 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint63h, checkpoint63i, checkpoint63e, checkpoint63f, checkpoint63g
Changes since 1.24: +2 -2 lines
Move the directory where all tests are run (on geo). Improve the disk-space test.

1 #! /usr/bin/env bash
2
3 # $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/aces_test_all,v 1.24 2011/08/24 13:21:43 jmc Exp $
4 # $Name: $
5
6 # submit list of jobs or get list of submitted jobs
sub_list_jobs()
{
  # sub_list_jobs
  #   For each test name in JOB_LIST, map it to its submission script
  #   ($SUB_DIR/aces_test_{sfx}) and either submit it (action != 2) or only
  #   report whether a job "tst_{name}" of $USER is in the queue (action = 2).
  # input : JOB_LIST  (space-separated list of test names)
  #         action, SUB_DIR, TST_DIR, LOG_FIL, QSTAT, QSUB, USER
  # output: NB_SUB_JOBS (+ status of jobs: M_{name}=submitted or skipped )
  local i sfx script JOB qstat_out qstat_rc job_lines doUp

  NB_SUB_JOBS=0
  for i in $JOB_LIST
  do
    case "$i" in
      'mth') sfx="ifc_${i}" ;;
      'tuv') sfx="op64_${i}" ;;
      'mp2') sfx="${i}_mth" ;;
      'g77') sfx="${i}_adm" ;;
          *) sfx="${i}_mpi" ;;
    esac
    script="$SUB_DIR/aces_test_$sfx"
    if test ! -f "$script" ; then
      echo 'no file:' "aces_test_$sfx" 'to submit' | tee -a "$LOG_FIL"
      printf -v "M_$i" '%s' 'skipped'
      continue
    fi

    JOB="tst_$i"
    #-- query the queue only once per job, and keep the exit status: a failing
    #   qstat must not be mistaken for "no such job" (would submit it twice).
    #   $QSTAT / $QSUB stay unquoted so that they may carry extra options.
    qstat_out=$($QSTAT -a)
    qstat_rc=$?
    if test "x$qstat_rc" != x0 ; then
      echo " $QSTAT returned with error code: $qstat_rc" | tee -a "$LOG_FIL"
      echo " cannot check for job: $JOB => skip this test" | tee -a "$LOG_FIL"
      printf -v "M_$i" '%s' 'skipped'
      continue
    fi
    job_lines=$(printf '%s\n' "$qstat_out" \
                | grep -F -- "$USER" | grep -F -- "$JOB")

    if [ "${action:-0}" -eq 2 ] ; then
      #-- to get outp back:
      if test -z "$job_lines" ; then
        echo "did not find any job: $JOB" | tee -a "$LOG_FIL"
        printf -v "M_$i" '%s' 'skipped'
      else
        printf 'found a job: %s' "$JOB" | tee -a "$LOG_FIL"
        printf '%s\n' "$job_lines" | tee -a "$LOG_FIL"
        printf -v "M_$i" '%s' 'submitted'
        NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
      fi
    elif test -z "$job_lines" ; then
      #-- to submit job
      #-- update code if not done within submitted script
      doUp=$(grep -c '^ *checkOut=0' "$script")
      if test ! -e "$TST_DIR/MITgcm_$i" ; then doUp=0 ; fi
      if [ "${doUp:-0}" -ge 1 ] ; then
        echo "" >> "$LOG_FIL"
        echo " update $TST_DIR/MITgcm_$i :" | tee -a "$LOG_FIL"
        #- only run cvs if we really are in the checked-out tree
        ( cd "$TST_DIR/MITgcm_$i" &&
          cvs -q -d :pserver:cvsanon@mitgcm.org:/u/gcmpack update -P -d
        ) >> "$LOG_FIL" 2>&1
      fi
      printf ' %s : ' "$JOB" | tee -a "$LOG_FIL"
      $QSUB "$script" | tee -a "$LOG_FIL"
      printf -v "M_$i" '%s' 'submitted'
      NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
    else
      echo "$JOB" | tee -a "$LOG_FIL"
      printf '%s\n' "$job_lines" | tee -a "$LOG_FIL"
      echo 'job already exist => skip this test' | tee -a "$LOG_FIL"
      printf -v "M_$i" '%s' 'skipped'
    fi
  done
  echo " info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> "$LOG_FIL"
}
68
69 # retrieve output when job is finished
# print the name of the most recent entry of directory $1 that starts with
# prefix $2 (prints nothing if there is none)
_gob_latest_outp()
{
  ls -1 -t "$1" | grep -v tr_out | grep "^$2" | head -1
}

# succeed if name $1 contains the date the script started ($today) or the
# current date
_gob_is_current()
{
  local curday
  case "$1" in *"$today"*) return 0 ;; esac
  curday=$(date +%Y%m%d)
  case "$1" in *"$curday"*) return 0 ;; esac
  return 1
}

# pack directory $2 (located in $1) into /tmp/$3.tar.gz and, if mpack is
# available (HAVE_MPACK=t), mail the tar file; returns 1 when nothing was
# mailed because mpack is not available
_gob_pack_and_send()
{
  local tarball="/tmp/$3.tar.gz"
  rm -f "$tarball"
  ( cd "$1" && tar -czf "$tarball" "./$2" )
  test "x$HAVE_MPACK" = xt || return 1
  "$MPACK" -s MITgcm-test -m 3555000 "$tarball" jmc@mitgcm.org
  return 0
}

get_outp_back()
{
  # get_outp_back number_of_jobs [poll_minutes]
  #   Poll the queue until "number_of_jobs" submitted jobs of JOB_LIST have
  #   left it. For each finished job, pack the most recent results (tr_*) and
  #   restart (rs_*) directories of its verification dir and e-mail them.
  # input : $1 = number of jobs to wait for (default: 0)
  #         $2 = polling interval in minutes (optional, default: 10)
  #         JOB_LIST (+ status of jobs: M_{name}=submitted )
  #         TST_DIR, LOG_FIL, QSTAT, USER, outPfix, today, HAVE_MPACK, MPACK
  #         NB_SUB_JOBS (only used in messages)
  # output: REJECTED (= list of fast-to-fail jobs)
  #         (+ change status of jobs to: M_{name}=done )
  # note  : exits the script (status 1) if some jobs are still pending after
  #         more than 2160 minutes.
  local nbJobs=${1:-0} freq=${2:-10}
  local minutes=0 fsec i status_var comm JOB qstat_out RETVAL in_queue
  local run_dir tdir fast_fail outp_file

  REJECTED=
  fsec=$(( freq * 60 ))
  echo "Check every $freq mn for $nbJobs test(s) to finish" | tee -a "$LOG_FIL"
  echo "- start at :" $(date) | tee -a "$LOG_FIL"
  while test "$nbJobs" != 0 ; do

    sleep "$fsec"
    minutes=$(( minutes + freq ))

    for i in $JOB_LIST ; do

      #- only consider jobs still marked as submitted
      status_var="M_$i"
      comm=${!status_var}
      if test "x$comm" != xsubmitted ; then continue ; fi

      JOB="tst_$i"
      #- $QSTAT stays unquoted so that it may carry extra options
      qstat_out=$($QSTAT -a)
      RETVAL=$?
      if test "x$RETVAL" != x0 ; then
        echo " $QSTAT returned with error code: $RETVAL" | tee -a "$LOG_FIL"
        continue
      fi
      in_queue=$(printf '%s\n' "$qstat_out" \
                 | grep -F -- "$USER" | grep -F -c -- "$JOB")
      #- job still in the queue: check again at next round
      if test "x$in_queue" != x0 ; then continue ; fi

      run_dir="${TST_DIR}/MITgcm_$i/verification"
      #- results output:
      fast_fail=f
      tdir=$(_gob_latest_outp "$run_dir" "tr_$outPfix")
      if test "x$tdir" = x ; then
        echo " no output found for $i" | tee -a "$LOG_FIL"
        fast_fail=t
      elif ! _gob_is_current "$tdir" ; then
        #- most recent output is from a previous run
        echo "tdir='$tdir'" | tee -a "$LOG_FIL"
        echo "Output do not match, no email sent for $i" | tee -a "$LOG_FIL"
        fast_fail=t
      elif _gob_pack_and_send "$run_dir" "$tdir" "tr_$outPfix-$i" ; then
        echo "Email sent for $i at: " $(date) | tee -a "$LOG_FIL"
      else
        echo " no email sent for $i (no mpack)" | tee -a "$LOG_FIL"
      fi
      #- add to rejected list if it fails in less than "freq" minutes
      if test $fast_fail = t && [ "$minutes" -eq "$freq" ] ; then
        REJECTED="$REJECTED $i"
      fi

      #- restart output:
      tdir=$(_gob_latest_outp "$run_dir" "rs_$outPfix")
      if test "x$tdir" = x ; then
        echo " no restart output for $i" | tee -a "$LOG_FIL"
      elif ! _gob_is_current "$tdir" ; then
        echo "tdir='$tdir'" | tee -a "$LOG_FIL"
        echo "Restart output do not match, no email sent for $i" | tee -a "$LOG_FIL"
      elif _gob_pack_and_send "$run_dir" "$tdir" "rs_$outPfix-$i" ; then
        echo "Email sent for $i restart:" $(date) | tee -a "$LOG_FIL"
      else
        echo " no email sent for $i restart (no mpack)" | tee -a "$LOG_FIL"
      fi

      #- record successful sending
      printf -v "M_$i" '%s' 'done'
      nbJobs=$(( nbJobs - 1 ))
      #- make the job's std output files readable (skip if there is none)
      for outp_file in "$TST_DIR/output/tst_$i.std"* ; do
        if test -e "$outp_file" ; then chmod 644 "$outp_file" ; fi
      done
    done

    # "long" queue is 24hrs = 24*60min = 1440min ; give up after 2160min,
    # but only if some jobs are really still pending
    if test "$nbJobs" != 0 && test "$minutes" -gt 2160 ; then
      echo "Time expired after $minutes minutes ($(( minutes / 60 )) hours)" | tee -a "$LOG_FIL"
      echo ' ' $nbJobs '/' $NB_SUB_JOBS 'tests not yet finished' | tee -a "$LOG_FIL"
      exit 1
    fi

  done

  echo "Retrieving $NB_SUB_JOBS tests finish :" $(date) | tee -a "$LOG_FIL"
  echo " info-get-outp: REJECTED='$REJECTED'" >> "$LOG_FIL"

}
184
185 #---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
186 #-- Sequential part of the script starts here:
187 #---------------------------------------------
188
# action =1 : submit test jobs ; =2 : get jobs output ; =3 : do both
#        =4 : do both, then submit once more the jobs that failed fast
#             (default when no action option is given)
case "$1" in
  '-subOnly') action=1 ; shift ;;
  '-getOnly') action=2 ; shift ;;
  '-sub+get') action=3 ; shift ;;
  '-double' ) action=4 ; shift ;;
           *) action=4 ;;
esac
#echo "action= $action , Arg= $# "

today=$(date +%Y%m%d)

#- remaining arguments = list of tests to process (default: all of them)
if test $# = 0
then
  TEST_LIST='g77 gnu ifc pgi adm mp2 mth tuv'
else
  TEST_LIST="$*"
fi

# QSUB="/usr/local/bin/qsub"
# QSTAT="/usr/local/bin/qstat"
# TST_DIR="/mit/jm_c/test_acesgrid"
# logPfix='tst_submit'
# outPfix='acesgrid'
QSUB=qsub
QSTAT=qstat
TST_DIR="/home/jmc/test_aces"
logPfix='tst_all'
outPfix='aces'

#- MPACK and SUB_DIR are relative to $TST_DIR (current dir. when they are used)
MPACK="MITgcm_tools/mpack-1.6/mpack"
SUB_DIR="MITgcm_tools/example_scripts/ACESgrid"
TMP_FIL="$TST_DIR/output/TTT.$$"
LOG_FIL="$TST_DIR/output/$logPfix.$(date +%m%d).log"

#-- clean up old log files and start a new one:
#   (stop here if we cannot get there: files are removed just below)
cd "$TST_DIR/output" || { echo "cannot cd to $TST_DIR/output" >&2 ; exit 1 ; }

rm -f "$logPfix".*.log_bak
if test -f "$LOG_FIL" ; then mv -f "$LOG_FIL" "${LOG_FIL}_bak" ; fi
printf '%s' '-- Starting: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#- keep only the 10 most recent log files
n=$(( $(ls "$logPfix".*.log | wc -l) - 10 ))
if test "$n" -gt 0 ; then
  echo ' remove old log files:' | tee -a "$LOG_FIL"
  ls -lt "$logPfix".*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t "$logPfix".*.log | tail -n "$n" | xargs rm -f
fi

. /etc/profile.d/modules.sh
module list >> "$LOG_FIL" 2>&1

#-- now really do something:
cd "$TST_DIR" || { echo "cannot cd to $TST_DIR" | tee -a "$LOG_FIL" ; exit 1 ; }

JOB_LIST=$TEST_LIST
sub_list_jobs
#echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL

if test "$action" = 1 ; then
  #- submit only: no output to wait for
  NB_JOBS2GET=0
else
  #date_str=`date +%Y%m%d`"_0"

  echo "" | tee -a "$LOG_FIL"
  #- build the mpack utility (from build_mpack in testreport):
  MPACKDIR=$(dirname "$MPACK")
  ( cd "$MPACKDIR" && ./configure && make ) > "TTT.build_mpack.$$" 2>&1
  RETVAL=$?
  if test "x$RETVAL" != x0 ; then
    #- build log (TTT.build_mpack.*) is kept in this case
    echo "Error building the mpack tools at: $MPACKDIR" | tee -a "$LOG_FIL"
    if test -x "$MPACK" ; then
      HAVE_MPACK=t
      echo " use (old ?) executable:" | tee -a "$LOG_FIL"
      ls -l "$MPACK" | tee -a "$LOG_FIL"
    else
      HAVE_MPACK=f
    fi
  else
    if test -x "$MPACK" ; then
      rm -f "TTT.build_mpack.$$"
      HAVE_MPACK=t
      echo "Building mpack: OK" | tee -a "$LOG_FIL"
    else
      echo " $MPACK not executable" | tee -a "$LOG_FIL"
      HAVE_MPACK=f
    fi
  fi
  echo "" >> "$LOG_FIL"

  NB_JOBS2GET=$NB_SUB_JOBS
fi

#- when it's done, retrieve output and send e-mail
get_outp_back "$NB_JOBS2GET"
#echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL

if test "$action" = 4 && test "x$REJECTED" != x ; then

  echo "Try 2nd round for fast-failed jobs: '$REJECTED'" | tee -a "$LOG_FIL"
  JOB_LIST=$REJECTED
  sub_list_jobs
  #echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL

  get_outp_back "$NB_SUB_JOBS"
  #echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL

fi

#------------------------
exit 0

  ViewVC Help
Powered by ViewVC 1.1.22