/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/aces_test_all
ViewVC logotype

Annotation of /MITgcm/tools/example_scripts/ACESgrid/aces_test_all

Parent Directory | Revision Log | View Revision Graph


Revision 1.29 - (hide annotations) (download)
Fri Aug 17 17:11:14 2012 UTC (11 years, 9 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint64i, checkpoint64h, checkpoint64j, checkpoint64a, checkpoint64c, checkpoint64b, checkpoint64e, checkpoint64d, checkpoint64g, checkpoint64f, checkpoint63s, checkpoint64
Changes since 1.28: +3 -2 lines
add TLM test on 32-bit (old) aces cluster, using open64 + devel & gsl

1 jmc 1.1 #! /usr/bin/env bash
2    
3 jmc 1.29 # $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/aces_test_all,v 1.28 2012/03/03 15:38:39 jmc Exp $
4 jmc 1.1 # $Name: $
5    
6 jmc 1.22 # submit list of jobs or get list of submitted jobs
7     sub_list_jobs()
8     {
9     # sub_list_jobs
10     # input : JOB_LIST
11     # output: NB_SUB_JOBS (+ status of jobs: M_{sfx}=submitted or skipped )
12    
13     NB_SUB_JOBS=0
14     for i in $JOB_LIST
15     do
16     case $i in
17     'mth') sfx='ifc_'${i} ;;
18     'tuv') sfx='op64_'${i} ;;
19 jmc 1.28 'opa') sfx='op64_adm' ;;
20 jmc 1.29 'tlm') sfx='op64_'${i} ;;
21 jmc 1.28 'g77') sfx=${i}'_adm' ;;
22 jmc 1.22 'mp2') sfx=${i}'_mth' ;;
23     *) sfx=${i}'_mpi' ;;
24     esac
25     if test -f $SUB_DIR/aces_test_$sfx ; then
26     JOB="tst_"$i
27     job_exist=`$QSTAT -a | grep $USER | grep $JOB | wc -l`
28     if [ $action -eq 2 ] ; then
29     #-- to get outp back:
30     if test "x_$job_exist" = x_0 ; then
31     echo "did not find any job: $JOB" | tee -a $LOG_FIL
32     eval M_$i='skipped'
33     else
34     echo -n "found a job: $JOB" | tee -a $LOG_FIL
35     $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL
36     eval M_$i='submitted'
37     NB_SUB_JOBS=`expr $NB_SUB_JOBS + 1`
38     fi
39     else
40     #-- to submit job
41     if test "x_$job_exist" = x_0 ; then
42 jmc 1.24 #-- update code if not done within submitted script
43     doUp=`grep -c '^ *checkOut=0' $SUB_DIR/aces_test_$sfx`
44     if test ! -e $TST_DIR/MITgcm_$i ; then doUp=0; fi
45     if [ $doUp -ge 1 ] ; then
46     echo "" >> $LOG_FIL
47     echo " update $TST_DIR/MITgcm_$i :" | tee -a $LOG_FIL
48     ( cd $TST_DIR/MITgcm_$i
49     cvs -q -d :pserver:cvsanon@mitgcm.org:/u/gcmpack update -P -d
50     ) >> $LOG_FIL 2>&1
51     fi
52 jmc 1.22 echo -n " $JOB : " | tee -a $LOG_FIL
53     $QSUB $SUB_DIR/aces_test_$sfx | tee -a $LOG_FIL
54     eval M_$i='submitted'
55     NB_SUB_JOBS=`expr $NB_SUB_JOBS + 1`
56     else
57     echo $JOB | tee -a $LOG_FIL
58     $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL
59     echo 'job already exist => skip this test' | tee -a $LOG_FIL
60     eval M_$i='skipped'
61     fi
62     fi
63     else
64     echo 'no file:' aces_test_$sfx 'to submit' | tee -a $LOG_FIL
65     eval M_$i='skipped'
66     fi
67     done
68     echo " info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
69     }
70    
71     # retrieve output when job is finished
72     get_outp_back()
73     {
74     # get_outp_back number_of_jobs
75     # input : JOB_LIST (+ status of jobs: M_{sfx}=submitted )
76     # output: REJECTED (= list of fast-to-fail jobs)
77     # (+ change status of jobs to: M_{sfx}=done )
78    
79     nbJobs=$1
80     REJECTED=
81     minutes=0 ; freq=10
82     fsec=`expr $freq \* 60`
83     echo "Check every $freq mn for $nbJobs test(s) to finish" | tee -a $LOG_FIL
84     echo "- start at :" `date` | tee -a $LOG_FIL
85     while test $nbJobs != 0 ; do
86    
87     sleep $fsec
88     minutes=$(( $minutes + $freq ))
89    
90     for i in $JOB_LIST ; do
91    
92     eval comm=\$M_$i
93     if test $comm = 'submitted' ; then
94     JOB="tst_"$i
95     $QSTAT -a > $TMP_FIL
96     RETVAL=$?
97     ready_to_send=`grep $USER $TMP_FIL | grep $JOB | wc -l`
98     rm -f $TMP_FIL
99     if test "x$RETVAL" != x0 ; then
100     echo " $QSTAT returned with error code: $RETVAL" | tee -a $LOG_FIL
101     continue
102     fi
103    
104     if test "x_$ready_to_send" = x_0 ; then
105     run_dir=${TST_DIR}"/MITgcm_"$i"/verification"
106     #- results output:
107 jmc 1.23 tdir=`ls -1 -t $run_dir | grep -v tr_out | grep "^tr_$outPfix" | head -1`
108 jmc 1.22 if test "x$tdir" != x ; then
109     #- check this is the right output
110     chk=`echo $tdir | grep -c $today`
111     if test $chk = '0' ; then
112     curday=`date +%Y%m%d`
113     chk=`echo $tdir | grep -c $curday`
114     fi
115     if test $chk = '0' ; then
116     echo "tdir='$tdir'" | tee -a $LOG_FIL
117     echo "Output do not match, no email sent for $i" | tee -a $LOG_FIL
118     if [ $minutes -eq $freq ] ; then
119     #- add to rejected list if it fails in less than "freq" minutes
120     REJECTED="$REJECTED $i"
121     fi
122     else
123 jmc 1.23 rm -f "/tmp/tr_$outPfix-"$i".tar.gz"
124     ( cd $run_dir ; tar -czf "/tmp/tr_$outPfix-"$i".tar.gz" ./$tdir )
125 jmc 1.22 if test "x$HAVE_MPACK" = xt ; then
126 jmc 1.23 $MPACK -s MITgcm-test -m 3555000 "/tmp/tr_$outPfix-"$i".tar.gz" jmc@mitgcm.org
127 jmc 1.22 echo "Email sent for $i at: " `date` | tee -a $LOG_FIL
128     else
129     echo " no email sent for $i (no mpack)" | tee -a $LOG_FIL
130     fi
131     fi
132     else
133     echo " no output found for $i" | tee -a $LOG_FIL
134     if [ $minutes -eq $freq ] ; then
135     #- add to rejected list if it fails in less than "freq" minutes
136     REJECTED="$REJECTED $i"
137     fi
138     fi
139     #- restart output:
140 jmc 1.23 tdir=`ls -1 -t $run_dir | grep -v tr_out | grep "^rs_$outPfix" | head -1`
141 jmc 1.22 if test "x$tdir" != x ; then
142     #- check this is the right output
143     chk=`echo $tdir | grep -c $today`
144     if test $chk = '0' ; then
145     curday=`date +%Y%m%d`
146     chk=`echo $tdir | grep -c $curday`
147     fi
148     if test $chk = '0' ; then
149     echo "tdir='$tdir'" | tee -a $LOG_FIL
150     echo "Restart output do not match, no email sent for $i" | tee -a $LOG_FIL
151     else
152 jmc 1.23 rm -f "/tmp/rs_$outPfix-"$i".tar.gz"
153     ( cd $run_dir ; tar -czf "/tmp/rs_$outPfix-"$i".tar.gz" ./$tdir )
154 jmc 1.22 if test "x$HAVE_MPACK" = xt ; then
155 jmc 1.23 $MPACK -s MITgcm-test -m 3555000 "/tmp/rs_$outPfix-"$i".tar.gz" jmc@mitgcm.org
156 jmc 1.22 echo "Email sent for $i restart:" `date` | tee -a $LOG_FIL
157     else
158     echo " no email sent for $i restart (no mpack)" | tee -a $LOG_FIL
159     fi
160     fi
161     else
162     echo " no restart output for $i" | tee -a $LOG_FIL
163     fi
164     #- record successful sending
165     eval M_$i=done
166     nbJobs=`expr $nbJobs - 1`
167     chmod 644 output/tst_$i.std*
168     fi
169     fi
170     done
171    
172     # "long" queue is 24hrs = 24*60min = 1440min
173     if test $minutes -gt 2160 ; then
174     hrs=$(( $minutes / 60 ));
175     echo "Time expired after $minutes minutes ($hrs hours)" | tee -a $LOG_FIL
176     echo ' ' $nbJobs '/' $NB_SUB_JOBS 'tests not yet finished' | tee -a $LOG_FIL
177     exit 1
178     fi
179    
180     done
181    
182     echo "Retrieving $NB_SUB_JOBS tests finish :" `date` | tee -a $LOG_FIL
183     echo " info-get-outp: REJECTED='$REJECTED'" >> $LOG_FIL
184    
185     }
186    
187     #---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
188     #-- Sequential part of the script starts here:
189     #---------------------------------------------
190    
# action =1 : submit test jobs ; =2 : get jobs output ; =3 : do both
#        =4 : do both, then try a 2nd round for the fast-failed jobs (default)
case "$1" in
  '-subOnly') action=1 ; shift ;;
  '-getOnly') action=2 ; shift ;;
  '-sub+get') action=3 ; shift ;;
  '-double' ) action=4 ; shift ;;
  *)          action=4 ;;
esac
#echo "action= $action , Arg= $# "

# date the script started; output directory names are checked against it
today=$(date +%Y%m%d)

# remaining arguments are the tests to run; default is the full list
if test $# = 0 ; then
  TEST_LIST='g77 gnu ifc pgi adm mp2 mth tuv opa tlm'
else
  TEST_LIST=$*
fi

# QSUB="/usr/local/bin/qsub"
# QSTAT="/usr/local/bin/qstat"
# TST_DIR="/mit/jm_c/test_acesgrid"
# logPfix='tst_submit'
# outPfix='acesgrid'
QSUB=qsub
QSTAT=qstat
TST_DIR="/home/jmc/test_aces"
logPfix='tst_all'
outPfix='aces'

# MPACK and SUB_DIR are relative paths, used after the "cd $TST_DIR" below
MPACK="MITgcm_tools/mpack-1.6/mpack"
SUB_DIR="MITgcm_tools/example_scripts/ACESgrid"
TMP_FIL="$TST_DIR/output/TTT.$$"
LOG_FIL="$TST_DIR/output/$logPfix.$(date +%m%d).log"

#-- clean up old log files and start a new one:
# stop here if the directory is missing: the rm/prune below use relative globs
cd "$TST_DIR/output" || { echo "cannot cd to $TST_DIR/output" >&2 ; exit 1 ; }

rm -f "$logPfix".*.log_bak
if test -f "$LOG_FIL" ; then mv -f "$LOG_FIL" "${LOG_FIL}_bak" ; fi
echo -n '-- Starting: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

# keep the 10 most recent log files
n=$(( $(ls "$logPfix".*.log | wc -l) - 10 ))
if test "$n" -gt 0 ; then
  echo ' remove old log files:' | tee -a "$LOG_FIL"
  ls -lt "$logPfix".*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t "$logPfix".*.log | tail -n "$n" | xargs rm -f
fi

. /etc/profile.d/modules.sh
module list >> "$LOG_FIL" 2>&1

#-- now really do something:
cd "$TST_DIR" || { echo "cannot cd to $TST_DIR" | tee -a "$LOG_FIL" ; exit 1 ; }

JOB_LIST=$TEST_LIST
sub_list_jobs
#echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL

if test "$action" = 1 ; then
  NB_JOBS2GET=0
else
  #date_str=`date +%Y%m%d`"_0"

  echo "" | tee -a "$LOG_FIL"
  #- build the mpack utility (from build_mpack in testreport):
  MPACKDIR=$(dirname "$MPACK")
  ( cd "$MPACKDIR" && ./configure && make ) > "TTT.build_mpack.$$" 2>&1
  RETVAL=$?
  if test "x$RETVAL" != x0 ; then
    # the build log TTT.build_mpack.$$ is kept in this case
    echo "Error building the mpack tools at: $MPACKDIR" | tee -a "$LOG_FIL"
    if test -x "$MPACK" ; then
      HAVE_MPACK=t
      echo " use (old ?) executable:" | tee -a "$LOG_FIL"
      ls -l "$MPACK" | tee -a "$LOG_FIL"
    else
      HAVE_MPACK=f
    fi
  else
    if test -x "$MPACK" ; then
      rm -f "TTT.build_mpack.$$"
      HAVE_MPACK=t
      echo "Building mpack: OK" | tee -a "$LOG_FIL"
    else
      echo " $MPACK not executable" | tee -a "$LOG_FIL"
      HAVE_MPACK=f
    fi
  fi
  echo "" >> "$LOG_FIL"

  NB_JOBS2GET=$NB_SUB_JOBS
fi

#- when it's done, retrieve output and send e-mail
get_outp_back "$NB_JOBS2GET"
#echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL

if test "$action" = 4 && test "x$REJECTED" != x ; then

  echo "" >> "$LOG_FIL"
  echo "Try 2nd round for fast-failed jobs: '$REJECTED'" | tee -a "$LOG_FIL"
  JOB_LIST=$REJECTED
  sub_list_jobs
  #echo " info-main: NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL

  echo "" >> "$LOG_FIL"
  get_outp_back "$NB_SUB_JOBS"
  #echo " info-main: REJECTED='$REJECTED'" >> $LOG_FIL

fi

#------------------------
exit 0

  ViewVC Help
Powered by ViewVC 1.1.22