/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/aces_test_all
ViewVC logotype

Diff of /MITgcm/tools/example_scripts/ACESgrid/aces_test_all

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph | View Patch Patch

revision 1.21 by jmc, Tue Aug 3 04:02:13 2010 UTC revision 1.22 by jmc, Sun May 8 11:27:20 2011 UTC
# Line 3  Line 3 
3  # $Header$  # $Header$
4  # $Name$  # $Name$
5    
6  # action =2 : submit test jobs ; =1 : get jobs output ; =3 : do both  #  submit list of jobs or get list of submitted jobs
sub_list_jobs()
{
   # sub_list_jobs
   #   Walk through JOB_LIST and, for each key that has a submission script
   #   $SUB_DIR/aces_test_{sfx}, either submit it (default) or, when
   #   action = 2, only check whether a matching job is already in the queue.
   # input : JOB_LIST (space separated list of test keys, e.g. "gnu ifc mth")
   #         action   (=2 : only look for queued jobs ; any other value : submit)
   #         SUB_DIR, QSTAT, QSUB, USER, LOG_FIL
   # output: NB_SUB_JOBS (number of jobs flagged as submitted)
   #         (+ status of jobs: M_{key}=submitted or skipped, one per key)
   # note  : QSTAT and QSUB are expanded unquoted on purpose so that they
   #         may carry options; JOB_LIST is split on white space on purpose.

   NB_SUB_JOBS=0
   for i in $JOB_LIST
   do
     #- map the test key to the suffix of its submission script
     case $i in
       'mth') sfx="ifc_${i}"  ;;
       'tuv') sfx="op64_${i}" ;;
       'mp2') sfx="${i}_mth"  ;;
       'g77') sfx="${i}_adm"  ;;
           *) sfx="${i}_mpi"  ;;
     esac

     if test ! -f "$SUB_DIR/aces_test_$sfx" ; then
       echo 'no file:' "aces_test_$sfx" 'to submit' | tee -a "$LOG_FIL"
       eval "M_$i='skipped'"
       continue
     fi

     JOB="tst_$i"
     #- query the queue only once: the decision below and the listing written
     #  to the log then always refer to the same snapshot. Testing for an
     #  empty string also avoids depending on how "wc -l" pads its output.
     queued=$($QSTAT -a | grep "$USER" | grep "$JOB")

     if [ "${action:-0}" -eq 2 ] ; then
     #-- to get outp back:
       if test "x$queued" = x ; then
         echo "did not find any job: $JOB" | tee -a "$LOG_FIL"
         eval "M_$i='skipped'"
       else
         #- no newline here: the queue listing continues on the same line
         printf '%s' "found a job: $JOB" | tee -a "$LOG_FIL"
         echo "$queued" | tee -a "$LOG_FIL"
         eval "M_$i='submitted'"
         NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
       fi
     else
     #-- to submit job
       if test "x$queued" = x ; then
         #- no newline here: the output of QSUB completes the line
         printf '%s' " $JOB : " | tee -a "$LOG_FIL"
         $QSUB "$SUB_DIR/aces_test_$sfx" | tee -a "$LOG_FIL"
         eval "M_$i='submitted'"
         NB_SUB_JOBS=$(( NB_SUB_JOBS + 1 ))
       else
         echo "$JOB" | tee -a "$LOG_FIL"
         echo "$queued" | tee -a "$LOG_FIL"
         echo 'job already exist => skip this test' | tee -a "$LOG_FIL"
         eval "M_$i='skipped'"
       fi
     fi
   done
   echo " info-sub-list: NB_SUB_JOBS='$NB_SUB_JOBS'" >> "$LOG_FIL"
}
58    
59    #  retrieve output when job is finished
get_outp_back()
{
   # get_outp_back number_of_jobs
   #   Poll the queue every "freq" minutes until number_of_jobs jobs flagged
   #   as submitted have left it; for each finished job, pack the most recent
   #   tr_aces* and rs_aces* directories found in
   #   $TST_DIR/MITgcm_{key}/verification and mail them with MPACK.
   # input : $1 = number of jobs to wait for (nothing to do if missing or 0)
   #         JOB_LIST (+ status of jobs: M_{key}=submitted )
   #         QSTAT, USER, TST_DIR, today, HAVE_MPACK, MPACK, LOG_FIL,
   #         NB_SUB_JOBS (only used in log messages)
   # output: REJECTED (= list of fast-to-fail jobs)
   #         (+ change status of jobs to: M_{key}=done )
   # note  : calls "exit 1" (ends the whole script) if jobs are still in the
   #         queue after more than 2160 minutes.

   #- a missing argument must not be read as "jobs are pending"
   nbJobs=${1:-0}
   REJECTED=
   minutes=0 ; freq=10
   fsec=$(( freq * 60 ))
   echo "Check every $freq mn for $nbJobs test(s) to finish" | tee -a "$LOG_FIL"
   echo "- start at :" $(date) | tee -a "$LOG_FIL"
   while test "x$nbJobs" != x0 ; do

     sleep "$fsec"
     minutes=$(( minutes + freq ))

     for i in $JOB_LIST ; do

       eval "comm=\$M_$i"
       if test "x$comm" = 'xsubmitted' ; then
         JOB="tst_$i"
         #- take a snapshot of the queue; do not conclude anything from a
         #  failed query, just try again at the next round
         qstat_out=$($QSTAT -a)
         RETVAL=$?
         if test "x$RETVAL" != x0 ; then
           echo " $QSTAT returned with error code: $RETVAL" | tee -a "$LOG_FIL"
           continue
         fi
         ready_to_send=$(printf '%s\n' "$qstat_out" | grep "$USER" | grep -c "$JOB")

         #- job no longer listed in the queue: collect what it produced
         if test "x_$ready_to_send" = x_0 ; then
           run_dir="${TST_DIR}/MITgcm_${i}/verification"
           curday=$(date +%Y%m%d)
#-      results output:
           tdir=$(ls -1 -t "$run_dir" | grep -v tr_out | grep '^tr_aces' | head -1)
           if test "x$tdir" != x ; then
             #- check this is the right output: name must carry the start
             #  date of this script or the current date
             case $tdir in
               *"$today"*|*"$curday"*) chk=1 ;;
               *) chk=0 ;;
             esac
             if test "x$chk" = 'x0' ; then
               echo "tdir='$tdir'" | tee -a "$LOG_FIL"
               echo "Output do not match, no email sent for $i" | tee -a "$LOG_FIL"
               if [ "$minutes" -eq "$freq" ] ; then
#-         add to rejected list if it fails in less than "freq" minutes
                 REJECTED="$REJECTED $i"
               fi
             else
               rm -f "/tmp/tr_aces-$i.tar.gz"
               #- "&&": never run tar from the wrong directory
               ( cd "$run_dir" && tar -czf "/tmp/tr_aces-$i.tar.gz" "./$tdir" )
               if test "x$HAVE_MPACK" = xt ; then
                 $MPACK -s MITgcm-test -m 3555000 "/tmp/tr_aces-$i.tar.gz" jmc@mitgcm.org
                 echo "Email sent for $i at:     " $(date) | tee -a "$LOG_FIL"
               else
                 echo " no email sent for $i (no mpack)" | tee -a "$LOG_FIL"
               fi
             fi
           else
             echo " no output found for $i" | tee -a "$LOG_FIL"
             if [ "$minutes" -eq "$freq" ] ; then
#-         add to rejected list if it fails in less than "freq" minutes
               REJECTED="$REJECTED $i"
             fi
           fi
#-      restart output:
           tdir=$(ls -1 -t "$run_dir" | grep -v tr_out | grep '^rs_aces' | head -1)
           if test "x$tdir" != x ; then
             #- check this is the right output (same rule as above)
             case $tdir in
               *"$today"*|*"$curday"*) chk=1 ;;
               *) chk=0 ;;
             esac
             if test "x$chk" = 'x0' ; then
               echo "tdir='$tdir'" | tee -a "$LOG_FIL"
               echo "Restart output do not match, no email sent for $i" | tee -a "$LOG_FIL"
             else
               rm -f "/tmp/rs_aces-$i.tar.gz"
               ( cd "$run_dir" && tar -czf "/tmp/rs_aces-$i.tar.gz" "./$tdir" )
               if test "x$HAVE_MPACK" = xt ; then
                 $MPACK -s MITgcm-test -m 3555000 "/tmp/rs_aces-$i.tar.gz" jmc@mitgcm.org
                 echo "Email sent for $i restart:" $(date) | tee -a "$LOG_FIL"
               else
                 echo " no email sent for $i restart (no mpack)" | tee -a "$LOG_FIL"
               fi
             fi
           else
             echo " no restart output for $i" | tee -a "$LOG_FIL"
           fi
#-      record that this job has been processed
           eval "M_$i=done"
           nbJobs=$(( nbJobs - 1 ))
           #- path is relative to the current directory; glob left unquoted
           chmod 644 output/tst_"$i".std*
         fi
       fi
     done

     #  give up after 2160 min (= 36 hrs), i.e. well beyond the 24 hrs
     #  (= 1440 min) of the "long" queue
     if test "$minutes" -gt 2160 ; then
        hrs=$(( minutes / 60 ))
        echo "Time expired after $minutes minutes ($hrs hours)" | tee -a "$LOG_FIL"
        echo ' ' "$nbJobs" '/' "$NB_SUB_JOBS" 'tests not yet finished' | tee -a "$LOG_FIL"
        exit 1
     fi

   done

   echo "Retrieving $NB_SUB_JOBS tests finish :" $(date) | tee -a "$LOG_FIL"
   echo " info-get-outp: REJECTED='$REJECTED'" >> "$LOG_FIL"

}
174    
175    #---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
176    #-- Sequential part of the script starts here:
177    #---------------------------------------------
178    
179    # action =1 : submit test jobs ; =2 : get jobs output ; =3 : do both
180  case $1 in  case $1 in
181   '-subOnly') action=2 ; shift ;;   '-subOnly') action=1 ; shift ;;
182   '-getOnly') action=1 ; shift ;;   '-getOnly') action=2 ; shift ;;
183   '-sub+get') action=3 ; shift ;;   '-sub+get') action=3 ; shift ;;
184            *) action=3 ;;   '-double' ) action=4 ; shift ;;
185              *) action=4 ;;
186  esac  esac
187  #echo "action= $action , Arg= $# "  #echo "action= $action , Arg= $# "
188    
# Line 16  today=`date +%Y%m%d` Line 190  today=`date +%Y%m%d`
190    
191  if test $# = 0  if test $# = 0
192  then  then
193    tst_list='g77 gnu ifc pgi adm mp2 mth tuv'    TEST_LIST='g77 gnu ifc pgi adm mp2 mth tuv'
194  else  else
195    tst_list=$*    TEST_LIST=$*
196  fi  fi
197    
198  # QSUB="/usr/local/bin/qsub"  # QSUB="/usr/local/bin/qsub"
# Line 53  module list >> $LOG_FIL 2>&1 Line 227  module list >> $LOG_FIL 2>&1
227  #-- now really do something:  #-- now really do something:
228  cd $TST_DIR  cd $TST_DIR
229    
230  nbtst=0    JOB_LIST=$TEST_LIST
231  for i in $tst_list    sub_list_jobs
232  do   #echo " info-main:     NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
   case $i in  
     'mth') sfx='ifc_'${i}  ;;  
     'tuv') sfx='op64_'${i} ;;  
     'mp2') sfx=${i}'_mth'  ;;  
     'g77') sfx=${i}'_adm'  ;;  
         *) sfx=${i}'_mpi'  ;;  
   esac  
   if test -f $SUB_DIR/aces_test_$sfx ; then  
     JOB="tst_"$i  
     job_exist=`$QSTAT -a | grep $USER | grep $JOB | wc -l`  
     if [ $action -ge 2 ] ; then  
     #-- to submit job  
       if test "x_$job_exist" = x_0 ; then  
         echo -n " $JOB : " | tee -a $LOG_FIL  
         $QSUB $SUB_DIR/aces_test_$sfx | tee -a $LOG_FIL  
         eval M_$i='submitted'  
         nbtst=`expr $nbtst + 1`  
       else  
         echo $JOB | tee -a $LOG_FIL  
         $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL  
         echo 'job already exist => skip this test' | tee -a $LOG_FIL  
         eval M_$i='skipped'  
       fi  
     else  
     #-- to get outp back:  
       if test "x_$job_exist" = x_0 ; then  
         echo "did not find any job: $JOB" | tee -a $LOG_FIL  
         eval M_$i='skipped'  
       else  
         echo -n "found a job: $JOB" | tee -a $LOG_FIL  
         $QSTAT -a | grep $USER | grep $JOB | tee -a $LOG_FIL  
         eval M_$i='submitted'  
         nbtst=`expr $nbtst + 1`  
       fi  
     fi  
   else  
     echo 'no file:' aces_test_$sfx 'to submit' | tee -a $LOG_FIL  
     eval M_$i='skipped'  
   fi  
 done  
233    
234  if test $action = 2 ; then  if test $action = 1 ; then
235    count=0    NB_JOBS2GET=0
236  else  else
237  #date_str=`date +%Y%m%d`"_0"  #date_str=`date +%Y%m%d`"_0"
238    
   MPACKDIR=`dirname $MPACK`  
239  #- build the mpack utility (from build_mpack in testreport):  #- build the mpack utility (from build_mpack in testreport):
240      MPACKDIR=`dirname $MPACK`
241    ( cd $MPACKDIR && ./configure && make ) > TTT.build_mpack.$$ 2>&1    ( cd $MPACKDIR && ./configure && make ) > TTT.build_mpack.$$ 2>&1
242    RETVAL=$?    RETVAL=$?
243    if test "x$RETVAL" != x0 ; then    if test "x$RETVAL" != x0 ; then
# Line 128  else Line 262  else
262    fi    fi
263    echo    echo
264    
265    count=$nbtst    NB_JOBS2GET=$NB_SUB_JOBS
266  fi  fi
267    
268  #- when it's done, retrieve output and send e-mail  #- when it's done, retrieve output and send e-mail
269  minutes=0 ; freq=10    get_outp_back $NB_JOBS2GET
270  fsec=`expr $freq \* 60`   #echo " info-main:     REJECTED='$REJECTED'" >> $LOG_FIL
 echo "Check every $freq mn for $count test(s) to finish" | tee -a $LOG_FIL  
 echo "- start at :" `date` | tee -a $LOG_FIL  
 while test $count != 0 ; do  
   
     sleep $fsec  
     minutes=$(( $minutes + $freq ))  
   
     for i in $tst_list ; do  
   
       eval comm=\$M_$i  
       if test $comm = 'submitted' ; then  
         JOB="tst_"$i  
         $QSTAT -a > $TMP_FIL  
         RETVAL=$?  
         ready_to_send=`grep $USER $TMP_FIL | grep $JOB | wc -l`  
         rm -f $TMP_FIL  
         if test "x$RETVAL" != x0 ; then  
            echo " $QSTAT returned with error code: $RETVAL" | tee -a $LOG_FIL  
            continue  
         fi  
271    
272          if test "x_$ready_to_send" = x_0 ; then  if test $action = 4 -a "x$REJECTED" != x ; then
           run_dir=${TST_DIR}"/MITgcm_"$i"/verification"  
 #-      results output:  
           tdir=`ls -1 -t $run_dir | grep -v tr_out | grep '^tr_aces' | head -1`  
           if test "x$tdir" != x ; then  
             #- check this is the right output  
             chk=`echo $tdir | grep -c $today`  
             if test $chk = '0' ; then  
               curday=`date +%Y%m%d`  
               chk=`echo $tdir | grep -c $curday`  
             fi  
             if test $chk = '0' ; then  
               echo "tdir='$tdir'" | tee -a $LOG_FIL  
               echo "Output do not match, no email sent for $i" | tee -a $LOG_FIL  
             else  
               rm -f "/tmp/tr_aces-"$i".tar.gz"  
               ( cd $run_dir ; tar -czf "/tmp/tr_aces-"$i".tar.gz" ./$tdir )  
               if test "x$HAVE_MPACK" = xt ; then  
                 $MPACK -s MITgcm-test -m 3555000 "/tmp/tr_aces-"$i".tar.gz" jmc@mitgcm.org  
                 echo "Email sent for $i at:     " `date` | tee -a $LOG_FIL  
               else  
                 echo " no email sent for $i (no mpack)" | tee -a $LOG_FIL  
               fi  
             fi  
           else  
               echo " no output found for $i" | tee -a $LOG_FIL  
           fi  
 #-      restart output:  
           tdir=`ls -1 -t $run_dir | grep -v tr_out | grep '^rs_aces' | head -1`  
           if test "x$tdir" != x ; then  
             #- check this is the right output  
             chk=`echo $tdir | grep -c $today`  
             if test $chk = '0' ; then  
               curday=`date +%Y%m%d`  
               chk=`echo $tdir | grep -c $curday`  
             fi  
             if test $chk = '0' ; then  
               echo "tdir='$tdir'" | tee -a $LOG_FIL  
               echo "Restart output do not match, no email sent for $i" | tee -a $LOG_FIL  
             else  
               rm -f "/tmp/rs_aces-"$i".tar.gz"  
               ( cd $run_dir ; tar -czf "/tmp/rs_aces-"$i".tar.gz" ./$tdir )  
               if test "x$HAVE_MPACK" = xt ; then  
                 $MPACK -s MITgcm-test -m 3555000 "/tmp/rs_aces-"$i".tar.gz" jmc@mitgcm.org  
                 echo "Email sent for $i restart:" `date` | tee -a $LOG_FIL  
               else  
                 echo " no email sent for $i restart (no mpack)" | tee -a $LOG_FIL  
               fi  
             fi  
           else  
               echo " no restart output for $i" | tee -a $LOG_FIL  
           fi  
 #-      record successful sending  
           eval M_$i=done  
           count=`expr $count - 1`  
           chmod 644 output/tst_$i.std*  
         fi  
       fi  
     done  
273    
274      #  "long" queue is 24hrs = 24*60min = 1440min    echo "Try 2nd round for fast-failed jobs: '$REJECTED'" | tee -a $LOG_FIL
275      if test $minutes -gt 2160 ; then    JOB_LIST=$REJECTED
276          hrs=$(( $minutes / 60 ));    sub_list_jobs
277          echo "Time expired after $minutes minutes ($hrs hours)" | tee -a $LOG_FIL   #echo " info-main:     NB_SUB_JOBS='$NB_SUB_JOBS'" >> $LOG_FIL
         echo ' ' $count '/' $nbtst 'tests not yet finished' | tee -a $LOG_FIL  
         exit 1  
     fi  
278    
279  done    get_outp_back $NB_SUB_JOBS
280     #echo " info-main:     REJECTED='$REJECTED'" >> $LOG_FIL
281    
282  echo "Retrieving $nbtst tests finish :" `date` | tee -a $LOG_FIL  fi
 exit 0  
283    
284    #------------------------
285    exit 0

Legend:
Removed from v.1.21  
changed lines
  Added in v.1.22

  ViewVC Help
Powered by ViewVC 1.1.22