--- MITgcm/verification/testreport 2011/01/14 23:55:00 1.154 +++ MITgcm/verification/testreport 2011/03/29 02:20:22 1.159 @@ -1,6 +1,6 @@ #! /usr/bin/env bash # -# $Header: /home/ubuntu/mnt/e9_copy/MITgcm/verification/testreport,v 1.154 2011/01/14 23:55:00 jmc Exp $ +# $Header: /home/ubuntu/mnt/e9_copy/MITgcm/verification/testreport,v 1.159 2011/03/29 02:20:22 jmc Exp $ # $Name: $ # @@ -14,6 +14,7 @@ echo " (-mth) run multi-threaded (using eedata.mth)" echo " (-mpi) use MPI to compile and run on 2 processors" echo " (-MPI) NUMBER use MPI to compile and run on max NUMBER procs" + echo " (-mfile|-mf) STRING MPI: file with list of possible machines to run on" echo " (-ieee/-noieee) if possible, use IEEE compiler flags" echo " (DEF=\"-ieee\")" echo " (-gsl) compile with \"-gsl\" flag" @@ -41,18 +42,20 @@ echo " (DEF=\"make\")" echo " (-odir) STRING used to build output directory name" echo " (DEF=\"hostname\")" - echo " (-ptracers|-ptr) STRING specify which ptracers to test" - echo " (DEF=\"1 2 3 4 5\")" +# echo " (-ptracers|-ptr) STRING specify which ptracers to test" +# echo " (DEF=\"1 2 3 4 5\")" echo " (-match) NUMBER Matching Criteria (number of digits)" echo " (DEF=\"$MATCH_CRIT\")" echo " (-j) JOBS use \"make -j JOBS\" for parallel builds" - echo " (-clean) *ONLY* run \"make CLEAN\"" + echo " (-clean) *ONLY* run \"make CLEAN\" & clean run-dir" echo " (-norun|-nr) skip the \"runmodel\" stage (stop after make)" echo " (-runonly|-ro) *ONLY* run stage (=\"-quick\" without make)" echo " (-quick|-q) same as \"-nogenmake -noclean -nodepend\"" echo " (-nogenmake|-ng) skip the genmake stage" echo " (-noclean|-nc) skip the \"make clean\" stage" echo " (-nodepend|-nd) skip the \"make depend\" stage" + echo " (-postclean|-pc) after each exp. 
test, clean build-dir & run-dir" + echo " (-deloutp|-do) delete output files after successful run" echo " (-deldir|-dd) on success, delete the output directory" echo " (-ts) provide timing information per timestep" echo " (-papis) provide MFlop/s per timestep using PAPI" @@ -235,12 +238,6 @@ listVar=" $sVar "`echo "$listVar " | sed "s/ $sVar / /g"` fi if [ $debug -gt 0 ]; then echo "testoutput_run: listVar(M)='$listVar'" 1>&2 ; fi - #- report to this experiment local summary file --- - echo "MACH='$MACH'" > $locDIR"/summary.txt" - echo "UNAMEA='$UNAMEA'" >> $locDIR"/summary.txt" - echo "DATE='$DATE'" >> $locDIR"/summary.txt" - grep '(PID\.TID 0000\.0001) n.. =' $1/$2/$OUTPUTFILE \ - | sed 's/(PID.TID 0000.0001) //' >> $locDIR"/summary.txt" echo "listVar='$listVar'" >> $locDIR"/summary.txt" #--- allargs="" @@ -463,13 +460,14 @@ { # makemodel directory ( + mk_fail=0 if test "x$NOMAKE" = xt ; then cd $1; if test -x $EXECUTABLE ; then echo "make skipped!" else echo "no executable!" - return 2 + mk_fail=3 fi else cd $1; @@ -492,12 +490,23 @@ echo failed cp make.tr_log genmake.log genmake.tr_log $CDIR rm -f $EXECUTABLE - return 1 + mk_fail=1 else echo successful fi + else + echo "no Makefile !" 
+ mk_fail=2 fi fi + if test "x$ADM" = xt -a -f taf_ad.log ; then + head -1 taf_ad.log >> $CDIR"/summary.txt" + nerr=`grep -c 'TAF *.* ERROR ' taf_ad.log` + nwar=`grep -c 'TAF RECOMPUTATION *.* WARNING ' taf_ad.log` + echo " TAF reports $nerr Errors and $nwar Recomputation Warnings" \ + >> $CDIR"/summary.txt" + fi + if test $mk_fail != 0 ; then return $mk_fail ; fi ) } @@ -521,20 +530,24 @@ #- find the largest divisor of input_file proc Nb, but not larger than $np pp=0 - for i in `seq 1 $px` ; do + i=1 + while [ $i -le $px ] ; do if [ `expr $px % $i` -eq 0 ] ; then - for j in `seq 1 $py` ; do - if [ `expr $py % $j` -eq 0 ] ; then - ij=`expr $i \* $j` - if [ $ij -gt $pp ] ; then - if [ $ij -le $np ] ; then - ix=$i ; jy=$j ; pp=$ij - #echo " ix,jy= $ix,$jy" - fi - fi - fi - done + j=1 + while [ $j -le $py ] ; do + if [ `expr $py % $j` -eq 0 ] ; then + ij=`expr $i \* $j` + if [ $ij -gt $pp ] ; then + if [ $ij -le $np ] ; then + ix=$i ; jy=$j ; pp=$ij + #echo " ix,jy= $ix,$jy" + fi + fi + fi + j=`expr $j + 1` + done fi + i=`expr $i + 1` done #- create new SIZE.h type file: @@ -675,6 +688,15 @@ ( cd $1 printf 'runmodel in %s ... ' $1 + if test "x$MPI" != x0 ; then + #- adjust the MPI run command with the right number of Procs + #echo '' ; echo " COMMAND='$COMMAND'" + COMMAND=`echo $COMMAND | sed "s/ TR_NPROC / $LOC_NPROC /"` + if test "x$MPI_MFILE" != x ; then + COMMAND=`echo $COMMAND | sed "s/ TR_MFILE / ..\/..\/$LOC_MFILE /"` + fi + #echo " COMMAND='$COMMAND'" + fi if test -L $EXECUTABLE ; then if test -x "../"$builddir"/"$EXECUTABLE ; then cmp $EXECUTABLE "../"$builddir"/"$EXECUTABLE > /dev/null 2>&1 @@ -686,48 +708,47 @@ echo " link" $EXECUTABLE "from dir ../"$builddir > run.log_tmp ln -sf "../"$builddir"/"$EXECUTABLE . fi - if test "x$MPI" != x0 ; then - #- adjust the MPI run command with the right number of Procs - #echo "COMMAND='$COMMAND'" - COMMAND=`echo $COMMAND | sed "s/-np * XX /-np $NPROC_MPI /"` - #echo "COMMAND='$COMMAND'" - fi if test ! 
-x $EXECUTABLE ; then - rm -f $RUNLOG ; touch $RUNLOG - if test -f run.log_tmp ; then cat run.log_tmp >> $RUNLOG ; fi - echo " no executable:" $EXECUTABLE >> $RUNLOG - RETVAL=8 - ENDVAL=-1 + rm -f $RUNLOG ; touch $RUNLOG + if test -f run.log_tmp ; then cat run.log_tmp >> $RUNLOG ; fi + echo " no executable:" $EXECUTABLE >> $RUNLOG + RETVAL=8 + ENDVAL=-1 else - if test ! -f $OUTPUTFILE -o $OUTPUTFILE -ot $EXECUTABLE ; then - # output do not exist or is older than executable: - rm -f $RUNLOG ; touch $RUNLOG - if test -f run.log_tmp ; then cat run.log_tmp >> $RUNLOG ; fi - ( eval $COMMAND ) >> $RUNLOG 2>&1 - RETVAL=$? - else - RETVAL=0 - if test -f $RUNLOG ; then - if test -f run.log_tmp ; then cat run.log_tmp >> $RUNLOG ; fi - echo "---------->> $OUTPUTFILE is up to date " >> $RUNLOG 2>&1 - else - touch $RUNLOG - if test -f run.log_tmp ; then cat run.log_tmp >> $RUNLOG ; fi - echo "---------->> $OUTPUTFILE is up to date " >> $RUNLOG 2>&1 - echo " no previous $RUNLOG: assume NORMAL END" >> $RUNLOG 2>&1 - fi + if test ! -f $OUTPUTFILE -o $OUTPUTFILE -ot $EXECUTABLE ; then + # output do not exist or is older than executable: + rm -f $RUNLOG ; touch $RUNLOG + if test -f run.log_tmp ; then cat run.log_tmp >> $RUNLOG ; fi + ( eval $COMMAND ) >> $RUNLOG 2>&1 + RETVAL=$? + ENDVAL=`tail $OUTPUTFILE | grep -c 'PROGRAM MAIN: Execution ended Normally'` + if [ $POSTCLEAN -eq 1 -a $ENDVAL -gt 0 ] ; then + find . -name "*.meta" -exec rm {} \; + find . -name "*.data" -exec rm {} \; + rm -rf mnc_test_* fi - #ENDVAL=`cat $RUNLOG | grep -v 'ABNORMAL END' | grep -c 'NORMAL END'` + else + RETVAL=0 ENDVAL=`tail $OUTPUTFILE | grep -c 'PROGRAM MAIN: Execution ended Normally'` + touch $RUNLOG + if test -f run.log_tmp ; then cat run.log_tmp >> $RUNLOG ; fi + echo "---------->> $OUTPUTFILE is up to date " >> $RUNLOG 2>&1 + fi fi rm -f run.log_tmp + #- in all cases where OutputFile exists, report SIZE (and AD time) + if test -f $OUTPUTFILE ; then + grep '(PID\.TID 0000\.0001) n.. 
=' $OUTPUTFILE \ + | sed 's/(PID.TID 0000.0001) //' >> $CDIR"/summary.txt" + if test "x$ADM" = xt ; then + grep -A3 'Seconds in section "ALL' $OUTPUTFILE >> $CDIR"/summary.txt" + fi + fi + if test -s STDERR.0000 ; then cp STDERR.0000 $CDIR"/STDERR.0000" ; fi if [ $RETVAL -eq 0 -a $ENDVAL -gt 0 ] ; then echo successful printf '=> output from running in %s :\n' $1 1>&2 tail $RUNLOG | sed 's/^.*/> &/g' 1>&2 - # === Reduce the size of the testing emails! - #cp $OUTPUTFILE $CDIR"/"$OUTPUTFILE - if test -s STDERR.0000 ; then cp STDERR.0000 $CDIR"/STDERR.0000" ; fi return 0 elif [ $RETVAL -ne 0 -a $ENDVAL -gt 0 ] ; then #-- for some weird cases (run is finihed but with error code) @@ -740,7 +761,6 @@ printf '=> output from running in %s :\n' $1 1>&2 tail $RUNLOG | sed 's/^.*/> &/g' 1>&2 cp $RUNLOG $CDIR"/"$RUNLOG - if test -s STDERR.0000 ; then cp STDERR.0000 $CDIR"/STDERR.0000" ; fi return 1 fi ) @@ -875,7 +895,6 @@ # Default properties debug=0 verbose=1 -clean=0 IEEE=true if test "x$MITGCM_IEEE" != x ; then @@ -890,7 +909,7 @@ NOGENMAKE=f NOCLEAN=f NODEPEND=f -POSTCLEAN=f +POSTCLEAN=0 BASH= OPTFILE=NONE @@ -910,6 +929,7 @@ fi JOBS= MPI=0 +MPI_MFILE= MULTI_THREAD=f OUTDIR= DELDIR= @@ -939,62 +959,38 @@ case $ac_option in - -help | --help | -h | --h) - usage ;; - - -optfile | --optfile | -of | --of) - ac_prev=OPTFILE ;; - -optfile=* | --optfile=* | -of=* | --of=*) - OPTFILE=$ac_optarg ;; - - -addr | --addr | -a | --a) - ac_prev=ADDRESSES ;; - -addr=* | --addr=* | -a=* | --a=*) - ADDRESSES=$ac_optarg ;; - -mpackdir | --mpackdir | -mpd | --mpd) - ac_prev=MPACKDIR ;; - -mpackdir=* | --mpackdir=* | -mpd=* | --mpd=*) - MPACKDIR=$ac_optarg ;; - - -tdir | --tdir | -t | --t) - ac_prev=TESTDIRS ;; - -tdir=* | --tdir=* | -t=* | --t=*) - TESTDIRS=$ac_optarg ;; - - -skipdir | --skipdir | -skd | --skd) - ac_prev=SKIPDIRS ;; - -skipdir=* | --skipdir=* | -skd=* | --skd=*) - SKIPDIRS=$ac_optarg ;; - - -bash | --bash | -b | --b) - ac_prev=BASH ;; - -bash=* | --bash=* | -b=* | --b=*) - 
BASH=$ac_optarg ;; - - -command | --command | -c | --c) - ac_prev=COMMAND ;; - -command=* | --command=* | -c=* | --c=*) - COMMAND=$ac_optarg ;; - - -makedepend | --makedepend | -md | --md) - ac_prev=MKDEPEND ;; - -makedepend=* | --makedepend=* | -md=* | --md=*) - MKDEPEND=$ac_optarg ;; - - -make | --make | -m | --m) - ac_prev=MAKE ;; - -make=* | --make=* | -m=* | --m=*) - MAKE=$ac_optarg ;; - - -odir | --odir) - ac_prev=OUTDIR ;; - -odir=* | --odir=*) - OUTDIR=$ac_optarg ;; - - -ptracers | --ptracers | -ptr | --ptr) - ac_prev=PTRACERS_NUM ;; - -ptracers=* | --ptracers=* | -ptr=* | --ptr=*) - PTRACERS_NUM=$ac_optarg ;; + -help | --help | -h | --h) usage ;; + + -optfile | --optfile | -of | --of) ac_prev=OPTFILE ;; + -optfile=* | --optfile=* | -of=* | --of=*) OPTFILE=$ac_optarg ;; + + -addr | --addr | -a | --a) ac_prev=ADDRESSES ;; + -addr=* | --addr=* | -a=* | --a=*) ADDRESSES=$ac_optarg ;; + -mpackdir | --mpackdir | -mpd | --mpd) ac_prev=MPACKDIR ;; + -mpackdir=* | --mpackdir=* | -mpd=* | --mpd=*) MPACKDIR=$ac_optarg ;; + + -tdir | --tdir | -t | --t) ac_prev=TESTDIRS ;; + -tdir=* | --tdir=* | -t=* | --t=*) TESTDIRS=$ac_optarg ;; + -skipdir | --skipdir | -skd | --skd) ac_prev=SKIPDIRS ;; + -skipdir=* | --skipdir=* | -skd=* | --skd=*) SKIPDIRS=$ac_optarg ;; + + -bash | --bash | -b | --b) ac_prev=BASH ;; + -bash=* | --bash=* | -b=* | --b=*) BASH=$ac_optarg ;; + + -command | --command | -c | --c) ac_prev=COMMAND ;; + -command=* | --command=* | -c=* | --c=*) COMMAND=$ac_optarg ;; + + -makedepend | --makedepend | -md | --md) ac_prev=MKDEPEND ;; + -makedepend=* | --makedepend=* | -md=* | --md=*) MKDEPEND=$ac_optarg ;; + + -make | --make | -m | --m) ac_prev=MAKE ;; + -make=* | --make=* | -m=* | --m=*) MAKE=$ac_optarg ;; + + -odir | --odir) ac_prev=OUTDIR ;; + -odir=* | --odir=*) OUTDIR=$ac_optarg ;; + + -ptracers | --ptracers | -ptr | --ptr) ac_prev=PTRACERS_NUM ;; + -ptracers=* | --ptracers=* | -ptr=* | --ptr=*) PTRACERS_NUM=$ac_optarg ;; -match | --match ) 
ac_prev=MATCH_CRIT ;; -match=* | --match=* ) MATCH_CRIT=$ac_optarg ;; @@ -1002,38 +998,33 @@ -j | --j) ac_prev=JOBS ;; -j=* | --j=*) JOBS=$ac_optarg ;; - -clean | --clean) - CLEANUP=t ; DELDIR=t ;; + -clean | --clean) CLEANUP=t ; DELDIR=t ;; - -norun | --norun | -nr | --nr) - NORUN=t ;; - -runonly | --runonly | -ro | --ro) - QUICK=t ; NOMAKE=t ;; - -quick | --quick | -q | --q) - QUICK=t ;; - -nogenmake | --nogenmake | -ng | --ng) - NOGENMAKE=t ;; - -noclean | --noclean | -nc | --nc) - NOCLEAN=t ;; - -nodepend | --nodepend | -nd | --nd) - NODEPEND=t ;; + -norun | --norun | -nr | --nr) NORUN=t ;; + -runonly | --runonly | -ro | --ro) QUICK=t ; NOMAKE=t ;; + -quick | --quick | -q | --q) QUICK=t ;; + -nogenmake | --nogenmake | -ng | --ng) NOGENMAKE=t ;; + -noclean | --noclean | -nc | --nc) NOCLEAN=t ;; + -nodepend | --nodepend | -nd | --nd) NODEPEND=t ;; - -postclean | --postclean | -pc | --pc) - POSTCLEAN=t ;; + -postclean | --postclean | -pc | --pc) POSTCLEAN=2 ;; + -deloutp | --deloutp | -do | --do) POSTCLEAN=1 ;; -mpi | --mpi) MPI=2 ;; -MPI | --MPI) ac_prev=MPI ;; -MPI=* | --MPI=*) MPI=$ac_optarg ;; + -mfile | --mfile | -mf | --mf) ac_prev=MPI_MFILE ;; + -mfile=* | --mfile=* | -mf=* | --mf=*) MPI_MFILE=$ac_optarg ;; + -mth) MULTI_THREAD=t ;; -adm | -ad) ADM=t ;; - -oad) OADM=t; NODEPEND=t ;; -ieee) IEEE=true ;; -noieee) IEEE= ;; - -gsl) GSL=t ;; + -gsl) GSL=t ;; -verbose) verbose=2 ;; -debug) debug=1 ;; @@ -1047,15 +1038,10 @@ -papis) PAPIS=t;; -pcls) PCL=t;; - -*) - echo "Error: unrecognized option: "$ac_option - usage - ;; - - *) - echo "Error: unrecognized argument: "$ac_option - usage - ;; + -*) echo "Error: unrecognized option: "$ac_option + usage ;; + *) echo "Error: unrecognized argument: "$ac_option + usage ;; esac @@ -1067,6 +1053,23 @@ NODEPEND=t fi +#- check length of MPI machine file: +if test "x$MPI" != x0 -a "x$MPI_MFILE" != x ; then + if test -r $MPI_MFILE ; then + nl=`wc -l $MPI_MFILE | awk '{print $1}'` + if [ $nl -lt $MPI ] ; then + echo "Error: 
need at least $MPI nodes (currently only $nl) in MPI_MFILE=$MPI_MFILE" + usage + fi + if [ $verbose -gt 1 ]; then + echo " MPI_MFILE=$MPI_MFILE : $nl procs for MPI=$MPI run" + fi + else + echo "Error: cannot access MPI_MFILE=$MPI_MFILE" + usage + fi +fi + #- setting for forward or ADM testing if test "x$ADM" = xt ; then code_dir=code_ad @@ -1134,6 +1137,7 @@ OPTFILE=$MITGCM_OF fi +LOC_MFILE='tr_mpi_mfile' RUNLOG="run.tr_log" OUTPUTFILE=$ref_outp if test "x$COMMAND" = x ; then @@ -1326,7 +1330,7 @@ if test -r $CODE_DIR"/SIZE.h_mpi" ; then #- create new SIZE.h with no more than '$MPI' Procs mk_mpi_size $CODE_DIR"/SIZE.h_mpi" $BUILD_DIR"/tr_size.mpi" $MPI - NPROC_MPI=$? + LOC_NPROC=$? ( cd $BUILD_DIR if test -r SIZE.h.mpi ; then cmp tr_size.mpi SIZE.h.mpi > /dev/null 2>&1 ; RETVAL=$? @@ -1342,6 +1346,21 @@ echo "can't find \"$CODE_DIR/SIZE.h_mpi\" -- skipping $dir" continue fi + if test "x$MPI_MFILE" != x ; then + #- create new MPI machine-file with the right number of Procs + rm -f $LOC_MFILE + cat $MPI_MFILE | sort | uniq | head -$LOC_NPROC > $LOC_MFILE + nl=`wc -l $LOC_MFILE | awk '{print $1}'` + if [ $nl -lt $LOC_NPROC ] ; then + rm -f $LOC_MFILE + cat $MPI_MFILE | head -$LOC_NPROC > $LOC_MFILE + #sed -n "1,$LOC_NPROC p" $MPI_MFILE > $LOC_MFILE + fi + if [ $verbose -gt 1 ]; then + nl=`wc -l $LOC_MFILE | awk '{print $1}'` + echo " new LOC_MFILE=$LOC_MFILE : $nl procs for LOC_NPROC=$LOC_NPROC" + fi + fi fi if test ! 
-r $dir"/input/eedata.mth" -a "x$MULTI_THREAD" = "xt" ; then echo "can't find \"$dir/input/eedata.mth\" -- skipping $dir" @@ -1396,9 +1415,13 @@ unset genmake makedepend make run results=$EMPTY_RESULTS - # Create an output dir for each OPTFILE/tdir combination + # Create an output dir & summary.txt file for each tested experiment (tdir) locDIR=$DRESULTS"/"$dir mkdir $locDIR + #- report to this experiment local summary file --- + echo "DATE='$DATE' ; tdir='$dir'" > $locDIR"/summary.txt" + echo "MACH='$MACH'" >> $locDIR"/summary.txt" + echo "UNAMEA='$UNAMEA'" >> $locDIR"/summary.txt" CDIR=`pwd`"/$locDIR" if test "x$NORUN" = xt ; then @@ -1424,23 +1447,20 @@ fres=`formatresults $dir ${genmake:-N} ${makedepend:-N} ${make:-N} ${run:-N} $results` echo 1>&2 echo "$fres" | sed 's/ 99/ --/g' | sed 's/ > />/' | sed 's/ < /</' >> $SUMMARY - touch $locDIR"/summary.txt" echo "fresults='$fres'" | sed 's/ 99/ --/g' >> $locDIR"/summary.txt" - echo "tdir='$dir'" >> $locDIR"/summary.txt" - if test "x$ADM" = xt ; then - head -1 $dir/$builddir/taf_ad.log >> $locDIR"/summary.txt" - grep -A3 'Seconds in section "ALL' $dir/$rundir/$OUTPUTFILE \ - >> $locDIR"/summary.txt" - fi for ex in $extra_runs ; do unset run results=$EMPTY_RESULTS # reference output file refExOut=`echo $ref_outp | sed "s/\./.${ex}./g"` - # Create an output dir for each OPTFILE/tdir.ex combination + # Create an output dir & summary.txt file for each extra run (tdir.ex) locDIR=$DRESULTS"/"$dir"."$ex mkdir $locDIR + #- report to this experiment local summary file --- + echo "DATE='$DATE' ; tdir='$dir.$ex'" > $locDIR"/summary.txt" + #echo "MACH='$MACH'" >> $locDIR"/summary.txt" + #echo "UNAMEA='$UNAMEA'" >> $locDIR"/summary.txt" CDIR=`pwd`"/$locDIR" test ! 
-e "$dir/$pfxdir.$ex" && mkdir "$dir/$pfxdir.$ex" run_clean $dir/$pfxdir.$ex @@ -1451,15 +1471,8 @@ fres="$fres.$ex" echo 1>&2 echo "$fres" | sed 's/ 99/ --/g' | sed 's/ > />/' | sed 's/ < /</' >> $SUMMARY - touch $locDIR"/summary.txt" echo "fresults='$fres'" | sed 's/ 99/ --/g' >> $locDIR"/summary.txt" - echo "tdir='$dir.$ex'" >> $locDIR"/summary.txt" - if test "x$ADM" = xt ; then - head -1 $dir/$builddir/taf_ad.log >> $locDIR"/summary.txt" - grep -A3 'Seconds in section "ALL' $dir/$pfxdir.$ex/$OUTPUTFILE \ - >> $locDIR"/summary.txt" - fi - if test "x$POSTCLEAN" = xt ; then + if test "x$POSTCLEAN" = x2 ; then run_clean $dir/$pfxdir.$ex fi done @@ -1494,10 +1507,11 @@ fi fi #postclean $dir/$builddir - if test "x$POSTCLEAN" = xt ; then + if test "x$POSTCLEAN" = x2 ; then makeclean $dir/$builddir \ && run_clean $dir/$rundir fi + if test "x$MPI" != x0 -a "x$MPI_MFILE" != x ; then rm -f $LOC_MFILE ; fi echo "-------------------------------------------------------------------------------"